https://github.com/hnakamura5 created https://github.com/llvm/llvm-project/pull/78032
Support the handling of TableGen's multiline string (code) literal, which has the form: [{ this is the string, possibly spanning multiple lines... }]. This change was split out from https://github.com/llvm/llvm-project/pull/76059. >From d0767350f26215e86dee039427183630b3f02668 Mon Sep 17 00:00:00 2001 From: hnakamura5 <hnakamu...@outlook.com> Date: Sat, 13 Jan 2024 21:44:34 +0900 Subject: [PATCH] [clang-format] TableGen multi line string support. --- clang/lib/Format/ContinuationIndenter.cpp | 3 + clang/lib/Format/FormatToken.h | 1 + clang/lib/Format/FormatTokenLexer.cpp | 57 +++++++++++++++++++ clang/lib/Format/FormatTokenLexer.h | 3 + clang/lib/Format/TokenAnnotator.cpp | 2 +- clang/unittests/Format/TokenAnnotatorTest.cpp | 5 ++ 6 files changed, 70 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 102504182c4505..e6eaaa9ab45706 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1591,6 +1591,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.StartOfStringLiteral = State.Column + 1; if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) { State.StartOfStringLiteral = State.Column + 1; + } else if (Current.is(TT_TableGenMultiLineString) && + State.StartOfStringLiteral == 0) { + State.StartOfStringLiteral = State.Column + 1; } else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) { State.StartOfStringLiteral = State.Column; } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index d5ef627f1348d3..dede89f2600150 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -148,6 +148,7 @@ namespace format { TYPE(StructLBrace) \ TYPE(StructRBrace) \ TYPE(StructuredBindingLSquare) \ + TYPE(TableGenMultiLineString) \ TYPE(TemplateCloser) \ TYPE(TemplateOpener) 
\ TYPE(TemplateString) \ diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index a1fd6dd6effe6c..1060009bdcf131 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -93,6 +93,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { // string literals are correctly identified. handleCSharpVerbatimAndInterpolatedStrings(); } + if (Style.isTableGen()) + handleTableGenMultilineString(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->isNot(tok::eof)); @@ -272,6 +274,14 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; } } + if (Style.isTableGen()) { + if (tryMergeTokens({tok::l_square, tok::l_brace}, + TT_TableGenMultiLineString)) { + // Multi line string starts with [{ + Tokens.back()->Tok.setKind(tok::string_literal); + return; + } + } } bool FormatTokenLexer::tryMergeNSStringLiteral() { @@ -763,6 +773,53 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() { resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1))); } +void FormatTokenLexer::handleTableGenMultilineString() { + FormatToken *MultiLineString = Tokens.back(); + if (MultiLineString->isNot(TT_TableGenMultiLineString)) + return; + + bool PrevIsRBrace = false; + const char *FirstBreak = nullptr; + const char *LastBreak = nullptr; + const char *Begin = MultiLineString->TokenText.begin(); + // Skip until }], the closer of multi line string found. + for (const char *Current = Begin, *End = Lex->getBuffer().end(); + Current != End; ++Current) { + if (PrevIsRBrace && *Current == ']') { + // }] is the end of multi line string. + if (!FirstBreak) + FirstBreak = Current; + MultiLineString->TokenText = StringRef(Begin, Current - Begin + 1); + // ColumnWidth is only the width of the first line. 
+ MultiLineString->ColumnWidth = encoding::columnWidthWithTabs( + StringRef(Begin, FirstBreak - Begin + 1), + MultiLineString->OriginalColumn, Style.TabWidth, Encoding); + if (LastBreak) { + // Set LastLineColumnWidth if multi line string has multiple lines. + MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs( + StringRef(LastBreak + 1, Current - LastBreak), + MultiLineString->OriginalColumn, Style.TabWidth, Encoding); + } + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Current + 1))); + return; + } + PrevIsRBrace = false; + if (*Current == '\n') { + MultiLineString->IsMultiline = true; + // Assure LastBreak is not equal to FirstBreak. + if (!FirstBreak) + FirstBreak = Current; + LastBreak = Current; + continue; + } + if (*Current == '}') { + // Memorize '}'. If next character is ']', they are the closer. + PrevIsRBrace = true; + continue; + } + } +} + void FormatTokenLexer::handleTemplateStrings() { FormatToken *BacktickToken = Tokens.back(); diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index bb6a8ab69c1be1..1dec6bbc41514c 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -95,6 +95,9 @@ class FormatTokenLexer { void handleCSharpVerbatimAndInterpolatedStrings(); + // Handles TableGen multiline strings. It has the form [{ ... }]. 
+ void handleTableGenMultilineString(); + void tryParsePythonComment(); bool tryMerge_TMacro(); diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 24ce18a64348c1..661118970336a2 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1710,7 +1710,7 @@ class AnnotatingParser { TT_UnionLBrace, TT_RequiresClause, TT_RequiresClauseInARequiresExpression, TT_RequiresExpression, TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace, - TT_BracedListLBrace)) { + TT_BracedListLBrace, TT_TableGenMultiLineString)) { CurrentToken->setType(TT_Unknown); } CurrentToken->Role.reset(); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 92f57a77cdaf01..5ca6a76f840bdf 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2193,6 +2193,11 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) { ASSERT_TRUE(Keywords.isTableGenDefinition(*Tokens[0])); ASSERT_TRUE(Tokens[0]->is(Keywords.kw_def)); ASSERT_TRUE(Tokens[1]->is(TT_StartOfName)); + + // Code, the multiline string token. + Tokens = Annotate("[{ code is multiline string }]"); + ASSERT_EQ(Tokens.size(), 2u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString); } TEST_F(TokenAnnotatorTest, UnderstandConstructors) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits