https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/83709
>From 91d6e4c6e0ae2e1d79edf496df22978a4e1f3e1a Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Sat, 2 Mar 2024 22:08:29 -0800 Subject: [PATCH 1/3] [clang-format] Handle common C++ non-keyword types as such Fixes #83400. --- clang/lib/Format/FormatToken.cpp | 16 ++++++++-- clang/lib/Format/FormatToken.h | 4 +-- clang/lib/Format/QualifierAlignmentFixer.cpp | 25 +++++++++------- clang/lib/Format/QualifierAlignmentFixer.h | 5 ++-- clang/lib/Format/TokenAnnotator.cpp | 29 ++++++++++--------- clang/lib/Format/UnwrappedLineParser.cpp | 12 ++++---- clang/unittests/Format/TokenAnnotatorTest.cpp | 6 ++++ 7 files changed, 61 insertions(+), 36 deletions(-) diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 56a7b2d6387765..02952bd20acf9a 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -34,9 +34,15 @@ const char *getTokenTypeName(TokenType Type) { return nullptr; } +// Sorted common C++ non-keyword types. +static SmallVector<StringRef> CppNonKeywordTypes = { + "byte", "int16_t", "int32_t", "int64_t", "int8_t", + "size_t", "uint16_t", "uint32_t", "uint64_t", "uint8_t", +}; + // FIXME: This is copy&pasted from Sema. Put it in a common place and remove // duplication. -bool FormatToken::isSimpleTypeSpecifier() const { +bool FormatToken::isSimpleTypeSpecifier(bool IsCpp) const { switch (Tok.getKind()) { case tok::kw_short: case tok::kw_long: @@ -66,13 +72,17 @@ bool FormatToken::isSimpleTypeSpecifier() const { case tok::kw_decltype: case tok::kw__Atomic: return true; + case tok::identifier: + return IsCpp && std::binary_search(CppNonKeywordTypes.begin(), + CppNonKeywordTypes.end(), TokenText); default: return false; } } -bool FormatToken::isTypeOrIdentifier() const { - return isSimpleTypeSpecifier() || Tok.isOneOf(tok::kw_auto, tok::identifier); +bool FormatToken::isTypeOrIdentifier(bool IsCpp) const { + return isSimpleTypeSpecifier(IsCpp) || + Tok.isOneOf(tok::kw_auto, tok::identifier); } bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 31245495041960..2bc136c51d23f1 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -674,9 +674,9 @@ struct FormatToken { } /// Determine whether the token is a simple-type-specifier. - [[nodiscard]] bool isSimpleTypeSpecifier() const; + [[nodiscard]] bool isSimpleTypeSpecifier(bool IsCpp) const; - [[nodiscard]] bool isTypeOrIdentifier() const; + [[nodiscard]] bool isTypeOrIdentifier(bool IsCpp) const; bool isObjCAccessSpecifier() const { return is(tok::at) && Next && diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp index 0c63d330b5aed4..834ae115908856 100644 --- a/clang/lib/Format/QualifierAlignmentFixer.cpp +++ b/clang/lib/Format/QualifierAlignmentFixer.cpp @@ -268,11 +268,13 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight( if (isPossibleMacro(TypeToken)) return Tok; + const bool IsCpp = Style.isCpp(); + // The case `const long long int volatile` -> `long long int const volatile` // The case `long const long int volatile` -> `long long int const volatile` // The case `long long volatile int const` -> `long long int const volatile` // The case `const long long volatile int` -> `long long int const volatile` - if (TypeToken->isSimpleTypeSpecifier()) { + if (TypeToken->isSimpleTypeSpecifier(IsCpp)) { // The case `const decltype(foo)` -> `const decltype(foo)` // The case `const typeof(foo)` -> `const typeof(foo)` // The case `const _Atomic(foo)` -> `const _Atomic(foo)` @@ -280,8 +282,10 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight( return Tok; const FormatToken *LastSimpleTypeSpecifier = TypeToken; - while (isQualifierOrType(LastSimpleTypeSpecifier->getNextNonComment())) + while (isQualifierOrType(LastSimpleTypeSpecifier->getNextNonComment(), + IsCpp)) { LastSimpleTypeSpecifier = LastSimpleTypeSpecifier->getNextNonComment(); + } rotateTokens(SourceMgr, Fixes, Tok, LastSimpleTypeSpecifier, /*Left=*/false); @@ -291,7 +295,7 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight( // The case `unsigned short const` -> `unsigned short const` // The case: // `unsigned short volatile const` -> `unsigned short const volatile` - if (PreviousCheck && PreviousCheck->isSimpleTypeSpecifier()) { + if (PreviousCheck && PreviousCheck->isSimpleTypeSpecifier(IsCpp)) { if (LastQual != Tok) rotateTokens(SourceMgr, Fixes, Tok, LastQual, /*Left=*/false); return Tok; @@ -408,11 +412,12 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeLeft( // The case `volatile long long const int` -> `const volatile long long int` // The case `const long long volatile int` -> `const volatile long long int` // The case `long volatile long int const` -> `const volatile long long int` - if (TypeToken->isSimpleTypeSpecifier()) { + if (const bool IsCpp = Style.isCpp(); + TypeToken->isSimpleTypeSpecifier(IsCpp)) { const FormatToken *LastSimpleTypeSpecifier = TypeToken; while (isConfiguredQualifierOrType( LastSimpleTypeSpecifier->getPreviousNonComment(), - ConfiguredQualifierTokens)) { + ConfiguredQualifierTokens, IsCpp)) { LastSimpleTypeSpecifier = LastSimpleTypeSpecifier->getPreviousNonComment(); } @@ -611,15 +616,15 @@ void prepareLeftRightOrderingForQualifierAlignmentFixer( } bool LeftRightQualifierAlignmentFixer::isQualifierOrType( - const FormatToken *const Tok) { - return Tok && (Tok->isSimpleTypeSpecifier() || Tok->is(tok::kw_auto) || + const FormatToken *const Tok, bool IsCpp) { + return Tok && (Tok->isSimpleTypeSpecifier(IsCpp) || Tok->is(tok::kw_auto) || isQualifier(Tok)); } bool LeftRightQualifierAlignmentFixer::isConfiguredQualifierOrType( - const FormatToken *const Tok, - const std::vector<tok::TokenKind> &Qualifiers) { - return Tok && (Tok->isSimpleTypeSpecifier() || Tok->is(tok::kw_auto) || + const FormatToken *const Tok, const std::vector<tok::TokenKind> &Qualifiers, + bool IsCpp) { + return Tok && (Tok->isSimpleTypeSpecifier(IsCpp) || Tok->is(tok::kw_auto) || isConfiguredQualifier(Tok, Qualifiers)); } diff --git a/clang/lib/Format/QualifierAlignmentFixer.h b/clang/lib/Format/QualifierAlignmentFixer.h index e922d800559510..e1cc27e62b13a0 100644 --- a/clang/lib/Format/QualifierAlignmentFixer.h +++ b/clang/lib/Format/QualifierAlignmentFixer.h @@ -71,10 +71,11 @@ class LeftRightQualifierAlignmentFixer : public TokenAnalyzer { tok::TokenKind QualifierType); // Is the Token a simple or qualifier type - static bool isQualifierOrType(const FormatToken *Tok); + static bool isQualifierOrType(const FormatToken *Tok, bool IsCpp = true); static bool isConfiguredQualifierOrType(const FormatToken *Tok, - const std::vector<tok::TokenKind> &Qualifiers); + const std::vector<tok::TokenKind> &Qualifiers, + bool IsCpp = true); // Is the Token likely a Macro static bool isPossibleMacro(const FormatToken *Tok); diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 04f0374b674e53..3c5d7326224eca 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -562,7 +562,7 @@ class AnnotatingParser { (CurrentToken->is(tok::l_paren) && CurrentToken->Next && CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret)); if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) || - CurrentToken->Previous->isSimpleTypeSpecifier()) && + CurrentToken->Previous->isSimpleTypeSpecifier(IsCpp)) && !(CurrentToken->is(tok::l_brace) || (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) { Contexts.back().IsExpression = false; @@ -2573,7 +2573,7 @@ class AnnotatingParser { return true; // MyClass a; - if (PreviousNotConst->isSimpleTypeSpecifier()) + if (PreviousNotConst->isSimpleTypeSpecifier(IsCpp)) return true; // type[] a in Java @@ -2704,9 +2704,10 @@ class AnnotatingParser { } // Heuristically try to determine whether the parentheses contain a type. - auto IsQualifiedPointerOrReference = [](FormatToken *T) { + auto IsQualifiedPointerOrReference = [this](FormatToken *T) { // This is used to handle cases such as x = (foo *const)&y; - assert(!T->isSimpleTypeSpecifier() && "Should have already been checked"); + assert(!T->isSimpleTypeSpecifier(IsCpp) && + "Should have already been checked"); // Strip trailing qualifiers such as const or volatile when checking // whether the parens could be a cast to a pointer/reference type. while (T) { @@ -2738,7 +2739,7 @@ class AnnotatingParser { bool ParensAreType = !Tok.Previous || Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) || - Tok.Previous->isSimpleTypeSpecifier() || + Tok.Previous->isSimpleTypeSpecifier(IsCpp) || IsQualifiedPointerOrReference(Tok.Previous); bool ParensCouldEndDecl = Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater); @@ -3595,7 +3596,8 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, if (!Current.Tok.getIdentifierInfo()) return false; - auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * { + auto skipOperatorName = + [IsCpp](const FormatToken *Next) -> const FormatToken * { for (; Next; Next = Next->Next) { if (Next->is(TT_OverloadedOperatorLParen)) return Next; @@ -3614,7 +3616,7 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, Next = Next->Next; continue; } - if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) && + if ((Next->isSimpleTypeSpecifier(IsCpp) || Next->is(tok::identifier)) && Next->Next && Next->Next->isPointerOrReference()) { // For operator void*(), operator char*(), operator Foo*(). Next = Next->Next; @@ -3712,7 +3714,7 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, Tok = Tok->MatchingParen; continue; } - if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || + if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier(IsCpp) || Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis, TT_TypeName)) { return true; @@ -4376,7 +4378,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.Tok.isLiteral()) return true; // for (auto a = 0, b = 0; const auto & c : {1, 2, 3}) - if (Left.isTypeOrIdentifier() && Right.Next && Right.Next->Next && + if (Left.isTypeOrIdentifier(IsCpp) && Right.Next && Right.Next->Next && Right.Next->Next->is(TT_RangeBasedForLoopColon)) { return getTokenPointerOrReferenceAlignment(Right) != FormatStyle::PAS_Left; @@ -4419,8 +4421,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Right.is(tok::l_brace) && Right.is(BK_Block)) return true; // for (auto a = 0, b = 0; const auto& c : {1, 2, 3}) - if (Left.Previous && Left.Previous->isTypeOrIdentifier() && Right.Next && - Right.Next->is(TT_RangeBasedForLoopColon)) { + if (Left.Previous && Left.Previous->isTypeOrIdentifier(IsCpp) && + Right.Next && Right.Next->is(TT_RangeBasedForLoopColon)) { return getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right; } @@ -4458,7 +4460,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Right.isPointerOrReference()) { const FormatToken *Previous = &Left; while (Previous && Previous->isNot(tok::kw_operator)) { - if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) { + if (Previous->is(tok::identifier) || + Previous->isSimpleTypeSpecifier(IsCpp)) { Previous = Previous->getPreviousNonComment(); continue; } @@ -4647,7 +4650,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (!Style.isVerilog() && (Left.isOneOf(tok::identifier, tok::greater, tok::r_square, tok::r_paren) || - Left.isSimpleTypeSpecifier()) && + Left.isSimpleTypeSpecifier(IsCpp)) && Right.is(tok::l_brace) && Right.getNextNonComment() && Right.isNot(BK_Block)) { return false; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 2ce291da11b86a..0b96dd80252ad1 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1866,7 +1866,7 @@ void UnwrappedLineParser::parseStructuralElement( nextToken(); // Block return type. if (FormatTok->Tok.isAnyIdentifier() || - FormatTok->isSimpleTypeSpecifier()) { + FormatTok->isSimpleTypeSpecifier(IsCpp)) { nextToken(); // Return types: pointers are ok too. while (FormatTok->is(tok::star)) @@ -2222,7 +2222,7 @@ bool UnwrappedLineParser::tryToParseLambda() { bool InTemplateParameterList = false; while (FormatTok->isNot(tok::l_brace)) { - if (FormatTok->isSimpleTypeSpecifier()) { + if (FormatTok->isSimpleTypeSpecifier(IsCpp)) { nextToken(); continue; } @@ -3415,7 +3415,7 @@ bool clang::format::UnwrappedLineParser::parseRequires() { break; } default: - if (PreviousNonComment->isTypeOrIdentifier()) { + if (PreviousNonComment->isTypeOrIdentifier(IsCpp)) { // This is a requires clause. parseRequiresClause(RequiresToken); return true; @@ -3478,7 +3478,7 @@ bool clang::format::UnwrappedLineParser::parseRequires() { --OpenAngles; break; default: - if (NextToken->isSimpleTypeSpecifier()) { + if (NextToken->isSimpleTypeSpecifier(IsCpp)) { FormatTok = Tokens->setPosition(StoredPosition); parseRequiresExpression(RequiresToken); return false; @@ -3962,8 +3962,8 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { } if (FormatTok->is(tok::l_square)) { FormatToken *Previous = FormatTok->Previous; - if (!Previous || - !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { + if (!Previous || (Previous->isNot(tok::r_paren) && + !Previous->isTypeOrIdentifier(IsCpp))) { // Don't try parsing a lambda if we had a closing parenthesis before, // it was probably a pointer to an array: int (*)[]. if (!tryToParseLambda()) diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index c736dac8dabf21..49382b595927e1 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -620,6 +620,12 @@ TEST_F(TokenAnnotatorTest, UnderstandsCasts) { ASSERT_EQ(Tokens.size(), 8u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_Unknown); EXPECT_TOKEN(Tokens[4], tok::amp, TT_BinaryOperator); + + Tokens = annotate("#define FOO(bar) foo((uint64_t)&bar)"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_CastRParen); + // FIXME: should be TT_PointerOrReference instead. + EXPECT_TOKEN(Tokens[11], tok::amp, TT_UnaryOperator); } TEST_F(TokenAnnotatorTest, UnderstandsDynamicExceptionSpecifier) { >From 7728fc5b3431050e2949842b3289467c9403d6b8 Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Sun, 3 Mar 2024 10:39:04 -0800 Subject: [PATCH 2/3] Update TokenAnnotatorTest.cpp --- clang/unittests/Format/TokenAnnotatorTest.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 49382b595927e1..560042585d4ebe 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -624,7 +624,6 @@ TEST_F(TokenAnnotatorTest, UnderstandsCasts) { Tokens = annotate("#define FOO(bar) foo((uint64_t)&bar)"); ASSERT_EQ(Tokens.size(), 15u) << Tokens; EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_CastRParen); - // FIXME: should be TT_PointerOrReference instead. EXPECT_TOKEN(Tokens[11], tok::amp, TT_UnaryOperator); } >From 939f2f4473382bd5f2b46ace16ffff2a1250684a Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Sun, 3 Mar 2024 14:12:45 -0800 Subject: [PATCH 3/3] Update FormatToken.cpp Removed `byte` and added `intptr_t`/`uintptr_t`. --- clang/lib/Format/FormatToken.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 02952bd20acf9a..2dd6a19944f6cb 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -36,8 +36,8 @@ const char *getTokenTypeName(TokenType Type) { // Sorted common C++ non-keyword types. static SmallVector<StringRef> CppNonKeywordTypes = { - "byte", "int16_t", "int32_t", "int64_t", "int8_t", - "size_t", "uint16_t", "uint32_t", "uint64_t", "uint8_t", + "int16_t", "int32_t", "int64_t", "int8_t", "intptr_t", "size_t", + "uint16_t", "uint32_t", "uint64_t", "uint8_t", "uintptr_t", }; // FIXME: This is copy&pasted from Sema. Put it in a common place and remove _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits