Author: Manuel Klimek Date: 2021-11-22T11:08:38+01:00 New Revision: 84bf5e328664db2e744c4651c52d2460b1733d09
URL: https://github.com/llvm/llvm-project/commit/84bf5e328664db2e744c4651c52d2460b1733d09 DIFF: https://github.com/llvm/llvm-project/commit/84bf5e328664db2e744c4651c52d2460b1733d09.diff LOG: Fix various problems found by fuzzing. 1. IndexedTokenSource::getNextToken cannot return nullptr; some code was still written assuming it can; make getNextToken more resilient against incorrect input and fix its call-sites. 2. Change various asserts that can be triggered by user-provided input into conditionals in the code. Added: Modified: clang/lib/Format/ContinuationIndenter.cpp clang/lib/Format/TokenAnnotator.cpp clang/lib/Format/UnwrappedLineParser.cpp clang/lib/Format/WhitespaceManager.cpp Removed: ################################################################################ diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 1e4f5690ef241..f56b7c70d18e7 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1986,7 +1986,9 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, Current.Previous->isNot(TT_ImplicitStringLiteral))) { if (!Style.ReflowComments || CommentPragmasRegex.match(Current.TokenText.substr(2)) || - switchesFormatting(Current)) + switchesFormatting(Current) || + !(Current.TokenText.startswith("//") || + Current.TokenText.startswith("#"))) return nullptr; return std::make_unique<BreakableLineCommentSection>( Current, StartColumn, /*InPPDirective=*/false, Encoding, Style); @@ -2195,11 +2197,10 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any // columns. - if (NewRemainingTokenColumns == RemainingTokenColumns) { + if (NewRemainingTokenColumns >= RemainingTokenColumns) { // FIXME: Do we need to adjust the penalty? 
break; } - assert(NewRemainingTokenColumns < RemainingTokenColumns); LLVM_DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first << ", " << Split.second << "\n"); diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3897241cb8589..f3f63b4cad234 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -946,11 +946,15 @@ class AnnotatingParser { !Line.First->isOneOf(tok::kw_enum, tok::kw_case, tok::kw_default)) { FormatToken *Prev = Tok->getPreviousNonComment(); + if (!Prev) + break; if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept)) Tok->setType(TT_CtorInitializerColon); else if (Prev->is(tok::kw_try)) { // Member initializer list within function try block. FormatToken *PrevPrev = Prev->getPreviousNonComment(); + if (!PrevPrev) + break; if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept)) Tok->setType(TT_CtorInitializerColon); } else @@ -1578,6 +1582,8 @@ class AnnotatingParser { if (TemplateCloser->is(tok::l_paren)) { // No Matching Paren yet so skip to matching paren TemplateCloser = untilMatchingParen(TemplateCloser); + if (!TemplateCloser) + break; } if (TemplateCloser->is(tok::less)) NestingLevel++; @@ -2639,8 +2645,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { if (Current->Role) Current->Role->precomputeFormattingInfos(Current); if (Current->MatchingParen && - Current->MatchingParen->opensBlockOrBlockTypeList(Style)) { - assert(IndentLevel > 0); + Current->MatchingParen->opensBlockOrBlockTypeList(Style) && + IndentLevel > 0) { --IndentLevel; } Current->IndentLevel = IndentLevel; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 28d925858f776..c12c7c6ecfa69 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -199,6 +199,8 @@ class IndexedTokenSource : public FormatTokenSource { : Tokens(Tokens), Position(-1) {} FormatToken 
*getNextToken() override { + if (Position >= 0 && Tokens[Position]->is(tok::eof)) + return Tokens[Position]; ++Position; return Tokens[Position]; } @@ -399,7 +401,7 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { FormatToken *Next; do { Next = Tokens->getNextToken(); - } while (Next && Next->is(tok::comment)); + } while (Next->is(tok::comment)); FormatTok = Tokens->setPosition(StoredPosition); if (Next && Next->isNot(tok::colon)) { // default not followed by ':' is not a case label; treat it like @@ -1097,7 +1099,6 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { } void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { - assert(!FormatTok->is(tok::l_brace)); if (Style.Language == FormatStyle::LK_TableGen && FormatTok->is(tok::pp_include)) { nextToken(); @@ -1488,7 +1489,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { unsigned StoredPosition = Tokens->getPosition(); FormatToken *Next = Tokens->getNextToken(); FormatTok = Tokens->setPosition(StoredPosition); - if (Next && !mustBeJSIdent(Keywords, Next)) { + if (!mustBeJSIdent(Keywords, Next)) { nextToken(); break; } @@ -2653,23 +2654,25 @@ bool UnwrappedLineParser::tryToParseSimpleAttribute() { ScopedTokenPosition AutoPosition(Tokens); FormatToken *Tok = Tokens->getNextToken(); // We already read the first [ check for the second. - if (Tok && !Tok->is(tok::l_square)) { + if (!Tok->is(tok::l_square)) { return false; } // Double check that the attribute is just something // fairly simple. 
- while (Tok) { + while (Tok->isNot(tok::eof)) { if (Tok->is(tok::r_square)) { break; } Tok = Tokens->getNextToken(); } + if (Tok->is(tok::eof)) + return false; Tok = Tokens->getNextToken(); - if (Tok && !Tok->is(tok::r_square)) { + if (!Tok->is(tok::r_square)) { return false; } Tok = Tokens->getNextToken(); - if (Tok && Tok->is(tok::semi)) { + if (Tok->is(tok::semi)) { return false; } return true; @@ -2682,7 +2685,7 @@ void UnwrappedLineParser::parseJavaEnumBody() { unsigned StoredPosition = Tokens->getPosition(); bool IsSimple = true; FormatToken *Tok = Tokens->getNextToken(); - while (Tok) { + while (!Tok->is(tok::eof)) { if (Tok->is(tok::r_brace)) break; if (Tok->isOneOf(tok::l_brace, tok::semi)) { diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index 74136d2f5caa1..7a00e93789191 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -372,8 +372,6 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, if (ContinuedStringLiteral) Changes[i].Spaces += Shift; - assert(Shift >= 0); - Changes[i].StartOfTokenColumn += Shift; if (i + 1 != Changes.size()) Changes[i + 1].PreviousEndOfTokenColumn += Shift; @@ -915,7 +913,7 @@ void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End, Changes[i].StartOfBlockComment->StartOfTokenColumn - Changes[i].StartOfTokenColumn; } - assert(Shift >= 0); + if (Shift < 0) continue; Changes[i].Spaces += Shift; if (i + 1 != Changes.size()) Changes[i + 1].PreviousEndOfTokenColumn += Shift; @@ -1270,10 +1268,10 @@ WhitespaceManager::linkCells(CellDescriptions &&CellDesc) { void WhitespaceManager::generateChanges() { for (unsigned i = 0, e = Changes.size(); i != e; ++i) { const Change &C = Changes[i]; - if (i > 0) { - assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() != - C.OriginalWhitespaceRange.getBegin() && - "Generating two replacements for the same location"); + if (i > 0 && Changes[i - 
1].OriginalWhitespaceRange.getBegin() == + C.OriginalWhitespaceRange.getBegin()) { + // Do not generate two replacements for the same location. + continue; } if (C.CreateReplacement) { std::string ReplacementText = C.PreviousLinePostfix; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits