https://github.com/robozati created https://github.com/llvm/llvm-project/pull/69849
When getting the AST from clangd, if there is an unclosed token, a segfault or an UB can happen when accessing `File.SpelledTokens` because the supposed closing token is not present, so its index is greater than the vec's size. This fixes the issue by returning a `nullptr` on the function that caused the UB and then checking if the spelled token is null, returning `nullopt` for the whole expanded token. Fixes https://github.com/clangd/clangd/issues/1559. >From e57657cf296e1e93fa17f021bb8ce591071b2176 Mon Sep 17 00:00:00 2001 From: robozati <139823421+roboz...@users.noreply.github.com> Date: Sat, 21 Oct 2023 12:04:14 -0300 Subject: [PATCH] Fix UB on unclosed parentheses, brackets or braces in Tokens --- clang/lib/Tooling/Syntax/Tokens.cpp | 23 ++++++++++++++----- clang/unittests/Tooling/Syntax/TokensTest.cpp | 6 +++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 2f28b9cf286a638..e4c24aa1ef3fbf0 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -286,9 +286,13 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { }); // Our token could only be produced by the previous mapping. if (It == File.Mappings.begin()) { - // No previous mapping, no need to modify offsets. - return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], - /*Mapping=*/nullptr}; + const auto offset = ExpandedIndex - File.BeginExpanded; + // Bounds check so parsing an unclosed parenthesis, brackets or braces do + // not result in UB. + if (offset >= File.SpelledTokens.size()) { + return {nullptr, nullptr}; + } + return {&File.SpelledTokens[offset], /*Mapping=*/nullptr}; } --It; // 'It' now points to last mapping that started before our token. @@ -298,9 +302,12 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { // Not part of the mapping, use the index from previous mapping to compute the // corresponding spelled token. - return { - &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)], - /*Mapping=*/nullptr}; + const auto offset = It->EndSpelled + (ExpandedIndex - It->EndExpanded); + // This index can also result in UB when parsing unclosed tokens. + if (offset >= File.SpelledTokens.size()) { + return {nullptr, nullptr}; + } + return {&File.SpelledTokens[offset], /*Mapping=*/nullptr}; } const TokenBuffer::Mapping * @@ -410,6 +417,10 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First); auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last); + if (FirstSpelled == nullptr || LastSpelled == nullptr) { + return std::nullopt; + }; + FileID FID = SourceMgr->getFileID(FirstSpelled->location()); // FIXME: Handle multi-file changes by trying to map onto a common root. if (FID != SourceMgr->getFileID(LastSpelled->location())) diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index 0c08318a637c0b6..44f0c57b3bad875 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -814,6 +814,12 @@ TEST_F(TokenBufferTest, SpelledByExpanded) { EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")), ValueIs(SameRange(findSpelled("good")))); EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("prev good")), std::nullopt); + + // Unclosed parentheses, brackets or braces do not result in a UB + recordTokens(R"cpp( + foo(x + )cpp"); + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("x")), std::nullopt); } TEST_F(TokenBufferTest, ExpandedTokensForRange) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits