[clang-tools-extra] [clang-tidy][NFC] Add `getCommentsInRange` utility (PR #183940)

Daniil Dudkin via cfe-commits Sat, 28 Feb 2026 11:47:03 -0800

https://github.com/unterumarmung created 
https://github.com/llvm/llvm-project/pull/183940


Introduce `getCommentsInRange` in `LexerUtils` and refactor 
`getTrailingCommentsInRange` to reuse a shared comment collector with 
mode-based behavior. Add unit tests for all-comments behavior and preserve 
existing trailing-comment semantics.

From b3927d451c872e4865a673f7d15d19010ba36179 Mon Sep 17 00:00:00 2001
From: Daniil Dudkin <[email protected]>
Date: Sat, 28 Feb 2026 21:55:34 +0300
Subject: [PATCH] [clang-tidy][NFC] Add getCommentsInRange utility

Introduce getCommentsInRange in LexerUtils and refactor 
getTrailingCommentsInRange to reuse a shared comment collector with mode-based 
behavior. Add unit tests for all-comments behavior and preserve existing 
trailing-comment semantics.
---
 .../clang-tidy/utils/LexerUtils.cpp           |  27 +++-
 .../clang-tidy/utils/LexerUtils.h             |   5 +
 .../unittests/clang-tidy/LexerUtilsTest.cpp   | 119 ++++++++++++++++++
 3 files changed, 147 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
index a9a8c7bbf4c89..4c36d4cacd1ec 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@@ -100,9 +100,14 @@ bool rangeContainsExpansionsOrDirectives(SourceRange Range,
   return false;
 }
 
-std::vector<CommentToken>
-getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
-                           const LangOptions &LangOpts) {
+namespace {
+enum class CommentCollectionMode { AllComments, TrailingComments };
+} // namespace
+
+static std::vector<CommentToken>
+collectCommentsInRange(CharSourceRange Range, const SourceManager &SM,
+                       const LangOptions &LangOpts,
+                       CommentCollectionMode Mode) {
   std::vector<CommentToken> Comments;
   if (Range.isInvalid())
     return Comments;
@@ -149,7 +154,7 @@ getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
           Tok.getLocation(),
           StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength()),
       });
-    } else {
+    } else if (Mode == CommentCollectionMode::TrailingComments) {
       // Clear comments found before the different token, e.g. comma. Callers
       // use this to retrieve only the contiguous comment block that directly
       // precedes a token of interest.
@@ -160,6 +165,20 @@ getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
   return Comments;
 }
 
+std::vector<CommentToken> getCommentsInRange(CharSourceRange Range,
+                                             const SourceManager &SM,
+                                             const LangOptions &LangOpts) {
+  return collectCommentsInRange(Range, SM, LangOpts,
+                                CommentCollectionMode::AllComments);
+}
+
+std::vector<CommentToken>
+getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
+                           const LangOptions &LangOpts) {
+  return collectCommentsInRange(Range, SM, LangOpts,
+                                CommentCollectionMode::TrailingComments);
+}
+
 std::optional<Token> getQualifyingToken(tok::TokenKind TK,
                                         CharSourceRange Range,
                                         const ASTContext &Context,
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
index 38123ae14cff7..681fc6194d45c 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
@@ -120,6 +120,11 @@ struct CommentToken {
   StringRef Text;
 };
 
+/// Returns all comment tokens found in the given range.
+std::vector<CommentToken> getCommentsInRange(CharSourceRange Range,
+                                             const SourceManager &SM,
+                                             const LangOptions &LangOpts);
+
 /// Returns comment tokens found in the given range. If a non-comment token is
 /// encountered, clears previously collected comments and continues.
 std::vector<CommentToken>
diff --git a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
index 438a78b4694ee..0e0c3d60dedfe 100644
--- a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
+++ b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
@@ -43,6 +43,125 @@ static CharSourceRange rangeFromAnnotations(const llvm::Annotations &A,
 
 namespace {
 
+TEST(LexerUtilsTest, GetCommentsInRangeAdjacentComments) {
+  llvm::Annotations Code(R"cpp(
+void f() {
+  $range[[/*first*/ /*second*/]]
+  int x = 0;
+}
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange Range =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const std::vector<utils::lexer::CommentToken> Comments =
+      utils::lexer::getCommentsInRange(Range, SM, LangOpts);
+  ASSERT_EQ(2u, Comments.size());
+  EXPECT_EQ("/*first*/", Comments[0].Text);
+  EXPECT_EQ("/*second*/", Comments[1].Text);
+  const StringRef CodeText = Code.code();
+  const size_t FirstOffset = CodeText.find("/*first*/");
+  ASSERT_NE(StringRef::npos, FirstOffset);
+  const size_t SecondOffset = CodeText.find("/*second*/");
+  ASSERT_NE(StringRef::npos, SecondOffset);
+  EXPECT_EQ(FirstOffset, SM.getFileOffset(Comments[0].Loc));
+  EXPECT_EQ(SecondOffset, SM.getFileOffset(Comments[1].Loc));
+}
+
+TEST(LexerUtilsTest, GetCommentsInRangeKeepsCommentsAcrossTokens) {
+  llvm::Annotations Code(R"cpp(
+void f() {
+  int x = ($range[[/*first*/ 0, /*second*/]] 1);
+}
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange Range =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const std::vector<utils::lexer::CommentToken> Comments =
+      utils::lexer::getCommentsInRange(Range, SM, LangOpts);
+  ASSERT_EQ(2u, Comments.size());
+  EXPECT_EQ("/*first*/", Comments[0].Text);
+  EXPECT_EQ("/*second*/", Comments[1].Text);
+  const StringRef CodeText = Code.code();
+  const size_t FirstOffset = CodeText.find("/*first*/");
+  ASSERT_NE(StringRef::npos, FirstOffset);
+  const size_t SecondOffset = CodeText.find("/*second*/");
+  ASSERT_NE(StringRef::npos, SecondOffset);
+  EXPECT_EQ(FirstOffset, SM.getFileOffset(Comments[0].Loc));
+  EXPECT_EQ(SecondOffset, SM.getFileOffset(Comments[1].Loc));
+}
+
+TEST(LexerUtilsTest, GetCommentsInRangeLineComments) {
+  llvm::Annotations Code(R"cpp(
+void f() {
+  $range[[// first
+  // second
+  ]]
+  int x = 0;
+}
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange Range =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const std::vector<utils::lexer::CommentToken> Comments =
+      utils::lexer::getCommentsInRange(Range, SM, LangOpts);
+  ASSERT_EQ(2u, Comments.size());
+  EXPECT_EQ("// first", Comments[0].Text);
+  EXPECT_EQ("// second", Comments[1].Text);
+  const StringRef CodeText = Code.code();
+  const size_t FirstOffset = CodeText.find("// first");
+  ASSERT_NE(StringRef::npos, FirstOffset);
+  const size_t SecondOffset = CodeText.find("// second");
+  ASSERT_NE(StringRef::npos, SecondOffset);
+  EXPECT_EQ(FirstOffset, SM.getFileOffset(Comments[0].Loc));
+  EXPECT_EQ(SecondOffset, SM.getFileOffset(Comments[1].Loc));
+}
+
+TEST(LexerUtilsTest, GetCommentsInRangeNoComments) {
+  llvm::Annotations Code(R"cpp(
+void f() {
+  int x = $range[[0 + 1]];
+}
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange Range =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const std::vector<utils::lexer::CommentToken> Comments =
+      utils::lexer::getCommentsInRange(Range, SM, LangOpts);
+  EXPECT_TRUE(Comments.empty());
+}
+
+TEST(LexerUtilsTest, GetCommentsInRangeInvalidRange) {
+  std::unique_ptr<ASTUnit> AST = buildAST("int value = 0;");
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const std::vector<utils::lexer::CommentToken> Comments =
+      utils::lexer::getCommentsInRange(CharSourceRange(), SM, LangOpts);
+  EXPECT_TRUE(Comments.empty());
+}
+
 TEST(LexerUtilsTest, GetTrailingCommentsInRangeAdjacentComments) {
   llvm::Annotations Code(R"cpp(
 void f() {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang-tools-extra] [clang-tidy][NFC] Add `getCommentsInRange` utility (PR #183940)

Reply via email to