https://github.com/hdoc updated https://github.com/llvm/llvm-project/pull/91100
>From 2fecd6eb6eec59eee1e6146b72458fe4bb1bc971 Mon Sep 17 00:00:00 2001 From: hdoc <git...@hdoc.io> Date: Sat, 4 May 2024 18:50:16 -0700 Subject: [PATCH 1/7] Support for parsing headers in Doxygen \par commands --- .../include/clang/AST/CommentCommandTraits.h | 4 + clang/include/clang/AST/CommentCommands.td | 3 +- clang/include/clang/AST/CommentParser.h | 4 +- clang/lib/AST/CommentParser.cpp | 77 ++++++++++ clang/test/Index/comment-misc-tags.m | 8 +- clang/unittests/AST/CommentParser.cpp | 137 ++++++++++++++++++ .../ClangCommentCommandInfoEmitter.cpp | 1 + 7 files changed, 227 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/AST/CommentCommandTraits.h b/clang/include/clang/AST/CommentCommandTraits.h index 0c3254d84eb00..78c484fff3aed 100644 --- a/clang/include/clang/AST/CommentCommandTraits.h +++ b/clang/include/clang/AST/CommentCommandTraits.h @@ -88,6 +88,10 @@ struct CommandInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsHeaderfileCommand : 1; + /// True if this is a \\par command. + LLVM_PREFERRED_TYPE(bool) + unsigned IsParCommand : 1; + /// True if we don't want to warn about this command being passed an empty /// paragraph. Meaningful only for block commands. 
LLVM_PREFERRED_TYPE(bool) diff --git a/clang/include/clang/AST/CommentCommands.td b/clang/include/clang/AST/CommentCommands.td index 06b2fa9b5531c..a410cd4039bee 100644 --- a/clang/include/clang/AST/CommentCommands.td +++ b/clang/include/clang/AST/CommentCommands.td @@ -18,6 +18,7 @@ class Command<string name> { bit IsThrowsCommand = 0; bit IsDeprecatedCommand = 0; bit IsHeaderfileCommand = 0; + bit IsParCommand = 0; bit IsEmptyParagraphAllowed = 0; @@ -156,7 +157,7 @@ def Date : BlockCommand<"date">; def Invariant : BlockCommand<"invariant">; def Li : BlockCommand<"li">; def Note : BlockCommand<"note">; -def Par : BlockCommand<"par">; +def Par : BlockCommand<"par"> { let IsParCommand = 1; let NumArgs = 1; } def Post : BlockCommand<"post">; def Pre : BlockCommand<"pre">; def Remark : BlockCommand<"remark">; diff --git a/clang/include/clang/AST/CommentParser.h b/clang/include/clang/AST/CommentParser.h index a2d0e30835e2c..289f0b2c066b9 100644 --- a/clang/include/clang/AST/CommentParser.h +++ b/clang/include/clang/AST/CommentParser.h @@ -105,6 +105,9 @@ class Parser { ArrayRef<Comment::Argument> parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs); + ArrayRef<Comment::Argument> + parseParCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs); + BlockCommandComment *parseBlockCommand(); InlineCommandComment *parseInlineCommand(); @@ -123,4 +126,3 @@ class Parser { } // end namespace clang #endif - diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp index 5baf81a509fb6..bbe93ebc37d13 100644 --- a/clang/lib/AST/CommentParser.cpp +++ b/clang/lib/AST/CommentParser.cpp @@ -222,6 +222,63 @@ class TextTokenRetokenizer { return true; } + /// Check if this line starts with @par or \par + bool startsWithParCommand() { + unsigned Offset = 1; + + /// Skip all whitespace characters at the beginning. 
+ /// This needs to backtrack because Pos has already advanced past the + /// actual \par or @par command by the time this function is called. + while (isWhitespace(*(Pos.BufferPtr - Offset))) + Offset++; + + /// Check if next four characters are \par or @par + llvm::StringRef LineStart(Pos.BufferPtr - 5, 4); + return LineStart.starts_with("\\par") || LineStart.starts_with("@par"); + } + + /// Extract a par command argument-header. + bool lexParHeading(Token &Tok) { + if (isEnd()) + return false; + + Position SavedPos = Pos; + + consumeWhitespace(); + SmallString<32> WordText; + const char *WordBegin = Pos.BufferPtr; + SourceLocation Loc = getSourceLocation(); + + if (!startsWithParCommand()) + return false; + + // Read until the end of this token, which is effectively the end of the + // line This gets us the content of the par header, if there is one. + while (!isEnd()) { + WordText.push_back(peek()); + if (Pos.BufferPtr + 1 == Pos.BufferEnd) { + consumeChar(); + break; + } else { + consumeChar(); + } + } + + const unsigned Length = WordText.size(); + if (Length == 0) { + Pos = SavedPos; + return false; + } + + char *TextPtr = Allocator.Allocate<char>(Length + 1); + + memcpy(TextPtr, WordText.c_str(), Length + 1); + StringRef Text = StringRef(TextPtr, Length); + + formTokenWithChars(Tok, Loc, WordBegin, Length, Text); + return true; + } + /// Extract a word -- sequence of non-whitespace characters. 
bool lexWord(Token &Tok) { if (isEnd()) @@ -394,6 +451,23 @@ Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, return llvm::ArrayRef(Args, ParsedArgs); } +ArrayRef<Comment::Argument> +Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer, + unsigned NumArgs) { + auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) + Comment::Argument[NumArgs]; + unsigned ParsedArgs = 0; + Token Arg; + + while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Arg)) { + Args[ParsedArgs] = Comment::Argument{ + SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; + ParsedArgs++; + } + + return llvm::ArrayRef(Args, ParsedArgs); +} + BlockCommandComment *Parser::parseBlockCommand() { assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); @@ -449,6 +523,9 @@ BlockCommandComment *Parser::parseBlockCommand() { else if (Info->IsThrowsCommand) S.actOnBlockCommandArgs( BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs)); + else if (Info->IsParCommand) + S.actOnBlockCommandArgs(BC, + parseParCommandArgs(Retokenizer, Info->NumArgs)); else S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs)); diff --git a/clang/test/Index/comment-misc-tags.m b/clang/test/Index/comment-misc-tags.m index 47ee9d9aa392a..6d018dbfcf193 100644 --- a/clang/test/Index/comment-misc-tags.m +++ b/clang/test/Index/comment-misc-tags.m @@ -91,18 +91,16 @@ @interface IOCommandGate struct Test {int filler;}; -// CHECK: (CXComment_BlockCommand CommandName=[par] +// CHECK: (CXComment_BlockCommand CommandName=[par] Arg[0]=User defined paragraph: // CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ User defined paragraph:] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ Contents of the paragraph.]))) // CHECK: (CXComment_BlockCommand CommandName=[par] // CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ New paragraph under the same heading.]))) +// CHECK-NEXT: (CXComment_Text Text=[New 
paragraph under the same heading.]))) // CHECK: (CXComment_BlockCommand CommandName=[note] // CHECK-NEXT: (CXComment_Paragraph // CHECK-NEXT: (CXComment_Text Text=[ This note consists of two paragraphs.] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ This is the first paragraph.]))) // CHECK: (CXComment_BlockCommand CommandName=[par] // CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ And this is the second paragraph.]))) - +// CHECK-NEXT: (CXComment_Text Text=[And this is the second paragraph.]))) diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp index 1c57c899f9074..e0df182d430c3 100644 --- a/clang/unittests/AST/CommentParser.cpp +++ b/clang/unittests/AST/CommentParser.cpp @@ -1639,6 +1639,143 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg9) { } } +TEST_F(CommentParserTest, ParCommandHasArg1) { + const char *Sources[] = { + "/// @par Paragraph header:", "/// @par Paragraph header:\n", + "/// @par Paragraph header:\r\n", "/// @par Paragraph header:\n\r", + "/** @par Paragraph header:*/", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 0)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header:"); + } + } +} + +TEST_F(CommentParserTest, ParCommandHasArg2) { + const char *Sources[] = { + "/// @par Paragraph header: ", "/// @par Paragraph header: \n", + "/// @par Paragraph header: \r\n", "/// @par Paragraph header: \n\r", + "/** @par Paragraph header: */", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); 
+ { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 0)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header: "); + } + } +} + +TEST_F(CommentParserTest, ParCommandHasArg3) { + const char *Sources[] = { + ("/// @par Paragraph header:\n" + "/// Paragraph body"), + ("/// @par Paragraph header:\r\n" + "/// Paragraph body"), + ("/// @par Paragraph header:\n\r" + "/// Paragraph body"), + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + TextComment *TC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header:"); + ASSERT_TRUE(GetChildAt(PC, 0, TC)); + ASSERT_TRUE(TC->getText() == " Paragraph body"); + } + } +} + +TEST_F(CommentParserTest, ParCommandHasArg4) { + const char *Sources[] = { + ("/// @par Paragraph header:\n" + "/// Paragraph body1\n" + "/// Paragraph body2"), + ("/// @par Paragraph header:\r\n" + "/// Paragraph body1\n" + "/// Paragraph body2"), + ("/// @par Paragraph header:\n\r" + "/// Paragraph body1\n" + "/// Paragraph body2"), + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + TextComment *TC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 2)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header:"); + ASSERT_TRUE(GetChildAt(PC, 0, TC)); + ASSERT_TRUE(TC->getText() == " Paragraph 
body1"); + ASSERT_TRUE(GetChildAt(PC, 1, TC)); + ASSERT_TRUE(TC->getText() == " Paragraph body2"); + } + } +} + +TEST_F(CommentParserTest, ParCommandHasArg5) { + const char *Sources[] = { + ("/// @par \n" + "/// Paragraphs with no text before newline have no heading"), + ("/// @par \r\n" + "/// Paragraphs with no text before newline have no heading"), + ("/// @par \n\r" + "/// Paragraphs with no text before newline have no heading"), + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + TextComment *TC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(BCC->getNumArgs() == 0); + ASSERT_TRUE(GetChildAt(PC, 0, TC)); + ASSERT_TRUE(TC->getText() == + "Paragraphs with no text before newline have no heading"); + } + } +} } // unnamed namespace diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp index a113b02e19995..07b26dc2f6b8b 100644 --- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp @@ -44,6 +44,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records, << Tag.getValueAsBit("IsThrowsCommand") << ", " << Tag.getValueAsBit("IsDeprecatedCommand") << ", " << Tag.getValueAsBit("IsHeaderfileCommand") << ", " + << Tag.getValueAsBit("IsParCommand") << ", " << Tag.getValueAsBit("IsEmptyParagraphAllowed") << ", " << Tag.getValueAsBit("IsVerbatimBlockCommand") << ", " << Tag.getValueAsBit("IsVerbatimBlockEndCommand") << ", " >From ddc9fffccffa061616e61540246200eb0b9bb598 Mon Sep 17 00:00:00 2001 From: hdoc <git...@hdoc.io> Date: Mon, 6 May 2024 13:57:42 -0700 Subject: [PATCH 2/7] Address review feedback --- clang/lib/AST/CommentParser.cpp | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp index bbe93ebc37d13..1158df610d560 100644 --- a/clang/lib/AST/CommentParser.cpp +++ b/clang/lib/AST/CommentParser.cpp @@ -259,12 +259,11 @@ class TextTokenRetokenizer { if (Pos.BufferPtr + 1 == Pos.BufferEnd) { consumeChar(); break; - } else { - consumeChar(); } + consumeChar(); } - const unsigned Length = WordText.size(); + unsigned Length = WordText.size(); if (Length == 0) { Pos = SavedPos; return false; @@ -454,6 +453,7 @@ Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, ArrayRef<Comment::Argument> Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) { + assert(NumArgs > 0); auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) Comment::Argument[NumArgs]; unsigned ParsedArgs = 0; >From 63306cc150da343539008c05adcd9201d307b54b Mon Sep 17 00:00:00 2001 From: hdoc <git...@hdoc.io> Date: Mon, 3 Jun 2024 14:31:40 -0700 Subject: [PATCH 3/7] Small fix to par heading check and comments --- clang/lib/AST/CommentParser.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp index 1158df610d560..24aa1ab8fc189 100644 --- a/clang/lib/AST/CommentParser.cpp +++ b/clang/lib/AST/CommentParser.cpp @@ -222,18 +222,19 @@ class TextTokenRetokenizer { return true; } - /// Check if this line starts with @par or \par + // Check if this line starts with @par or \par bool startsWithParCommand() { unsigned Offset = 1; - /// Skip all whitespace characters at the beginning. - /// This needs to backtrack because Pos has already advanced past the - /// actual \par or @par command by the time this function is called. + // Skip all whitespace characters at the beginning. + // This needs to backtrack because Pos has already advanced past the + // actual \par or @par command by the time this function is called. 
while (isWhitespace(*(Pos.BufferPtr - Offset))) Offset++; - /// Check if next four characters are \par or @par - llvm::StringRef LineStart(Pos.BufferPtr - 5, 4); + // Once we've reached the whitespace, backtrack and check if the previous four + // characters are \par or @par. + llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4); return LineStart.starts_with("\\par") || LineStart.starts_with("@par"); } @@ -253,7 +254,7 @@ class TextTokenRetokenizer { return false; // Read until the end of this token, which is effectively the end of the - // line This gets us the content of the par header, if there is one. + // line. This gets us the content of the par header, if there is one. while (!isEnd()) { WordText.push_back(peek()); if (Pos.BufferPtr + 1 == Pos.BufferEnd) { >From 55a18a8048f650799bc60c07d7072eea57cd19bd Mon Sep 17 00:00:00 2001 From: hdoc <git...@hdoc.io> Date: Sat, 15 Jun 2024 00:38:40 -0700 Subject: [PATCH 4/7] Run clang-format over failing file Not sure why a failure is being triggered here as we didn't touch this exact section. Our changes affect code a little further down in the file. 
--- clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp index 07b26dc2f6b8b..f90ebc463005b 100644 --- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp @@ -32,8 +32,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records, Record &Tag = *Tags[i]; OS << " { " << "\"" << Tag.getValueAsString("Name") << "\", " - << "\"" << Tag.getValueAsString("EndCommandName") << "\", " - << i << ", " + << "\"" << Tag.getValueAsString("EndCommandName") << "\", " << i << ", " << Tag.getValueAsInt("NumArgs") << ", " << Tag.getValueAsBit("IsInlineCommand") << ", " << Tag.getValueAsBit("IsBlockCommand") << ", " >From a4fed5fc4fe278dee1990fc708b88eeee0504cb0 Mon Sep 17 00:00:00 2001 From: hdoc <git...@hdoc.io> Date: Sat, 15 Jun 2024 00:40:29 -0700 Subject: [PATCH 5/7] More clang-format fixes --- clang/lib/AST/CommentParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp index 24aa1ab8fc189..d5e5bb27ceba3 100644 --- a/clang/lib/AST/CommentParser.cpp +++ b/clang/lib/AST/CommentParser.cpp @@ -232,8 +232,8 @@ class TextTokenRetokenizer { while (isWhitespace(*(Pos.BufferPtr - Offset))) Offset++; - // Once we've reached the whitespace, backtrack and check if the previous four - // characters are \par or @par. + // Once we've reached the whitespace, backtrack and check if the previous + // four characters are \par or @par. 
llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4); return LineStart.starts_with("\\par") || LineStart.starts_with("@par"); } >From caba5c3f5c95950de8394ce69389682d30ec7d7d Mon Sep 17 00:00:00 2001 From: hdoc <git...@hdoc.io> Date: Sat, 15 Jun 2024 14:23:50 -0700 Subject: [PATCH 6/7] Clang format fix --- clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp index f90ebc463005b..aee7d38786a51 100644 --- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp @@ -52,8 +52,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records, << Tag.getValueAsBit("IsFunctionDeclarationCommand") << ", " << Tag.getValueAsBit("IsRecordLikeDetailCommand") << ", " << Tag.getValueAsBit("IsRecordLikeDeclarationCommand") << ", " - << /* IsUnknownCommand = */ "0" - << " }"; + << /* IsUnknownCommand = */ "0" << " }"; if (i + 1 != e) OS << ","; OS << "\n"; >From 7db1807ea540bb707ee295826a4c81d5a82e5620 Mon Sep 17 00:00:00 2001 From: hdoc <git...@hdoc.io> Date: Tue, 18 Jun 2024 19:37:58 -0700 Subject: [PATCH 7/7] Add release notes blurb for \par command comment arg support --- clang/docs/ReleaseNotes.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7112d1f889fef..07642bc482d43 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -150,6 +150,15 @@ here. Generic improvements to Clang as a whole or to its underlying infrastructure are described first, followed by language-specific sections with improvements to Clang's support for those languages. +- The ``\par`` documentation comment command now supports an optional + argument, which denotes the header of the paragraph started by + an instance of the ``\par`` command comment. 
The implementation + of the argument handling matches its semantics + `in Doxygen <https://www.doxygen.nl/manual/commands.html#cmdpar>`_. + Namely, any text on the same line as the ``\par`` command will become + a header for the paragraph, and if there is no text then the command + will start a new paragraph. + C++ Language Changes -------------------- - C++17 support is now completed, with the enablement of the _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits