lolleko created this revision. lolleko added a reviewer: sammccall. lolleko added a project: clang. Herald added subscribers: cfe-commits, usaxena95, kadircet, arphaman, jkorous, MaskRay, ilya-biryukov.
`convertLineBreaks` retains hard line breaks and removes soft line breaks. Wether a line break is hard or soft is determined by the following rules (some of which have been discussed in https://github.com/clangd/clangd/issues/95): - Hard Markdown line breaks (https://github.github.com/gfm/#hard-line-breaks) are retained - Line breaks that are preceded by a punctuation are retained - Line breaks that are followed by "interesting characters" (e.g. Markdown syntax, doxygen commands) are retained - All other line breaks are removed Even though markdown doc comments in cpp are still rare, I removed the markdown escaping. I think the chance that markdown constructs are introduced by accident is fairly low especially with proper line breaks. That means that doc comments which use markdown are now supported and should be rendered properly (in supported editors). In addition `canonicalizeSpaces` has been removed. `canonicalizeSpaces` removed duplicate whitespaces anywhere in a string. Now only (duplicate) whitespaces before and after a line break are trimmed. Since this is my first contribution feel free to be extra thorough. Fixes https://github.com/clangd/clangd/issues/301, Potentially resolves https://github.com/clangd/clangd/issues/95 Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D76094 Files: clang-tools-extra/clangd/FormattedString.cpp clang-tools-extra/clangd/unittests/FormattedStringTests.cpp clang-tools-extra/clangd/unittests/HoverTests.cpp
Index: clang-tools-extra/clangd/unittests/HoverTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/HoverTests.cpp +++ clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -1905,7 +1905,7 @@ llvm::StringRef ExpectedMarkdown = R"md(### variable `foo` --- -Value \= `val` +Value = `val` --- ```cpp Index: clang-tools-extra/clangd/unittests/FormattedStringTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/FormattedStringTests.cpp +++ clang-tools-extra/clangd/unittests/FormattedStringTests.cpp @@ -20,19 +20,6 @@ TEST(Render, Escaping) { // Check some ASCII punctuation Paragraph P; - P.appendText("*!`"); - EXPECT_EQ(P.asMarkdown(), "\\*\\!\\`"); - - // Check all ASCII punctuation. - P = Paragraph(); - std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt"; - // Same text, with each character escaped. - std::string EscapedPunctuation; - EscapedPunctuation.reserve(2 * Punctuation.size()); - for (char C : Punctuation) - EscapedPunctuation += std::string("\\") + C; - P.appendText(Punctuation); - EXPECT_EQ(P.asMarkdown(), EscapedPunctuation); // In code blocks we don't need to escape ASCII punctuation. P = Paragraph(); @@ -98,7 +85,7 @@ } TEST(Paragraph, ExtraSpaces) { - // Make sure spaces inside chunks are dropped. + // Make sure spaces after linebreaks are dropped. Paragraph P; P.appendText("foo\n \t baz"); P.appendCode(" bar\n"); @@ -106,15 +93,92 @@ EXPECT_EQ(P.asPlainText(), "foo baz bar"); } -TEST(Paragraph, NewLines) { +TEST(Paragraph, LineBreakTrim) { // New lines before and after chunks are dropped. Paragraph P; P.appendText(" \n foo\nbar\n "); P.appendCode(" \n foo\nbar \n "); - EXPECT_EQ(P.asMarkdown(), "foo bar `foo bar`"); + EXPECT_EQ(P.asMarkdown(), "foo bar `foo\nbar`"); EXPECT_EQ(P.asPlainText(), "foo bar foo bar"); } +TEST(Paragraph, LineBreakConversionParagraphs) { + // Paragraphs are eindicated by double linebreaks + // additional linebreaks and spaces should be dropped + constexpr auto Expected = "foo\n\nbar"; + + Paragraph P1; + P1.appendText("foo\n\n\n\nbar"); + EXPECT_EQ(P1.asMarkdown(), Expected); + EXPECT_EQ(P1.asPlainText(), Expected); + + Paragraph P2; + P2.appendText("foo\n\n\n\tbar"); + EXPECT_EQ(P2.asMarkdown(), Expected); + EXPECT_EQ(P2.asPlainText(), Expected); + + Paragraph P3; + P3.appendText("foo\n\n\n bar"); + EXPECT_EQ(P3.asMarkdown(), Expected); + EXPECT_EQ(P3.asPlainText(), Expected); +} + +TEST(Paragraph, LineBreakConversionPunctuation) { + // Keep linebreaks after punctuation + constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt"; + + for (auto Char : Punctuation) { + const auto TestString = std::string("foo") + Char + "\nbar"; + const auto MarkdownExpected = std::string("foo") + Char + " \nbar"; + + Paragraph P; + P.appendText(TestString); + EXPECT_EQ(P.asMarkdown(), MarkdownExpected); + EXPECT_EQ(P.asPlainText(), TestString); + } +} + +TEST(Paragraph, LineBreakConversionIndicator) { + // Keep linebreaks before interesting characters + constexpr llvm::StringLiteral LinbreakIdenticators = R"txt(-*@\>#`)txt"; + + for (auto Char : LinbreakIdenticators) { + const auto TestString = std::string("foo\n") + Char + "bar"; + const auto MarkdownExpected = std::string("foo \n") + Char + "bar"; + + Paragraph P; + P.appendText(TestString); + EXPECT_EQ(P.asMarkdown(), MarkdownExpected); + EXPECT_EQ(P.asPlainText(), TestString); + } +} + +TEST(Paragraph, LineBreakConversionMarkdownLineBreaks) { + // Keep Markdown linebreaks + Paragraph P1; + P1.appendText("foo \nbar"); + EXPECT_EQ(P1.asMarkdown(), "foo \nbar"); + EXPECT_EQ(P1.asPlainText(), "foo bar"); + + Paragraph P2; + P2.appendText("foo \nbar"); + EXPECT_EQ(P2.asMarkdown(), "foo \nbar"); + EXPECT_EQ(P2.asPlainText(), "foo bar"); + + Paragraph P3; + P3.appendText("foo\\\nbar"); + EXPECT_EQ(P3.asMarkdown(), "foo\\\nbar"); + EXPECT_EQ(P3.asPlainText(), "foo\\ bar"); +} + +TEST(Paragraph, LineBreakConversionRemoveSoftLineBreaks) { + // Remove soft linebreaks with no interesting context + Paragraph P1; + P1.appendText("foo\nbar"); + EXPECT_EQ(P1.asMarkdown(), "foo bar"); + EXPECT_EQ(P1.asPlainText(), "foo bar"); +} + TEST(Document, Separators) { Document D; D.addParagraph().appendText("foo"); Index: clang-tools-extra/clangd/FormattedString.cpp =================================================================== --- clang-tools-extra/clangd/FormattedString.cpp +++ clang-tools-extra/clangd/FormattedString.cpp @@ -26,23 +26,122 @@ namespace markup { namespace { -/// Escape a markdown text block. Ensures the punctuation will not introduce -/// any of the markdown constructs. -std::string renderText(llvm::StringRef Input) { - // Escaping ASCII punctuation ensures we can't start a markdown construct. - constexpr llvm::StringLiteral Punctuation = - R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt"; +bool isCharLineBreak(llvm::StringRef Str, size_t LineBreakIndex) { + return Str[LineBreakIndex] == '\n'; +} + +bool isCharParagraphLineBreak(llvm::StringRef Str, size_t LineBreakIndex) { + if (!isCharLineBreak(Str, LineBreakIndex) || + LineBreakIndex + 1 >= Str.size()) { + return false; + } + auto NextNonSpaceCharIndex = Str.find_first_not_of(' ', LineBreakIndex + 1); + + return NextNonSpaceCharIndex != llvm::StringRef::npos && + Str[NextNonSpaceCharIndex] == '\n'; +} + +bool isCharMardkownLineBreak(llvm::StringRef Str, size_t LineBreakIndex) { + if (!isCharLineBreak(Str, LineBreakIndex) || LineBreakIndex <= 2) { + return false; + } + + if (Str[LineBreakIndex - 1] == '\\') { + return true; + } + + if ((Str[LineBreakIndex - 1] == ' ' && Str[LineBreakIndex - 2] == ' ')) { + return true; + } + + return false; +} + +bool isCharPunctuationLineBreak(llvm::StringRef Str, size_t LineBreakIndex) { + if (!isCharLineBreak(Str, LineBreakIndex) || LineBreakIndex <= 1) { + return false; + } + + constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt"; + + return Punctuation.find(Str[LineBreakIndex - 1]) != llvm::StringRef::npos; +} + +bool isCharFollowedByHardLineBreakIndicator(llvm::StringRef Str, + size_t LineBreakIndex) { + if (!isCharLineBreak(Str, LineBreakIndex)) { + return false; + } + + // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote, + // '#' headings, '`' code blocks + constexpr llvm::StringLiteral LinbreakIdenticators = R"txt(-*@\>#`)txt"; + + auto NextNonSpaceCharIndex = Str.find_first_not_of(' ', LineBreakIndex + 1); + + if (NextNonSpaceCharIndex == llvm::StringRef::npos) { + + return false; + } + + auto FollowedBySingleCharIndicator = + LinbreakIdenticators.find(Str[NextNonSpaceCharIndex]) != + llvm::StringRef::npos; + + auto FollowedByNumberedListIndicator = + llvm::isDigit(Str[NextNonSpaceCharIndex]) && + NextNonSpaceCharIndex + 1 < Str.size() && + (Str[NextNonSpaceCharIndex + 1] == '.' || + Str[NextNonSpaceCharIndex + 1] == ')'); + + return FollowedBySingleCharIndicator || FollowedByNumberedListIndicator; +} + +// Tries to estimate which linebreak is meant to be soft and which is meant to +// be hard. Soft linebreaks are removed. Hard linebreaks are retained or +// converted to markdown linebreaks if \p Markdown is set. +std::string convertLineBreaks(llvm::StringRef Input, bool Markdown = false) { + Input = Input.trim(); + + constexpr auto WhiteSpaceChars = "\t\n\v\f\r "; std::string R; - for (size_t From = 0; From < Input.size();) { - size_t Next = Input.find_first_of(Punctuation, From); - R += Input.substr(From, Next - From); - if (Next == llvm::StringRef::npos) - break; - R += "\\"; - R += Input[Next]; + R.reserve(Input.size()); - From = Next + 1; + std::string HardLinebreak = "\n"; + if (Markdown) { + HardLinebreak = " \n"; + } + + for (size_t CharIndex = 0; CharIndex < Input.size();) { + if (isCharLineBreak(Input, CharIndex)) { + // Trim spaces infront of linebreak + R.erase(R.find_last_not_of(WhiteSpaceChars) + 1); + if (isCharParagraphLineBreak(Input, CharIndex)) { + R += "\n\n"; + } else if (Markdown && isCharMardkownLineBreak(Input, CharIndex)) { + if (Input[CharIndex - 1] == '\\') { + R += '\n'; + } else { + R += HardLinebreak; + } + } else if (isCharPunctuationLineBreak(Input, CharIndex)) { + R += HardLinebreak; + } else if (isCharFollowedByHardLineBreakIndicator(Input, CharIndex)) { + R += HardLinebreak; + } else { + // Omit line break -> replace with space + R += ' '; + } + CharIndex++; + // After a linebreak always remove spaces to avoid 4 space markdown code + // blocks, also skip all additional linebreaks since they have no effect + CharIndex = Input.find_first_not_of(WhiteSpaceChars, CharIndex); + } else { + // copy the rest of the string + R += Input[CharIndex]; + CharIndex++; + } } return R; } @@ -94,26 +193,6 @@ return std::string(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`'); } -// Trims the input and concatenates whitespace blocks into a single ` `. -std::string canonicalizeSpaces(std::string Input) { - // Goes over the string and preserves only a single ` ` for any whitespace - // chunks, the rest is moved to the end of the string and dropped in the end. - auto WritePtr = Input.begin(); - llvm::SmallVector<llvm::StringRef, 4> Words; - llvm::SplitString(Input, Words); - if (Words.empty()) - return ""; - // Go over each word and add it to the string. - for (llvm::StringRef Word : Words) { - if (WritePtr > Input.begin()) - *WritePtr++ = ' '; // Separate from previous block. - llvm::for_each(Word, [&WritePtr](const char C) { *WritePtr++ = C; }); - } - // Get rid of extra spaces. - Input.resize(WritePtr - Input.begin()); - return Input; -} - std::string renderBlocks(llvm::ArrayRef<std::unique_ptr<Block>> Children, void (Block::*RenderFunc)(llvm::raw_ostream &) const) { std::string R; @@ -236,7 +315,7 @@ OS << Sep; switch (C.Kind) { case Chunk::PlainText: - OS << renderText(C.Contents); + OS << convertLineBreaks(C.Contents, true); break; case Chunk::InlineCode: OS << renderInlineBlock(C.Contents); @@ -253,7 +332,7 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const { llvm::StringRef Sep = ""; for (auto &C : Chunks) { - OS << Sep << C.Contents; + OS << Sep << convertLineBreaks(C.Contents); Sep = " "; } OS << '\n'; @@ -278,7 +357,7 @@ } Paragraph &Paragraph::appendText(std::string Text) { - Text = canonicalizeSpaces(std::move(Text)); + Text = llvm::StringRef(Text).trim().str(); if (Text.empty()) return *this; Chunks.emplace_back(); @@ -289,7 +368,7 @@ } Paragraph &Paragraph::appendCode(std::string Code) { - Code = canonicalizeSpaces(std::move(Code)); + Code = llvm::StringRef(Code).trim().str(); if (Code.empty()) return *this; Chunks.emplace_back();
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits