lolleko updated this revision to Diff 251334.
lolleko edited the summary of this revision.
lolleko added a comment.

Addressed review.

Paragraphs (lines separated by a double line break `\n\n`) are currently not 
retained. Because the current implementation of markup::Paragraph only appends 
a single white space after it's contents 
(https://github.com/llvm/llvm-project/blob/e26e9ba288ce156b851504ebbb7d8a775572957c/clang-tools-extra/clangd/FormattedString.cpp#L368).
I think this is semantically wrong because in natural language aswell as markup 
languages like md, html, ... a paragraph should be followed by a blank line.
But since a lot of code relies on that single white space paragraph, this 
should be addressed in a different patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76094/new/

https://reviews.llvm.org/D76094

Files:
  clang-tools-extra/clangd/Hover.cpp
  clang-tools-extra/clangd/unittests/HoverTests.cpp

Index: clang-tools-extra/clangd/unittests/HoverTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -1883,6 +1883,118 @@
   }
 }
 
+TEST(Hover, LineBreakConversionDocs) {
+  struct Case {
+    const std::function<void(HoverInfo &)> Builder;
+    llvm::StringRef ExpectedRenderMarkdown;
+    llvm::StringRef ExpectedRenderPlainText;
+  } Cases[] = {{
+                   [](HoverInfo &HI) { HI.Documentation = "foo\n\n\nbar"; },
+                   "foo  \nbar",
+                   "foo\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\n\n\n\tbar"; },
+                   "foo  \nbar",
+                   "foo\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\n\n\n bar"; },
+                   "foo  \nbar",
+                   "foo\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo.\nbar"; },
+                   "foo.  \nbar",
+                   "foo.\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo:\nbar"; },
+                   "foo:  \nbar",
+                   "foo:\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo,\nbar"; },
+                   "foo,  \nbar",
+                   "foo,\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo;\nbar"; },
+                   "foo;  \nbar",
+                   "foo;\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo!\nbar"; },
+                   "foo!  \nbar",
+                   "foo!\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo?\nbar"; },
+                   "foo?  \nbar",
+                   "foo?\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\n-bar"; },
+                   "foo  \n-bar",
+                   "foo\n-bar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\n*bar"; },
+                   // TODO `*` should probably not be escaped after line break
+                   "foo  \n\\*bar",
+                   "foo\n*bar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\n@bar"; },
+                   "foo  \n@bar",
+                   "foo\n@bar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\n\\bar"; },
+                   // TODO `\` should probably not be escaped after line break
+                   "foo  \n\\\\bar",
+                   "foo\n\\bar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\n>bar"; },
+                   // TODO `>` should probably not be escaped after line break
+                   "foo  \n\\>bar",
+                   "foo\n>bar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\n#bar"; },
+                   "foo  \n#bar",
+                   "foo\n#bar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo  \nbar"; },
+                   "foo  \nbar",
+                   "foo\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo    \nbar"; },
+                   "foo  \nbar",
+                   "foo\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\\\nbar"; },
+                   "foo  \nbar",
+                   "foo\nbar",
+               },
+               {
+                   [](HoverInfo &HI) { HI.Documentation = "foo\nbar"; },
+                   "foo bar",
+                   "foo bar",
+               }};
+
+  for (const auto &C : Cases) {
+    HoverInfo HI;
+    C.Builder(HI);
+    EXPECT_EQ(HI.present().asMarkdown().erase(0, 12), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(HI.present().asPlainText(), C.ExpectedRenderPlainText);
+  }
+}
+
 // This is a separate test as headings don't create any differences in plaintext
 // mode.
 TEST(Hover, PresentHeadings) {
Index: clang-tools-extra/clangd/Hover.cpp
===================================================================
--- clang-tools-extra/clangd/Hover.cpp
+++ clang-tools-extra/clangd/Hover.cpp
@@ -520,6 +520,101 @@
   }
   return llvm::None;
 }
+
+// Parses in documentation comments and tries to preserve the markup in the
+// comment.
+// Currently only linebreaks are handled.
+void parseDocumentation(std::string Input, markup::Document &Output) {
+
+  auto IsParagraphLineBreak = [](llvm::StringRef Str, size_t LineBreakIndex) {
+    if (LineBreakIndex + 1 >= Str.size()) {
+      return false;
+    }
+    auto NextNonSpaceCharIndex = Str.find_first_not_of(' ', LineBreakIndex + 1);
+
+    return NextNonSpaceCharIndex != llvm::StringRef::npos &&
+           Str[NextNonSpaceCharIndex] == '\n';
+  };
+
+  auto IsMardkownLineBreak = [](llvm::StringRef Str, size_t LineBreakIndex) {
+    return LineBreakIndex > 1 &&
+           (Str[LineBreakIndex - 1] == '\\' ||
+            (Str[LineBreakIndex - 1] == ' ' && Str[LineBreakIndex - 2] == ' '));
+  };
+
+  auto IsPunctuationLineBreak = [](llvm::StringRef Str, size_t LineBreakIndex) {
+    constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
+
+    return LineBreakIndex > 0 &&
+           Punctuation.find(Str[LineBreakIndex - 1]) != llvm::StringRef::npos;
+  };
+
+  auto IsFollowedByHardLineBreakIndicator = [](llvm::StringRef Str,
+                                               size_t LineBreakIndex) {
+    // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
+    // '#' headings, '`' code blocks
+    constexpr llvm::StringLiteral LinbreakIdenticators = R"txt(-*@\>#`)txt";
+
+    auto NextNonSpaceCharIndex = Str.find_first_not_of(' ', LineBreakIndex + 1);
+
+    if (NextNonSpaceCharIndex == llvm::StringRef::npos) {
+      return false;
+    }
+
+    auto FollowedBySingleCharIndicator =
+        LinbreakIdenticators.find(Str[NextNonSpaceCharIndex]) !=
+        llvm::StringRef::npos;
+
+    auto FollowedByNumberedListIndicator =
+        llvm::isDigit(Str[NextNonSpaceCharIndex]) &&
+        NextNonSpaceCharIndex + 1 < Str.size() &&
+        (Str[NextNonSpaceCharIndex + 1] == '.' ||
+         Str[NextNonSpaceCharIndex + 1] == ')');
+
+    return FollowedBySingleCharIndicator || FollowedByNumberedListIndicator;
+  };
+
+  constexpr auto WhiteSpaceChars = "\t\n\v\f\r ";
+
+  auto TrimmedInput = llvm::StringRef(Input).trim();
+
+  std::string CurrentLine;
+
+  for (size_t CharIndex = 0; CharIndex < TrimmedInput.size();) {
+    if (TrimmedInput[CharIndex] == '\n') {
+      // Trim whitespace infront of linebreak, also get rif of markdown
+      // linebreak character `\`
+      CurrentLine.erase(
+          CurrentLine.find_last_not_of(std::string(WhiteSpaceChars) + "\\") +
+          1);
+
+      if (IsParagraphLineBreak(TrimmedInput, CharIndex)) {
+        // TODO this should add an actual paragraph (double linebreak)
+        Output.addParagraph().appendText(CurrentLine);
+        CurrentLine = "";
+      } else if (IsMardkownLineBreak(TrimmedInput, CharIndex) ||
+                 IsPunctuationLineBreak(TrimmedInput, CharIndex) ||
+                 IsFollowedByHardLineBreakIndicator(TrimmedInput, CharIndex)) {
+        Output.addParagraph().appendText(CurrentLine);
+        CurrentLine = "";
+      } else {
+        // Ommit linebreak
+        CurrentLine += ' ';
+      }
+
+      CharIndex++;
+      // After a linebreak always remove spaces to avoid 4 space markdown code
+      // blocks, also skip all additional linebreaks since they have no effect
+      CharIndex = TrimmedInput.find_first_not_of(WhiteSpaceChars, CharIndex);
+    } else {
+      CurrentLine += TrimmedInput[CharIndex];
+      CharIndex++;
+    }
+  }
+  if (!CurrentLine.empty()) {
+    Output.addParagraph().appendText(CurrentLine);
+  }
+}
 } // namespace
 
 llvm::Optional<HoverInfo> getHover(ParsedAST &AST, Position Pos,
@@ -652,7 +747,7 @@
   }
 
   if (!Documentation.empty())
-    Output.addParagraph().appendText(Documentation);
+    parseDocumentation(Documentation, Output);
 
   if (!Definition.empty()) {
     Output.addRuler();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to