================ @@ -0,0 +1,99 @@ +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MD_PARSER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MD_PARSER_H +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" +#include <list> + +using namespace llvm; + +namespace clang { +namespace doc { +using llvm::SmallString; +enum class MDState { Emphasis, Strong, None }; + +enum class MDType { + Paragraph, + Emphasis, + Strong, + Text, + Softbreak, +}; + +enum class MDTokenType { LeftDelimiterRun, RightDelimiterRun, Text }; + +struct Node { + SmallVector<Node*> Children; + MDType Type; + Node *Parent; + std::string Content; +}; + +struct DelimiterContext { + bool RightFlanking; + bool LeftFlanking; + bool CanOpen; + bool CanClose; + char DelimChar; + // Since Content is a StringRef, we separately track the length so that we can + // decrement when necessary without modifying the string. + size_t Length; +}; + +/// A LineNode might be a valid delimiter run, text, or a delimiter run that +/// will later be merged with a text if there is no matching run e.g. ***foo. +/// @brief A preprocessing structure for tracking text in a line. +struct LineNode { + StringRef Content; + // Instantiated if the line is a delimiter run. + std::optional<DelimiterContext> DelimiterContext; +}; + +class MarkdownParser { + // MDState State; + BumpPtrAllocator Arena; + StringSaver Saver; + + /// If a delimiter is found, determine if it is a delimiter run, what type of + /// run it is, and whether it can be an opener or closer. + /// + /// The CommonMark specification defines delimiter runs as: + /// A delimiter run is either a sequence of one or more * or _ characters that + /// is not preceded or followed by a non-backslash-escaped * or _ character + /// + /// A left-flanking delimiter run is a delimiter run that is (1) not followed + /// by Unicode whitespace, and either (2a) not followed by a Unicode + /// punctuation character, or (2b) followed by a Unicode punctuation character + /// and preceded by Unicode whitespace or a Unicode punctuation character. + /// + /// A right-flanking delimiter run is a delimiter run that is (1) not preceded + /// by Unicode whitespace, and either (2a) not preceded by a Unicode + /// punctuation character, or (2b) preceded by a Unicode punctuation character + /// and followed by Unicode whitespace or a Unicode punctuation character. + /// + /// @param IdxOrigin the index of * or _ that might start a delimiter run. + /// @return A pair denoting the type of run and the index where the run stops + std::pair<std::optional<DelimiterContext>, size_t> + processDelimiters(SmallString<64> &Line, const size_t &Origin = 0); ---------------- ilovepi wrote:
```suggestion /// @param Origin the index of * or _ that might start a delimiter run. /// @return A pair denoting the type of run and the index where the run stops std::pair<std::optional<DelimiterContext>, size_t> processDelimiters(SmallString<64> &Line, const size_t &Origin = 0); ``` The doc name seems stale. I do wonder if you should just use `Start` or `StartIdx`, though. https://github.com/llvm/llvm-project/pull/155887 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits