================ @@ -0,0 +1,567 @@ +//===-- Mustache.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Mustache.h" +#include "llvm/Support/Error.h" +#include <sstream> + +using namespace llvm; +using namespace llvm::json; +using namespace llvm::mustache; + +SmallString<0> escapeString(StringRef Input, + DenseMap<char, StringRef> &Escape) { + SmallString<0> Output; + Output.reserve(Input.size()); + for (char C : Input) { + auto It = Escape.find(C); + if (It != Escape.end()) + Output += It->getSecond(); + else + Output += C; + } + return Output; +} + +Accessor split(StringRef Str, char Delimiter) { + Accessor Tokens; + if (Str == ".") { + Tokens.emplace_back(Str); + return Tokens; + } + StringRef Ref(Str); + while (!Ref.empty()) { + StringRef Part; + std::tie(Part, Ref) = Ref.split(Delimiter); + Tokens.emplace_back(Part.trim()); + } + return Tokens; +} + +void addIndentation(llvm::SmallString<0> &PartialStr, size_t IndentationSize) { + std::string Indent(IndentationSize, ' '); + llvm::SmallString<0> Result; + for (size_t I = 0; I < PartialStr.size(); ++I) { + Result.push_back(PartialStr[I]); + if (PartialStr[I] == '\n' && I < PartialStr.size() - 1) + Result.append(Indent); + } + PartialStr = Result; +} + +Token::Token(StringRef RawBody, StringRef InnerBody, char Identifier) + : RawBody(RawBody), TokenBody(InnerBody), Indentation(0) { + TokenType = getTokenType(Identifier); + if (TokenType == Type::Comment) + return; + + StringRef AccessorStr = + TokenType == Type::Variable ? InnerBody : InnerBody.substr(1); + + Accessor = split(AccessorStr.trim(), '.'); +} + +Token::Token(StringRef Str) + : TokenType(Type::Text), RawBody(Str), Accessor({}), TokenBody(Str), + Indentation(0) {} + +Token::Type Token::getTokenType(char Identifier) { + switch (Identifier) { + case '#': + return Type::SectionOpen; + case '/': + return Type::SectionClose; + case '^': + return Type::InvertSectionOpen; + case '!': + return Type::Comment; + case '>': + return Type::Partial; + case '&': + return Type::UnescapeVariable; + default: + return Type::Variable; + } +} + +// Function to check if there's no meaningful text behind +bool noTextBehind(size_t Idx, const SmallVector<Token, 0> &Tokens) { + if (Idx == 0) + return false; + + int PrevIdx = Idx - 1; + if (Tokens[PrevIdx].getType() != Token::Type::Text) + return false; + + const Token &PrevToken = Tokens[Idx - 1]; + StringRef TokenBody = PrevToken.getRawBody().rtrim(" \t\v\t"); + // We make an exception for when previous token is empty + // and the current token is the second token + // ex. + // " {{#section}}A{{/section}}" + // the output of this is + // "A" + return TokenBody.ends_with("\n") || (TokenBody.empty() && Idx == 1); +} + +// Function to check if there's no meaningful text ahead +bool noTextAhead(size_t Idx, const SmallVector<Token, 0> &Tokens) { + if (Idx >= Tokens.size() - 1) + return false; + + int PrevIdx = Idx + 1; + if (Tokens[Idx + 1].getType() != Token::Type::Text) + return false; + + const Token &NextToken = Tokens[Idx + 1]; + StringRef TokenBody = NextToken.getRawBody().ltrim(" "); + return TokenBody.starts_with("\r\n") || TokenBody.starts_with("\n"); +} + +bool requiresCleanUp(Token::Type T) { + // We must clean up all the tokens that could contain child nodes + return T == Token::Type::SectionOpen || T == Token::Type::InvertSectionOpen || + T == Token::Type::SectionClose || T == Token::Type::Comment || + T == Token::Type::Partial; +} + +// Adjust next token body if there is no text ahead +// eg. +// The template string +// "{{! Comment }} \nLine 2" +// would be considered as no text ahead and should be render as +// " Line 2" +void stripTokenAhead(SmallVector<Token, 0> &Tokens, size_t Idx) { + Token &NextToken = Tokens[Idx + 1]; + StringRef NextTokenBody = NextToken.getTokenBody(); + // cut off the leading newline which could be \n or \r\n + if (NextTokenBody.starts_with("\r\n")) + NextToken.setTokenBody(NextTokenBody.substr(2)); + else if (NextTokenBody.starts_with("\n")) + NextToken.setTokenBody(NextTokenBody.substr(1)); +} + +// Adjust previous token body if there no text behind +// eg. +// The template string +// " \t{{#section}}A{{/section}}" +// would be considered as no text ahead and should be render as +// "A" +// The exception for this is partial tag which requires us to +// keep track of the indentation once it's rendered +void stripTokenBefore(SmallVector<Token, 0> &Tokens, size_t Idx, + Token &CurrentToken, Token::Type CurrentType) { + Token &PrevToken = Tokens[Idx - 1]; + StringRef PrevTokenBody = PrevToken.getTokenBody(); + StringRef Unindented = PrevTokenBody.rtrim(" \t\v"); + size_t Indentation = PrevTokenBody.size() - Unindented.size(); + if (CurrentType != Token::Type::Partial) + PrevToken.setTokenBody(Unindented); + CurrentToken.setIndentation(Indentation); +} + +// Simple tokenizer that splits the template into tokens. +// The mustache spec allows {{{ }}} to unescape variables +// but we don't support that here. An unescape variable +// is represented only by {{& variable}}. +SmallVector<Token, 0> tokenize(StringRef Template) { + SmallVector<Token, 0> Tokens; + StringRef Open("{{"); + StringRef Close("}}"); + size_t Start = 0; + size_t DelimiterStart = Template.find(Open); + if (DelimiterStart == StringRef::npos) { + Tokens.emplace_back(Template); + return Tokens; + } + while (DelimiterStart != StringRef::npos) { + if (DelimiterStart != Start) { + Tokens.emplace_back(Template.substr(Start, DelimiterStart - Start)); + } + + size_t DelimiterEnd = Template.find(Close, DelimiterStart); + if (DelimiterEnd == StringRef::npos) { + break; + } + + // Extract the Interpolated variable without delimiters {{ and }} + size_t InterpolatedStart = DelimiterStart + Open.size(); + size_t InterpolatedEnd = DelimiterEnd - DelimiterStart - Close.size(); + SmallString<0> Interpolated = + Template.substr(InterpolatedStart, InterpolatedEnd); + SmallString<0> RawBody({Open, Interpolated, Close}); + Tokens.emplace_back(RawBody, Interpolated, Interpolated[0]); + Start = DelimiterEnd + Close.size(); + DelimiterStart = Template.find(Open, Start); + } + + if (Start < Template.size()) + Tokens.emplace_back(Template.substr(Start)); + + // Fix up white spaces for: + // - open sections + // - inverted sections + // - close sections + // - comments + // + // This loop attempts to find standalone tokens and tries to trim out + // the surrounding whitespace. + // For example: + // if you have the template string + // "Line 1\n {{#section}} \n Line 2 \n {{/section}} \n Line 3" + // The output would be + // "Line 1\n Line 2\n Line 3" + size_t LastIdx = Tokens.size() - 1; + for (size_t Idx = 0, End = Tokens.size(); Idx < End; ++Idx) { + Token &CurrentToken = Tokens[Idx]; + Token::Type CurrentType = CurrentToken.getType(); + // Check if token type requires cleanup + bool RequiresCleanUp = requiresCleanUp(CurrentType); + + if (!RequiresCleanUp) + continue; + + // We adjust the token body if there's no text behind or ahead. + // A token is considered to have no text ahead if the right of the previous + // token is a newline followed by spaces. + // A token is considered to have no text behind if the left of the next + // token is spaces followed by a newline. + // eg. + // "Line 1\n {{#section}} \n Line 2 \n {{/section}} \n Line 3" + bool NoTextBehind = noTextBehind(Idx, Tokens); + bool NoTextAhead = noTextAhead(Idx, Tokens); + + if ((NoTextBehind && NoTextAhead) || (NoTextAhead && Idx == 0)) { + stripTokenAhead(Tokens, Idx); + } + + if (((NoTextBehind && NoTextAhead) || (NoTextBehind && Idx == LastIdx))) { + stripTokenBefore(Tokens, Idx, CurrentToken, CurrentType); + } + } + return Tokens; +} + +class Parser { +public: + Parser(StringRef TemplateStr, BumpPtrAllocator &Allocator) + : Allocator(Allocator), TemplateStr(TemplateStr) {} + + ASTNode *parse(); + +private: + void parseMustache(ASTNode *Parent); + + BumpPtrAllocator &Allocator; + SmallVector<Token, 0> Tokens; + size_t CurrentPtr; + StringRef TemplateStr; +}; + +ASTNode *Parser::parse() { + Tokens = tokenize(TemplateStr); + CurrentPtr = 0; + void *Root = Allocator.Allocate(sizeof(ASTNode), alignof(ASTNode)); + ASTNode *RootNode = new (Root) ASTNode(); + parseMustache(RootNode); + return RootNode; +} + +void Parser::parseMustache(ASTNode *Parent) { + + while (CurrentPtr < Tokens.size()) { + Token CurrentToken = Tokens[CurrentPtr]; + CurrentPtr++; + Accessor A = CurrentToken.getAccessor(); + ASTNode *CurrentNode; + void *Node = Allocator.Allocate(sizeof(ASTNode), alignof(ASTNode)); + + switch (CurrentToken.getType()) { + case Token::Type::Text: { + CurrentNode = new (Node) ASTNode(CurrentToken.getTokenBody(), Parent); + Parent->addChild(CurrentNode); + break; + } + case Token::Type::Variable: { + CurrentNode = new (Node) ASTNode(ASTNode::Variable, A, Parent); + Parent->addChild(CurrentNode); + break; + } + case Token::Type::UnescapeVariable: { + CurrentNode = new (Node) ASTNode(ASTNode::UnescapeVariable, A, Parent); + Parent->addChild(CurrentNode); + break; + } + case Token::Type::Partial: { + CurrentNode = new (Node) ASTNode(ASTNode::Partial, A, Parent); + CurrentNode->setIndentation(CurrentToken.getIndentation()); + Parent->addChild(CurrentNode); + break; + } + case Token::Type::SectionOpen: { + CurrentNode = new (Node) ASTNode(ASTNode::Section, A, Parent); + size_t Start = CurrentPtr; + parseMustache(CurrentNode); + size_t End = CurrentPtr; + SmallString<0> RawBody; + for (std::size_t I = Start; I < End - 1; I++) + RawBody += Tokens[I].getRawBody(); + CurrentNode->setRawBody(RawBody); + Parent->addChild(CurrentNode); + break; + } + case Token::Type::InvertSectionOpen: { + CurrentNode = new (Node) ASTNode(ASTNode::InvertSection, A, Parent); + size_t Start = CurrentPtr; + parseMustache(CurrentNode); + size_t End = CurrentPtr; + SmallString<0> RawBody; + for (size_t Idx = Start; Idx < End - 1; Idx++) + RawBody += Tokens[Idx].getRawBody(); + CurrentNode->setRawBody(RawBody); + Parent->addChild(CurrentNode); + break; + } + case Token::Type::Comment: + break; + case Token::Type::SectionClose: + return; + } + } +} + +StringRef Template::render(Value &Data) { + Tree->render(Data, Output); + LocalAllocator.Reset(); + return Output.str(); +} + +void Template::registerPartial(StringRef Name, StringRef Partial) { + Parser P = Parser(Partial, Allocator); + ASTNode *PartialTree = P.parse(); + PartialTree->setUpNode(LocalAllocator, Partials, Lambdas, SectionLambdas, + Escapes); + Partials.insert(std::make_pair(Name, PartialTree)); +} + +void Template::registerLambda(StringRef Name, Lambda L) { Lambdas[Name] = L; } + +void Template::registerLambda(StringRef Name, SectionLambda L) { + SectionLambdas[Name] = L; +} + +void Template::registerEscape(DenseMap<char, StringRef> E) { Escapes = E; } + +Template::Template(StringRef TemplateStr) { + Parser P = Parser(TemplateStr, Allocator); + Tree = P.parse(); + // the default behaviour is to escape html entities + DenseMap<char, StringRef> HtmlEntities = {{'&', "&"}, + {'<', "<"}, + {'>', ">"}, + {'"', """}, + {'\'', "'"}}; + registerEscape(HtmlEntities); + Tree->setUpNode(LocalAllocator, Partials, Lambdas, SectionLambdas, Escapes); +} + +void toJsonString(const Value &Data, SmallString<0> &Output) { + if (Data.getAsNull()) + return; + if (auto *Arr = Data.getAsArray()) + if (Arr->empty()) + return; + if (Data.getAsString()) { + Output = Data.getAsString()->str(); + return; + } + if (auto Num = Data.getAsNumber()) { + std::ostringstream Oss; + Oss << *Num; + Output = Oss.str(); + return; + } + Output = formatv("{0:2}", Data); +} + +bool isFalsey(const Value &V) { + return V.getAsNull() || (V.getAsBoolean() && !V.getAsBoolean().value()) || + (V.getAsArray() && V.getAsArray()->empty()) || + (V.getAsObject() && V.getAsObject()->empty()); +} + +void ASTNode::render(const Value &Data, SmallString<0> &Output) { ---------------- PeterChou1 wrote:
I think you're viewing an outdated version of the PR I've already refactor this function to use OStream? https://github.com/llvm/llvm-project/pull/105893 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits