Author: mprobst
Date: Thu Aug 25 05:13:21 2016
New Revision: 279727

URL: http://llvm.org/viewvc/llvm-project?rev=279727&view=rev
Log:
clang-format: [JS] nested and tagged template strings.
JavaScript template strings can be nested arbitrarily: foo = `text ${es.map(e => { return `<${e}>`; })} text`; This change lexes nested template strings using a stack of lexer states to correctly switch back to template string lexing on closing braces. Also, reuse the same stack for the token-stashed logic. Reviewers: djasper Subscribers: cfe-commits, klimek Differential Revision: https://reviews.llvm.org/D22431 Modified: cfe/trunk/lib/Format/FormatTokenLexer.cpp cfe/trunk/lib/Format/FormatTokenLexer.h cfe/trunk/lib/Format/TokenAnnotator.cpp cfe/trunk/unittests/Format/FormatTestJS.cpp Modified: cfe/trunk/lib/Format/FormatTokenLexer.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatTokenLexer.cpp?rev=279727&r1=279726&r2=279727&view=diff ============================================================================== --- cfe/trunk/lib/Format/FormatTokenLexer.cpp (original) +++ cfe/trunk/lib/Format/FormatTokenLexer.cpp Thu Aug 25 05:13:21 2016 @@ -26,12 +26,11 @@ namespace format { FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, const FormatStyle &Style, encoding::Encoding Encoding) - : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), - LessStashed(false), Column(0), TrailingWhitespace(0), - SourceMgr(SourceMgr), ID(ID), Style(Style), - IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), - Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), - MacroBlockBeginRegex(Style.MacroBlockBegin), + : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), + Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Style(Style), IdentTable(getFormattingLangOpts(Style)), + Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), + FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), MacroBlockEndRegex(Style.MacroBlockEnd) { Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, getFormattingLangOpts(Style))); @@ -49,7 +48,7 @@ 
ArrayRef<FormatToken *> FormatTokenLexer Tokens.push_back(getNextToken()); if (Style.Language == FormatStyle::LK_JavaScript) { tryParseJSRegexLiteral(); - tryParseTemplateString(); + handleTemplateStrings(); } tryMergePreviousTokens(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) @@ -228,17 +227,42 @@ void FormatTokenLexer::tryParseJSRegexLi resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); } -void FormatTokenLexer::tryParseTemplateString() { +void FormatTokenLexer::handleTemplateStrings() { FormatToken *BacktickToken = Tokens.back(); - if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`") + + if (BacktickToken->is(tok::l_brace)) { + StateStack.push(LexerState::NORMAL); return; + } + if (BacktickToken->is(tok::r_brace)) { + StateStack.pop(); + if (StateStack.top() != LexerState::TEMPLATE_STRING) + return; + // If back in TEMPLATE_STRING, fallthrough and continue parsing the + } else if (BacktickToken->is(tok::unknown) && + BacktickToken->TokenText == "`") { + StateStack.push(LexerState::TEMPLATE_STRING); + } else { + return; // Not actually a template + } // 'Manually' lex ahead in the current file buffer. const char *Offset = Lex->getBufferLocation(); const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`" - for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) { - if (*Offset == '\\') + for (; Offset != Lex->getBuffer().end(); ++Offset) { + if (Offset[0] == '`') { + StateStack.pop(); + break; + } + if (Offset[0] == '\\') { ++Offset; // Skip the escaped character. + } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' && + Offset[1] == '{') { + // '${' introduces an expression interpolation in the template string. 
+ StateStack.push(LexerState::NORMAL); + ++Offset; + break; + } } StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1); @@ -262,7 +286,10 @@ void FormatTokenLexer::tryParseTemplateS Style.TabWidth, Encoding); } - resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1))); + SourceLocation loc = Offset < Lex->getBuffer().end() + ? Lex->getSourceLocation(Offset + 1) + : SourceMgr.getLocForEndOfFile(ID); + resetLexer(SourceMgr.getFileOffset(loc)); } bool FormatTokenLexer::tryMerge_TMacro() { @@ -384,12 +411,8 @@ FormatToken *FormatTokenLexer::getStashe } FormatToken *FormatTokenLexer::getNextToken() { - if (GreaterStashed) { - GreaterStashed = false; - return getStashedToken(); - } - if (LessStashed) { - LessStashed = false; + if (StateStack.top() == LexerState::TOKEN_STASHED) { + StateStack.pop(); return getStashedToken(); } @@ -500,11 +523,11 @@ FormatToken *FormatTokenLexer::getNextTo } else if (FormatTok->Tok.is(tok::greatergreater)) { FormatTok->Tok.setKind(tok::greater); FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); - GreaterStashed = true; + StateStack.push(LexerState::TOKEN_STASHED); } else if (FormatTok->Tok.is(tok::lessless)) { FormatTok->Tok.setKind(tok::less); FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); - LessStashed = true; + StateStack.push(LexerState::TOKEN_STASHED); } // Now FormatTok is the next non-whitespace token. 
Modified: cfe/trunk/lib/Format/FormatTokenLexer.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatTokenLexer.h?rev=279727&r1=279726&r2=279727&view=diff ============================================================================== --- cfe/trunk/lib/Format/FormatTokenLexer.h (original) +++ cfe/trunk/lib/Format/FormatTokenLexer.h Thu Aug 25 05:13:21 2016 @@ -23,9 +23,17 @@ #include "clang/Format/Format.h" #include "llvm/Support/Regex.h" +#include <stack> + namespace clang { namespace format { +enum LexerState { + NORMAL, + TEMPLATE_STRING, + TOKEN_STASHED, +}; + class FormatTokenLexer { public: FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, @@ -53,7 +61,16 @@ private: // its text if successful. void tryParseJSRegexLiteral(); - void tryParseTemplateString(); + // Handles JavaScript template strings. + // + // JavaScript template strings use backticks ('`') as delimiters, and allow + // embedding expressions nested in ${expr-here}. Template strings can be + // nested recursively, i.e. expressions can contain template strings in turn. + // + // The code below parses starting from a backtick, up to a closing backtick or + // an opening ${. It also maintains a stack of lexing contexts to handle + // nested template parts by balancing curly braces. 
+ void handleTemplateStrings(); bool tryMerge_TMacro(); @@ -65,7 +82,7 @@ private: FormatToken *FormatTok; bool IsFirstToken; - bool GreaterStashed, LessStashed; + std::stack<LexerState> StateStack; unsigned Column; unsigned TrailingWhitespace; std::unique_ptr<Lexer> Lex; Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=279727&r1=279726&r2=279727&view=diff ============================================================================== --- cfe/trunk/lib/Format/TokenAnnotator.cpp (original) +++ cfe/trunk/lib/Format/TokenAnnotator.cpp Thu Aug 25 05:13:21 2016 @@ -858,7 +858,7 @@ private: if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, - TT_RegexLiteral)) + TT_RegexLiteral, TT_TemplateString)) CurrentToken->Type = TT_Unknown; CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; @@ -1816,6 +1816,9 @@ unsigned TokenAnnotator::splitPenalty(co return 100; if (Left.is(TT_JsTypeColon)) return 35; + if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || + (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) + return 100; } if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && @@ -2114,6 +2117,11 @@ bool TokenAnnotator::spaceRequiredBefore } else if (Style.Language == FormatStyle::LK_JavaScript) { if (Left.is(TT_JsFatArrow)) return true; + if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || + (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) + return false; + if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) + return false; if (Right.is(tok::star) && Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) return false; Modified: cfe/trunk/unittests/Format/FormatTestJS.cpp URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestJS.cpp?rev=279727&r1=279726&r2=279727&view=diff ============================================================================== --- cfe/trunk/unittests/Format/FormatTestJS.cpp (original) +++ cfe/trunk/unittests/Format/FormatTestJS.cpp Thu Aug 25 05:13:21 2016 @@ -1122,7 +1122,7 @@ TEST_F(FormatTestJS, ImportWrapping) { TEST_F(FormatTestJS, TemplateStrings) { // Keeps any whitespace/indentation within the template string. verifyFormat("var x = `hello\n" - " ${ name }\n" + " ${name}\n" " !`;", "var x = `hello\n" " ${ name }\n" @@ -1206,6 +1206,18 @@ TEST_F(FormatTestJS, TemplateStrings) { "var y;", "var x = ` \\` a`;\n" "var y;"); + // Escaped dollar. + verifyFormat("var x = ` \\${foo}`;\n"); +} + +TEST_F(FormatTestJS, NestedTemplateStrings) { + verifyFormat( + "var x = `<ul>${xs.map(x => `<li>${x}</li>`).join('\\n')}</ul>`;"); + verifyFormat("var x = `he${({text: 'll'}.text)}o`;"); +} + +TEST_F(FormatTestJS, TaggedTemplateStrings) { + verifyFormat("var x = html`<ul>`;"); } TEST_F(FormatTestJS, CastSyntax) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits