Re: [PATCH] D20208: clang-format: [JS] fix template string width counting.

Martin Probst via cfe-commits Thu, 12 May 2016 10:24:06 -0700

mprobst updated this revision to Diff 57066.
mprobst added a comment.

- simplify logic by parsing forward, reduce complexity to O(n) from O(n^2)



http://reviews.llvm.org/D20208

Files:
  lib/Format/Format.cpp
  unittests/Format/FormatTestJS.cpp

Index: unittests/Format/FormatTestJS.cpp
===================================================================
--- unittests/Format/FormatTestJS.cpp
+++ unittests/Format/FormatTestJS.cpp
@@ -1076,6 +1076,8 @@
                getGoogleJSStyleWithColumns(34)); // Barely doesn't fit.
   verifyFormat("var x = `hello ${world}` >= some();",
                getGoogleJSStyleWithColumns(35)); // Barely fits.
+  verifyFormat("var x = `hellö ${wörld}` >= söme();",
+               getGoogleJSStyleWithColumns(35)); // Fits due to UTF-8.
   verifyFormat("var x = `hello\n"
             "  ${world}` >=\n"
             "    some();",
@@ -1097,6 +1099,13 @@
                getGoogleJSStyleWithColumns(13));
   verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n"
                "    `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`);");
+  // Repro for an obscure width-miscounting issue with template strings.
+  verifyFormat(
+      "someLongVariable =\n"
+      "    "
+      "`${logPrefix[11]}/${logPrefix[12]}/${logPrefix[13]}${logPrefix[14]}`;",
+      "someLongVariable = "
+      "`${logPrefix[11]}/${logPrefix[12]}/${logPrefix[13]}${logPrefix[14]}`;");
 
   // Make sure template strings get a proper ColumnWidth assigned, even if they
   // are first token in line.
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -807,8 +807,10 @@
     assert(FirstInLineIndex == 0);
     do {
       Tokens.push_back(getNextToken());
-      if (Style.Language == FormatStyle::LK_JavaScript)
+      if (Style.Language == FormatStyle::LK_JavaScript) {
         tryParseJSRegexLiteral();
+        tryParseTemplateString();
+      }
       tryMergePreviousTokens();
       if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
         FirstInLineIndex = Tokens.size() - 1;
@@ -828,9 +830,6 @@
       return;
 
     if (Style.Language == FormatStyle::LK_JavaScript) {
-      if (tryMergeTemplateString())
-        return;
-
       static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
       static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
                                                      tok::equal};
@@ -992,92 +991,42 @@
     resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
   }
 
-  bool tryMergeTemplateString() {
-    if (Tokens.size() < 2)
-      return false;
-
-    FormatToken *EndBacktick = Tokens.back();
-    // Backticks get lexed as tok::unknown tokens. If a template string contains
-    // a comment start, it gets lexed as a tok::comment, or tok::unknown if
-    // unterminated.
-    if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
-                              tok::char_constant, tok::unknown))
-      return false;
-    size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
-    // Unknown token that's not actually a backtick, or a comment that doesn't
-    // contain a backtick.
-    if (CommentBacktickPos == StringRef::npos)
-      return false;
-
-    unsigned TokenCount = 0;
-    bool IsMultiline = false;
-    unsigned EndColumnInFirstLine =
-        EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
-    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
-      ++TokenCount;
-      if (I[0]->IsMultiline)
-        IsMultiline = true;
-
-      // If there was a preceding template string, this must be the start of a
-      // template string, not the end.
-      if (I[0]->is(TT_TemplateString))
-        return false;
+  void tryParseTemplateString() {
+    FormatToken *BacktickToken = Tokens.back();
+    if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
+      return;
 
-      if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
-        // Keep track of the rhs offset of the last token to wrap across lines -
-        // its the rhs offset of the first line of the template string, used to
-        // determine its width.
-        if (I[0]->IsMultiline)
-          EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
-        // If the token has newlines, the token before it (if it exists) is the
-        // rhs end of the previous line.
-        if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
-          EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
-          IsMultiline = true;
-        }
-        continue;
-      }
+    // 'Manually' lex ahead in the current file buffer.
+    const char *Offset = Lex->getBufferLocation();
+    const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
+    for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
+      if (*Offset == '\\')
+        ++Offset; // Skip the escaped character.
+    }
 
-      Tokens.resize(Tokens.size() - TokenCount);
-      Tokens.back()->Type = TT_TemplateString;
-      const char *EndOffset =
-          EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
-      if (CommentBacktickPos != 0) {
-        // If the backtick was not the first character (e.g. in a comment),
-        // re-lex after the backtick position.
-        SourceLocation Loc = EndBacktick->Tok.getLocation();
-        resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
-      }
-      Tokens.back()->TokenText =
-          StringRef(Tokens.back()->TokenText.data(),
-                    EndOffset - Tokens.back()->TokenText.data());
-
-      unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
-      if (EndOriginalColumn == 0) {
-        SourceLocation Loc = EndBacktick->Tok.getLocation();
-        EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
-      }
-      // If the ` is further down within the token (e.g. in a comment).
-      EndOriginalColumn += CommentBacktickPos;
-
-      if (IsMultiline) {
-        // ColumnWidth is from backtick to last token in line.
-        // LastLineColumnWidth is 0 to backtick.
-        // x = `some content
-        //     until here`;
-        Tokens.back()->ColumnWidth =
-            EndColumnInFirstLine - Tokens.back()->OriginalColumn;
-        // +1 for the ` itself.
-        Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
-        Tokens.back()->IsMultiline = true;
-      } else {
-        // Token simply spans from start to end, +1 for the ` itself.
-        Tokens.back()->ColumnWidth =
-            EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
-      }
-      return true;
+    StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
+    BacktickToken->Type = TT_TemplateString;
+    BacktickToken->Tok.setKind(tok::string_literal);
+    BacktickToken->TokenText = LiteralText;
+
+    // Adjust width for potentially multiline string literals.
+    size_t FirstBreak = LiteralText.find('\n');
+    StringRef FirstLineText = FirstBreak == StringRef::npos
+                                  ? LiteralText
+                                  : LiteralText.substr(0, FirstBreak);
+    BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
+        FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
+    size_t LastBreak = LiteralText.rfind('\n');
+    if (LastBreak != StringRef::npos) {
+      BacktickToken->IsMultiline = true;
+      unsigned StartColumn = 0; // The template tail spans the entire line.
+      BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
+          LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
+          Style.TabWidth, Encoding);
     }
-    return false;
+
+    resetLexer(
+        SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
   }
 
   bool tryMerge_TMacro() {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Re: [PATCH] D20208: clang-format: [JS] fix template string width counting.

Reply via email to