clang-format: [JS] nested and tagged template strings. JavaScript template strings can be nested arbitrarily: foo = `text ${es.map(e => { return `<${e}>`; })} text`; This change lexes nested template strings using a stack of lexer states to correctly switch back to template string lexing on closing braces. Also, reuse the same stack for the token-stashed logic. Reviewers: djasper Subscribers: cfe-commits, klimek Differential Revision: https://reviews.llvm.org/D22431 llvm-svn: 279727

commit: 6181da4796ef8c095e3e5250413ff7e82878cba4 [log] [tgz]
author: Martin Probst <martin@probst.io> Thu Aug 25 10:13:21 2016 +0000
committer: Martin Probst <martin@probst.io> Thu Aug 25 10:13:21 2016 +0000
tree: d15bbe889e7006f431ffc90df9825ee54d81319a
parent: 86ce267a4ace2ec170653611099786324808d68e [diff] [blame]
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 9778f84..8c795fb 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp

@@ -26,12 +26,11 @@
 FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
                                    const FormatStyle &Style,
                                    encoding::Encoding Encoding)
-    : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
-      LessStashed(false), Column(0), TrailingWhitespace(0),
-      SourceMgr(SourceMgr), ID(ID), Style(Style),
-      IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
-      Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
-      MacroBlockBeginRegex(Style.MacroBlockBegin),
+    : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
+      Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
+      Style(Style), IdentTable(getFormattingLangOpts(Style)),
+      Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
+      FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
       MacroBlockEndRegex(Style.MacroBlockEnd) {
   Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
                       getFormattingLangOpts(Style)));
@@ -49,7 +48,7 @@
     Tokens.push_back(getNextToken());
     if (Style.Language == FormatStyle::LK_JavaScript) {
       tryParseJSRegexLiteral();
-      tryParseTemplateString();
+      handleTemplateStrings();
     }
     tryMergePreviousTokens();
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
@@ -228,17 +227,42 @@
   resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
 }
 
-void FormatTokenLexer::tryParseTemplateString() {
+void FormatTokenLexer::handleTemplateStrings() {
   FormatToken *BacktickToken = Tokens.back();
-  if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
+
+  if (BacktickToken->is(tok::l_brace)) {
+    StateStack.push(LexerState::NORMAL);
     return;
+  }
+  if (BacktickToken->is(tok::r_brace)) {
+    StateStack.pop();
+    if (StateStack.top() != LexerState::TEMPLATE_STRING)
+      return;
+    // If back in TEMPLATE_STRING, fall through and continue lexing the string.
+  } else if (BacktickToken->is(tok::unknown) &&
+             BacktickToken->TokenText == "`") {
+    StateStack.push(LexerState::TEMPLATE_STRING);
+  } else {
+    return; // Not actually a template
+  }
 
   // 'Manually' lex ahead in the current file buffer.
   const char *Offset = Lex->getBufferLocation();
   const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
-  for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
-    if (*Offset == '\\')
+  for (; Offset != Lex->getBuffer().end(); ++Offset) {
+    if (Offset[0] == '`') {
+      StateStack.pop();
+      break;
+    }
+    if (Offset[0] == '\\') {
       ++Offset; // Skip the escaped character.
+    } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
+               Offset[1] == '{') {
+      // '${' introduces an expression interpolation in the template string.
+      StateStack.push(LexerState::NORMAL);
+      ++Offset;
+      break;
+    }
   }
 
   StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
@@ -262,7 +286,10 @@
         Style.TabWidth, Encoding);
   }
 
-  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
+  SourceLocation loc = Offset < Lex->getBuffer().end()
+                           ? Lex->getSourceLocation(Offset + 1)
+                           : SourceMgr.getLocForEndOfFile(ID);
+  resetLexer(SourceMgr.getFileOffset(loc));
 }
 
 bool FormatTokenLexer::tryMerge_TMacro() {
@@ -384,12 +411,8 @@
 }
 
 FormatToken *FormatTokenLexer::getNextToken() {
-  if (GreaterStashed) {
-    GreaterStashed = false;
-    return getStashedToken();
-  }
-  if (LessStashed) {
-    LessStashed = false;
+  if (StateStack.top() == LexerState::TOKEN_STASHED) {
+    StateStack.pop();
     return getStashedToken();
   }
 
@@ -500,11 +523,11 @@
   } else if (FormatTok->Tok.is(tok::greatergreater)) {
     FormatTok->Tok.setKind(tok::greater);
     FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
-    GreaterStashed = true;
+    StateStack.push(LexerState::TOKEN_STASHED);
   } else if (FormatTok->Tok.is(tok::lessless)) {
     FormatTok->Tok.setKind(tok::less);
     FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
-    LessStashed = true;
+    StateStack.push(LexerState::TOKEN_STASHED);
   }
 
   // Now FormatTok is the next non-whitespace token.
commit	6181da4796ef8c095e3e5250413ff7e82878cba4	[log] [tgz]
author	Martin Probst <martin@probst.io>	Thu Aug 25 10:13:21 2016 +0000
committer	Martin Probst <martin@probst.io>	Thu Aug 25 10:13:21 2016 +0000
tree	d15bbe889e7006f431ffc90df9825ee54d81319a
parent	86ce267a4ace2ec170653611099786324808d68e [diff] [blame]