Etch out the code path for MS-style inline assembly.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@158325 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp
index 6652bea..9f49ab5 100644
--- a/lib/Parse/ParseStmt.cpp
+++ b/lib/Parse/ParseStmt.cpp
@@ -20,6 +20,7 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/PrettyStackTrace.h"
 #include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/SmallString.h"
 using namespace clang;
 
 //===----------------------------------------------------------------------===//
@@ -1627,9 +1628,24 @@
 
 /// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled,
 /// this routine is called to collect the tokens for an MS asm statement.
+///
+/// [MS]  ms-asm-statement:
+///         ms-asm-block
+///         ms-asm-block ms-asm-statement
+///
+/// [MS]  ms-asm-block:
+///         '__asm' ms-asm-line '\n'
+///         '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt]
+///
+/// [MS]  ms-asm-instruction-block:
+///         ms-asm-line
+///         ms-asm-line '\n' ms-asm-instruction-block
+///
 StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
   SourceManager &SrcMgr = PP.getSourceManager();
   SourceLocation EndLoc = AsmLoc;
+  SmallVector<Token, 4> AsmToks;
+  SmallVector<unsigned, 4> LineEnds;
   do {
     bool InBraces = false;
     unsigned short savedBraceCount = 0;
@@ -1658,9 +1674,6 @@
       // If we hit EOF, we're done, period.
       if (Tok.is(tok::eof))
         break;
-      // When we consume the closing brace, we're done.
-      if (InBraces && BraceCount == savedBraceCount)
-        break;
 
       if (!InAsmComment && Tok.is(tok::semi)) {
         // A semicolon in an asm is the start of a comment.
@@ -1690,8 +1703,15 @@
           // does MSVC do here?
           break;
         }
+      } else if (InBraces && Tok.is(tok::r_brace) &&
+                 BraceCount == savedBraceCount + 1) {
+        // Consume the closing brace and finish; it is not added to AsmToks.
+        EndLoc = ConsumeBrace();
+        break;
       }
 
+      AsmToks.push_back(Tok);
+
       // Consume the next token; make sure we don't modify the brace count etc.
       // if we are in a comment.
       EndLoc = TokLoc;
@@ -1703,6 +1723,8 @@
       ++NumTokensRead;
     } while (1);
 
+    LineEnds.push_back(AsmToks.size());
+
     if (InBraces && BraceCount != savedBraceCount) {
       // __asm without closing brace (this can happen at EOF).
       Diag(Tok, diag::err_expected_rbrace);
@@ -1719,24 +1741,33 @@
       break;
     EndLoc = ConsumeToken();
   } while (1);
-  // FIXME: Need to actually grab the data and pass it on to Sema.  Ideally,
-  // what Sema wants is a string of the entire inline asm, with one instruction
-  // per line and all the __asm keywords stripped out, and a way of mapping
-  // from any character of that string to its location in the original source
-  // code. I'm not entirely sure how to go about that, though.
-  Token t;
-  t.setKind(tok::string_literal);
-  t.setLiteralData("\"/*FIXME: not done*/\"");
-  t.clearFlag(Token::NeedsCleaning);
-  t.setLength(21);
-  ExprResult AsmString(Actions.ActOnStringLiteral(&t, 1));
-  ExprVector Constraints(Actions);
-  ExprVector Exprs(Actions);
-  ExprVector Clobbers(Actions);
-  return Actions.ActOnAsmStmt(AsmLoc, true, true, 0, 0, 0,
-                              move_arg(Constraints), move_arg(Exprs),
-                              AsmString.take(), move_arg(Clobbers),
-                              EndLoc, true);
+
+  // Collect the tokens into a string
+  SmallString<512> Asm;
+  SmallString<512> TokenBuf;
+  TokenBuf.resize(512);
+  unsigned AsmLineNum = 0;
+  for (unsigned i = 0, e = AsmToks.size(); i < e; i++) {
+    const char *ThisTokBuf = &TokenBuf[0];
+    bool StringInvalid = false;
+    unsigned ThisTokLen = 
+      Lexer::getSpelling(AsmToks[i], ThisTokBuf, PP.getSourceManager(),
+                         PP.getLangOpts(), &StringInvalid);
+    if (i && (!AsmLineNum || i != LineEnds[AsmLineNum-1]))
+      Asm += ' '; // FIXME: Too much whitespace around punctuation
+    Asm += StringRef(ThisTokBuf, ThisTokLen);
+    if (i + 1 == LineEnds[AsmLineNum] && i + 1 != AsmToks.size()) {
+      Asm += '\n';
+      ++AsmLineNum;
+    }
+  }
+
+  // FIXME: We should be passing the tokens and source locations, rather than
+  // (or possibly in addition to) the AsmString.  Sema is going to interact with
+  // MC to determine Constraints, Clobbers, etc., which would be simplest to
+  // do with the tokens.
+  std::string AsmString = Asm.data();
+  return Actions.ActOnMSAsmStmt(AsmLoc, AsmString, EndLoc);
 }
 
 /// ParseAsmStatement - Parse a GNU extended asm statement.
@@ -1758,18 +1789,6 @@
 ///         asm-string-literal
 ///         asm-clobbers ',' asm-string-literal
 ///
-/// [MS]  ms-asm-statement:
-///         ms-asm-block
-///         ms-asm-block ms-asm-statement
-///
-/// [MS]  ms-asm-block:
-///         '__asm' ms-asm-line '\n'
-///         '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt]
-///
-/// [MS]  ms-asm-instruction-block
-///         ms-asm-line
-///         ms-asm-line '\n' ms-asm-instruction-block
-///
 StmtResult Parser::ParseAsmStatement(bool &msAsm) {
   assert(Tok.is(tok::kw_asm) && "Not an asm stmt");
   SourceLocation AsmLoc = ConsumeToken();