Re-commit r192758 - MC: quote tricky symbol names in asm output

The original commit was reverted because the @feat.00 symbol, which is emitted
for every TU, became quoted, and the gas assembler that we use on cygwin/mingw
could not handle the quotes.

This commit fixes the problem by only emitting @feat.00 for win32, where we use
clang -cc1as to assemble. gas would just drop this symbol anyway, so there is no
loss there.

With @feat.00 gone, there shouldn't be quoted symbols showing up on cygwin since
it uses the Itanium ABI, which doesn't put these funny characters in symbols.

> Because of win32 mangling, we produce symbol and section names with
> funny characters in them, most notably @ characters.
>
> MC would choke on trying to parse its own assembly output. This patch addresses
> that by:
>
> - Making @ trigger quoting of symbol names
> - Also quote section names in the same way
> - Just parse section names like other identifiers (to allow for quotes)
> - Don't assume @ signifies a symbol variant if it is in a string.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192859 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 9a36256..75f5c78 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -792,19 +792,25 @@
     EndLoc = SMLoc::getFromPointer(Identifier.end());
 
     // This is a symbol reference.
-    std::pair<StringRef, StringRef> Split = Identifier.split('@');
-    MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
-
-    // Lookup the symbol variant if used.
+    StringRef SymbolName = Identifier;
     MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
-    if (Split.first.size() != Identifier.size()) {
-      Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
+    std::pair<StringRef, StringRef> Split = Identifier.split('@');
+
+    if (Split.first.size() != Identifier.size() &&
+        FirstTokenKind != AsmToken::String) {
+      SymbolName = Split.first;
+      StringRef VariantName = Split.second;
+
+      // Lookup the symbol variant.
+      Variant = MCSymbolRefExpr::getVariantKindForName(VariantName);
       if (Variant == MCSymbolRefExpr::VK_Invalid) {
         Variant = MCSymbolRefExpr::VK_None;
-        return TokError("invalid variant '" + Split.second + "'");
+        return TokError("invalid variant '" + VariantName + "'");
       }
     }
 
+    MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
     // If this is an absolute variable reference, substitute it now to preserve
     // semantics in the face of reassignment.
     if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index df1794c..b3c0943 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -295,12 +295,7 @@
 }
 
 bool COFFAsmParser::ParseSectionName(StringRef &SectionName) {
-  if (!getLexer().is(AsmToken::Identifier))
-    return true;
-
-  SectionName = getTok().getIdentifier();
-  Lex();
-  return false;
+  return getParser().parseIdentifier(SectionName);
 }
 
 // .section name [, "flags"]
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index 64aa2c5..a8f5db0 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -39,6 +39,22 @@
   Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
 }
 
+static bool isAcceptableSectionNameChar(char C) {
+  return (C >= 'a' && C <= 'z') ||
+         (C >= 'A' && C <= 'Z') ||
+         (C >= '0' && C <= '9') ||
+         C == '_' || C == '$' || C == '.';
+}
+
+/// sectionNameNeedsQuoting - Return true if the section name \p Name needs
+/// quotes to be syntactically correct.
+static bool sectionNameNeedsQuoting(StringRef Name) {
+  for (unsigned i = 0, e = Name.size(); i != e; ++i)
+    if (!isAcceptableSectionNameChar(Name[i]))
+      return true;
+  return false;
+}
+
 void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
                                          raw_ostream &OS,
                                          const MCExpr *Subsection) const {
@@ -49,7 +65,10 @@
     return;
   }
 
-  OS << "\t.section\t" << getSectionName() << ",\"";
+  if (sectionNameNeedsQuoting(getSectionName()))
+    OS << "\t.section\t" << '"' << getSectionName() << '"' << ",\"";
+  else
+    OS << "\t.section\t" << getSectionName() << ",\"";
   if (getKind().isText())
     OS << 'x';
   if (getKind().isWriteable())
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index b973c57..f386c3b 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -18,12 +18,10 @@
   reinterpret_cast<const MCSection *>(1);
 
 static bool isAcceptableChar(char C) {
-  if ((C < 'a' || C > 'z') &&
-      (C < 'A' || C > 'Z') &&
-      (C < '0' || C > '9') &&
-      C != '_' && C != '$' && C != '.' && C != '@')
-    return false;
-  return true;
+  return (C >= 'a' && C <= 'z') ||
+         (C >= 'A' && C <= 'Z') ||
+         (C >= '0' && C <= '9') ||
+         C == '_' || C == '$' || C == '.';
 }
 
 /// NameNeedsQuoting - Return true if the identifier \p Str needs quotes to be