[x86/MIR] Implement support for pre- and post-instruction symbols, as
well as MIR parsing support for `MCSymbol` `MachineOperand`s.

The only real way to test pre- and post-instruction symbol support is to
use them in operands, so I ended up implementing that within the patch
as well. I can split out the operand support if folks really want but it
doesn't really seem worth it.

The functional implementation of pre- and post-instruction symbols is
now *completely trivial*. Two tiny bits of code in the (misnamed)
AsmPrinter. It should be completely target independent as well. We emit
these exactly the same way as we emit basic block labels. Most of the
code here is to give full dumping, MIR printing, and MIR parsing support
so that we can write useful tests.

The MIR parsing of MC symbol operands still isn't 100%, as it forces the
symbols to be non-temporary and non-local symbols with names. However,
those names often can encode most (if not all) of the special semantics
desired, and unnamed symbols seem especially annoying to serialize and
de-serialize. While this isn't perfect or full support, it seems plenty
to write tests that exercise usage of these kinds of operands.

The MIR support for pre-and post-instruction symbols was quite
straightforward. I chose to print them out in an as-if-operand syntax
similar to debug locations as this seemed the cleanest way and let me
use nice introducer tokens rather than inventing more magic punctuation
like we use for memoperands.

However, supporting MIR-based parsing of these symbols caused me to
change the design of the symbol support to allow setting arbitrary
symbols. Without this, I don't see any reasonable way to test things
with MIR.

Differential Revision: https://reviews.llvm.org/D50833

llvm-svn: 339962
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 0271e21..3fe5a0b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1066,6 +1066,10 @@
         ++NumInstsInFunction;
       }
 
+      // If there is a pre-instruction symbol, emit a label for it here.
+      if (MCSymbol *S = MI.getPreInstrSymbol())
+        OutStreamer->EmitLabel(S);
+
       if (ShouldPrintDebugScopes) {
         for (const HandlerInfo &HI : Handlers) {
           NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -1117,6 +1121,10 @@
         break;
       }
 
+      // If there is a post-instruction symbol, emit a label for it here.
+      if (MCSymbol *S = MI.getPostInstrSymbol())
+        OutStreamer->EmitLabel(S);
+
       if (ShouldPrintDebugScopes) {
         for (const HandlerInfo &HI : Handlers) {
           NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index da05c9a..400e098 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -245,6 +245,8 @@
       .Case("successors", MIToken::kw_successors)
       .Case("floatpred", MIToken::kw_floatpred)
       .Case("intpred", MIToken::kw_intpred)
+      .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
+      .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
       .Default(MIToken::Identifier);
 }
 
@@ -460,6 +462,53 @@
                  ErrorCallback);
 }
 
+static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
+                               ErrorCallbackType ErrorCallback) {
+  const StringRef Rule = "<mcsymbol ";
+  if (!C.remaining().startswith(Rule))
+    return None;
+  auto Start = C;
+  C.advance(Rule.size());
+
+  // Try a simple unquoted name.
+  if (C.peek() != '"') {
+    while (isIdentifierChar(C.peek()))
+      C.advance();
+    StringRef String = Start.upto(C).drop_front(Rule.size());
+    if (C.peek() != '>') {
+      ErrorCallback(C.location(),
+                    "expected the '<mcsymbol ...' to be closed by a '>'");
+      Token.reset(MIToken::Error, Start.remaining());
+      return Start;
+    }
+    C.advance();
+
+    Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String);
+    return C;
+  }
+
+  // Otherwise lex out a quoted name.
+  Cursor R = lexStringConstant(C, ErrorCallback);
+  if (!R) {
+    ErrorCallback(C.location(),
+                  "unable to parse quoted string from opening quote");
+    Token.reset(MIToken::Error, Start.remaining());
+    return Start;
+  }
+  StringRef String = Start.upto(R).drop_front(Rule.size());
+  if (R.peek() != '>') {
+    ErrorCallback(R.location(),
+                  "expected the '<mcsymbol ...' to be closed by a '>'");
+    Token.reset(MIToken::Error, Start.remaining());
+    return Start;
+  }
+  R.advance();
+
+  Token.reset(MIToken::MCSymbol, Start.upto(R))
+      .setOwnedStringValue(unescapeQuotedString(String));
+  return R;
+}
+
 static bool isValidHexFloatingPointPrefix(char C) {
   return C == 'H' || C == 'K' || C == 'L' || C == 'M';
 }
@@ -657,6 +706,8 @@
     return R.remaining();
   if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
     return R.remaining();
+  if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback))
+    return R.remaining();
   if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
     return R.remaining();
   if (Cursor R = maybeLexNumericalLiteral(C, Token))
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index e21c715..94b460e 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -113,6 +113,8 @@
     kw_successors,
     kw_floatpred,
     kw_intpred,
+    kw_pre_instr_symbol,
+    kw_post_instr_symbol,
 
     // Named metadata keywords
     md_tbaa,
@@ -132,6 +134,7 @@
     NamedGlobalValue,
     GlobalValue,
     ExternalSymbol,
+    MCSymbol,
 
     // Other tokens
     IntegerLiteral,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 86d4a38..956f6ec 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -54,6 +54,7 @@
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -221,6 +222,7 @@
   bool parseSubRegisterIndexOperand(MachineOperand &Dest);
   bool parseJumpTableIndexOperand(MachineOperand &Dest);
   bool parseExternalSymbolOperand(MachineOperand &Dest);
+  bool parseMCSymbolOperand(MachineOperand &Dest);
   bool parseMDNode(MDNode *&Node);
   bool parseDIExpression(MDNode *&Expr);
   bool parseMetadataOperand(MachineOperand &Dest);
@@ -250,6 +252,7 @@
   bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID);
   bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
   bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
+  bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
 
 private:
   /// Convert the integer literal in the current token into an unsigned integer.
@@ -346,6 +349,9 @@
   /// Return true if the name isn't a name of a target MMO flag.
   bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
 
+  /// Get or create an MCSymbol for a given name.
+  MCSymbol *getOrCreateMCSymbol(StringRef Name);
+
   /// parseStringConstant
   ///   ::= StringConstant
   bool parseStringConstant(std::string &Result);
@@ -737,7 +743,9 @@
     return true;
 
   // Parse the remaining machine operands.
-  while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
+  while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) &&
+         Token.isNot(MIToken::kw_post_instr_symbol) &&
+         Token.isNot(MIToken::kw_debug_location) &&
          Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
     auto Loc = Token.location();
     Optional<unsigned> TiedDefIdx;
@@ -753,6 +761,15 @@
     lex();
   }
 
+  MCSymbol *PreInstrSymbol = nullptr;
+  if (Token.is(MIToken::kw_pre_instr_symbol))
+    if (parsePreOrPostInstrSymbol(PreInstrSymbol))
+      return true;
+  MCSymbol *PostInstrSymbol = nullptr;
+  if (Token.is(MIToken::kw_post_instr_symbol))
+    if (parsePreOrPostInstrSymbol(PostInstrSymbol))
+      return true;
+
   DebugLoc DebugLocation;
   if (Token.is(MIToken::kw_debug_location)) {
     lex();
@@ -795,9 +812,12 @@
     MI->addOperand(MF, Operand.Operand);
   if (assignRegisterTies(*MI, Operands))
     return true;
-  if (MemOperands.empty())
-    return false;
-  MI->setMemRefs(MF, MemOperands);
+  if (PreInstrSymbol)
+    MI->setPreInstrSymbol(MF, PreInstrSymbol);
+  if (PostInstrSymbol)
+    MI->setPostInstrSymbol(MF, PostInstrSymbol);
+  if (!MemOperands.empty())
+    MI->setMemRefs(MF, MemOperands);
   return false;
 }
 
@@ -1570,6 +1590,16 @@
   return false;
 }
 
+bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::MCSymbol));
+  MCSymbol *Symbol = getOrCreateMCSymbol(Token.stringValue());
+  lex();
+  Dest = MachineOperand::CreateMCSymbol(Symbol);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
 bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
   assert(Token.is(MIToken::SubRegisterIndex));
   StringRef Name = Token.stringValue();
@@ -2047,6 +2077,8 @@
     return parseJumpTableIndexOperand(Dest);
   case MIToken::ExternalSymbol:
     return parseExternalSymbolOperand(Dest);
+  case MIToken::MCSymbol:
+    return parseMCSymbolOperand(Dest);
   case MIToken::SubRegisterIndex:
     return parseSubRegisterIndexOperand(Dest);
   case MIToken::md_diexpr:
@@ -2526,6 +2558,24 @@
   return false;
 }
 
+bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
+  assert((Token.is(MIToken::kw_pre_instr_symbol) ||
+          Token.is(MIToken::kw_post_instr_symbol)) &&
+         "Invalid token for a pre- post-instruction symbol!");
+  lex();
+  if (Token.isNot(MIToken::MCSymbol))
+    return error("expected a symbol after 'pre-instr-symbol'");
+  Symbol = getOrCreateMCSymbol(Token.stringValue());
+  lex();
+  if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+      Token.is(MIToken::lbrace))
+    return false;
+  if (Token.isNot(MIToken::comma))
+    return error("expected ',' before the next machine operand");
+  lex();
+  return false;
+}
+
 void MIParser::initNames2InstrOpCodes() {
   if (!Names2InstrOpCodes.empty())
     return;
@@ -2756,6 +2806,15 @@
   return false;
 }
 
+MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
+  // FIXME: Currently we can't recognize temporary or local symbols and call all
+  // of the appropriate forms to create them. However, this handles basic cases
+  // well as most of the special aspects are recognized by a prefix on their
+  // name, and the input names should already be unique. For test cases, keeping
+  // the symbol name out of the symbol table isn't terribly important.
+  return MF.getContext().getOrCreateSymbol(Name);
+}
+
 bool MIParser::parseStringConstant(std::string &Result) {
   if (Token.isNot(MIToken::StringConstant))
     return error("expected string constant");
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index bf8cd14..67c5463 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -50,6 +50,7 @@
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Value.h"
 #include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/AtomicOrdering.h"
@@ -708,6 +709,23 @@
     NeedComma = true;
   }
 
+  // Print any optional symbols attached to this instruction as-if they were
+  // operands.
+  if (MCSymbol *PreInstrSymbol = MI.getPreInstrSymbol()) {
+    if (NeedComma)
+      OS << ',';
+    OS << " pre-instr-symbol ";
+    MachineOperand::printSymbol(OS, *PreInstrSymbol);
+    NeedComma = true;
+  }
+  if (MCSymbol *PostInstrSymbol = MI.getPostInstrSymbol()) {
+    if (NeedComma)
+      OS << ',';
+    OS << " post-instr-symbol ";
+    MachineOperand::printSymbol(OS, *PostInstrSymbol);
+    NeedComma = true;
+  }
+
   if (const DebugLoc &DL = MI.getDebugLoc()) {
     if (NeedComma)
       OS << ',';
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index fb1d392..9503537 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -447,45 +447,68 @@
   setMemRefs(MF, MergedMMOs);
 }
 
-MCSymbol *MachineInstr::getOrCreatePreInstrTempSymbol(MCContext &MCCtx) {
-  MCSymbol *S = getPreInstrSymbol();
-  if (S)
-    return S;
+void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+  MCSymbol *OldSymbol = getPreInstrSymbol();
+  if (OldSymbol == Symbol)
+    return;
+  if (OldSymbol && !Symbol) {
+    // We're removing a symbol rather than adding one. Try to clean up any
+    // extra info carried around.
+    if (Info.is<EIIK_PreInstrSymbol>()) {
+      Info.clear();
+      return;
+    }
 
-  // Create a new temp symbol.
-  S = MCCtx.createTempSymbol();
+    if (memoperands_empty()) {
+      assert(getPostInstrSymbol() &&
+             "Should never have only a single symbol allocated out-of-line!");
+      Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol());
+      return;
+    }
 
-  if (!Info) {
+    // Otherwise fallback on the generic update.
+  } else if (!Info || Info.is<EIIK_PreInstrSymbol>()) {
     // If we don't have any other extra info, we can store this inline.
-    Info.set<EIIK_PreInstrSymbol>(S);
-    return S;
+    Info.set<EIIK_PreInstrSymbol>(Symbol);
+    return;
   }
 
-  // Otherwise, allocate a fully set of extra info.
+  // Otherwise, allocate a full new set of extra info.
+  // FIXME: Maybe we should make the symbols in the extra info mutable?
   Info.set<EIIK_OutOfLine>(
-      getMF()->createMIExtraInfo(memoperands(), S, getPostInstrSymbol()));
-
-  return S;
+      MF.createMIExtraInfo(memoperands(), Symbol, getPostInstrSymbol()));
 }
 
-MCSymbol *MachineInstr::getOrCreatePostInstrTempSymbol(MCContext &MCCtx) {
-  MCSymbol *S = getPostInstrSymbol();
-  if (S)
-    return S;
+void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+  MCSymbol *OldSymbol = getPostInstrSymbol();
+  if (OldSymbol == Symbol)
+    return;
+  if (OldSymbol && !Symbol) {
+    // We're removing a symbol rather than adding one. Try to clean up any
+    // extra info carried around.
+    if (Info.is<EIIK_PostInstrSymbol>()) {
+      Info.clear();
+      return;
+    }
 
-  // Create a new temp symbol.
-  S = MCCtx.createTempSymbol();
+    if (memoperands_empty()) {
+      assert(getPreInstrSymbol() &&
+             "Should never have only a single symbol allocated out-of-line!");
+      Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol());
+      return;
+    }
 
-  if (!Info) {
+    // Otherwise fallback on the generic update.
+  } else if (!Info || Info.is<EIIK_PostInstrSymbol>()) {
     // If we don't have any other extra info, we can store this inline.
-    Info.set<EIIK_PostInstrSymbol>(S);
-    return S;
+    Info.set<EIIK_PostInstrSymbol>(Symbol);
+    return;
   }
 
-  // Otherwise, allocate a fully set of extra info.
+  // Otherwise, allocate a full new set of extra info.
+  // FIXME: Maybe we should make the symbols in the extra info mutable?
   Info.set<EIIK_OutOfLine>(
-      getMF()->createMIExtraInfo(memoperands(), getPreInstrSymbol(), S));
-  return S;
+      MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol));
 }
 
 uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
@@ -1592,6 +1615,25 @@
     }
   }
 
+  // Print any optional symbols attached to this instruction as-if they were
+  // operands.
+  if (MCSymbol *PreInstrSymbol = getPreInstrSymbol()) {
+    if (!FirstOp) {
+      FirstOp = false;
+      OS << ',';
+    }
+    OS << " pre-instr-symbol ";
+    MachineOperand::printSymbol(OS, *PreInstrSymbol);
+  }
+  if (MCSymbol *PostInstrSymbol = getPostInstrSymbol()) {
+    if (!FirstOp) {
+      FirstOp = false;
+      OS << ',';
+    }
+    OS << " post-instr-symbol ";
+    MachineOperand::printSymbol(OS, *PostInstrSymbol);
+  }
+
   if (!SkipDebugLoc) {
     if (const DebugLoc &DL = getDebugLoc()) {
       if (!FirstOp)