Allow x86 mov instructions to/from memory with absolute address to be encoded and disassembled with a segment override prefix. Fixes PR16962.

llvm-svn: 199364
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 39e5855..aa857f0 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -923,19 +923,19 @@
   }
 
   bool isMemOffs8() const {
-    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+    return Kind == Memory && !getMemBaseReg() &&
       !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 8);
   }
   bool isMemOffs16() const {
-    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+    return Kind == Memory && !getMemBaseReg() &&
       !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 16);
   }
   bool isMemOffs32() const {
-    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+    return Kind == Memory && !getMemBaseReg() &&
       !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 32);
   }
   bool isMemOffs64() const {
-    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+    return Kind == Memory && !getMemBaseReg() &&
       !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 64);
   }
 
@@ -1015,12 +1015,13 @@
   }
 
   void addMemOffsOperands(MCInst &Inst, unsigned N) const {
-    assert((N == 1) && "Invalid number of operands!");
+    assert((N == 2) && "Invalid number of operands!");
     // Add as immediates when possible.
     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
       Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
     else
       Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
   }
 
   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 240d8ce..5ded46d 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -207,6 +207,16 @@
   Dis->tryAddingPcLoadReferenceComment(Value, Address);
 }
 
+static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
+  0,        // SEG_OVERRIDE_NONE
+  X86::CS,
+  X86::SS,
+  X86::DS,
+  X86::ES,
+  X86::FS,
+  X86::GS
+};
+
 /// translateImmediate  - Appends an immediate operand to an MCInst.
 ///
 /// @param mcInst       - The MCInst to append to.
@@ -315,6 +325,13 @@
                                insn.immediateOffset, insn.immediateSize,
                                mcInst, Dis))
     mcInst.addOperand(MCOperand::CreateImm(immediate));
+
+  if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 ||
+      type == TYPE_MOFFS32 || type == TYPE_MOFFS64) {
+    MCOperand segmentReg;
+    segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+    mcInst.addOperand(segmentReg);
+  }
 }
 
 /// translateRMRegister - Translates a register stored in the R/M field of the
@@ -522,17 +539,7 @@
   }
   
   displacement = MCOperand::CreateImm(insn.displacement);
-  
-  static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
-    0,        // SEG_OVERRIDE_NONE
-    X86::CS,
-    X86::SS,
-    X86::DS,
-    X86::ES,
-    X86::FS,
-    X86::GS
-  };
-  
+
   segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
   
   mcInst.addOperand(baseReg);
diff --git a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 8c2bb0f..11a9ada 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -229,9 +229,16 @@
 void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
                                        raw_ostream &O) {
   const MCOperand &DispSpec = MI->getOperand(Op);
+  const MCOperand &SegReg = MI->getOperand(Op+1);
 
   O << markup("<mem:");
 
+  // If this has a segment register, print it.
+  if (SegReg.getReg()) {
+    printOperand(MI, Op+1, O);
+    O << ':';
+  }
+
   if (DispSpec.isImm()) {
     O << formatImm(DispSpec.getImm());
   } else {
diff --git a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 7255d56..59634f9 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -215,6 +215,13 @@
 void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
                                          raw_ostream &O) {
   const MCOperand &DispSpec = MI->getOperand(Op);
+  const MCOperand &SegReg   = MI->getOperand(Op+1);
+
+  // If this has a segment register, print it.
+  if (SegReg.getReg()) {
+    printOperand(MI, Op+1, O);
+    O << ':';
+  }
 
   O << '[';
 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 404a7e8..b25a35e 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -255,6 +255,10 @@
     ///
     MRMSrcMem      = 6,
 
+    /// RawFrmMemOffs - This form is for instructions that store an absolute
+    /// memory offset as an immediate with a possible segment override.
+    RawFrmMemOffs  = 7,
+
     /// MRM[0-7][rm] - These forms are used to represent instructions that use
     /// a Mod/RM byte, and use the middle field to hold extended opcode
     /// information.  In the intel manual these are represented as /0, /1, ...
@@ -607,6 +611,7 @@
     case X86II::MRMSrcReg:
     case X86II::RawFrmImm8:
     case X86II::RawFrmImm16:
+    case X86II::RawFrmMemOffs:
        return -1;
     case X86II::MRMDestMem:
       return 0;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 55a8932..a48f979 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -158,9 +158,8 @@
                            const MCInst &MI, const MCInstrDesc &Desc,
                            raw_ostream &OS) const;
 
-  void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
-                                 int MemOperand, const MCInst &MI,
-                                 raw_ostream &OS) const;
+  void EmitSegmentOverridePrefix(unsigned &CurByte, unsigned SegOperand,
+                                 const MCInst &MI, raw_ostream &OS) const;
 
   void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
                         const MCInst &MI, const MCInstrDesc &Desc,
@@ -977,7 +976,8 @@
   }
 
   // Emit segment override opcode prefix as needed.
-  EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+  if (MemOperand >= 0)
+    EmitSegmentOverridePrefix(CurByte, MemOperand+X86::AddrSegmentReg, MI, OS);
 
   if (!HasEVEX) {
     // VEX opcode prefix can have 2 or 3 bytes
@@ -1134,15 +1134,12 @@
 }
 
 /// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
-void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
-                                        unsigned &CurByte, int MemOperand,
-                                        const MCInst &MI,
-                                        raw_ostream &OS) const {
-  if (MemOperand < 0)
-    return; // No memory operand
-
+void X86MCCodeEmitter::EmitSegmentOverridePrefix(unsigned &CurByte,
+                                                 unsigned SegOperand,
+                                                 const MCInst &MI,
+                                                 raw_ostream &OS) const {
   // Check for explicit segment override on memory operand.
-  switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+  switch (MI.getOperand(SegOperand).getReg()) {
   default: llvm_unreachable("Unknown segment register!");
   case 0: break;
   case X86::CS: EmitByte(0x2E, CurByte, OS); break;
@@ -1168,7 +1165,8 @@
     EmitByte(0xF0, CurByte, OS);
 
   // Emit segment override opcode prefix as needed.
-  EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+  if (MemOperand >= 0)
+    EmitSegmentOverridePrefix(CurByte, MemOperand+X86::AddrSegmentReg, MI, OS);
 
   // Emit the repeat opcode prefix as needed.
   if ((TSFlags & X86II::Op0Mask) == X86II::REP)
@@ -1337,6 +1335,15 @@
   case X86II::RawFrm:
     EmitByte(BaseOpcode, CurByte, OS);
     break;
+  case X86II::RawFrmMemOffs:
+    // Emit segment override opcode prefix as needed.
+    EmitSegmentOverridePrefix(CurByte, 1, MI, OS);
+    EmitByte(BaseOpcode, CurByte, OS);
+    EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
+                  X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+                  CurByte, OS, Fixups);
+    ++CurOp; // skip segment operand
+    break;
   case X86II::RawFrmImm8:
     EmitByte(BaseOpcode, CurByte, OS);
     EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index 3e27176..eed3619 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -21,7 +21,7 @@
 def Pseudo     : Format<0>; def RawFrm     : Format<1>;
 def AddRegFrm  : Format<2>; def MRMDestReg : Format<3>;
 def MRMDestMem : Format<4>; def MRMSrcReg  : Format<5>;
-def MRMSrcMem  : Format<6>;
+def MRMSrcMem  : Format<6>; def RawFrmMemOffs : Format<7>;
 def MRM0r  : Format<16>; def MRM1r  : Format<17>; def MRM2r  : Format<18>;
 def MRM3r  : Format<19>; def MRM4r  : Format<20>; def MRM5r  : Format<21>;
 def MRM6r  : Format<22>; def MRM7r  : Format<23>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 1681ad7..1a5deab 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -467,17 +467,21 @@
 }
 
 let OperandType = "OPERAND_MEMORY" in {
-def offset8 : Operand<i64> {
+def offset8 : Operand<iPTR> {
   let ParserMatchClass = X86MemOffs8AsmOperand;
+  let MIOperandInfo = (ops i64imm, i8imm);
   let PrintMethod = "printMemOffs8"; }
-def offset16 : Operand<i64> {
+def offset16 : Operand<iPTR> {
   let ParserMatchClass = X86MemOffs16AsmOperand;
+  let MIOperandInfo = (ops i64imm, i8imm);
   let PrintMethod = "printMemOffs16"; }
-def offset32 : Operand<i64> {
+def offset32 : Operand<iPTR> {
   let ParserMatchClass = X86MemOffs32AsmOperand;
+  let MIOperandInfo = (ops i64imm, i8imm);
   let PrintMethod = "printMemOffs32"; }
-def offset64 : Operand<i64> {
+def offset64 : Operand<iPTR> {
   let ParserMatchClass = X86MemOffs64AsmOperand;
+  let MIOperandInfo = (ops i64imm, i8imm);
   let PrintMethod = "printMemOffs64"; }
 }
 
@@ -1146,44 +1150,44 @@
 /// 32-bit offset from the segment base. These are only valid in x86-32 mode.
 let SchedRW = [WriteALU] in {
 let mayLoad = 1 in {
-def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
+def MOV8o8a : Ii32 <0xA0, RawFrmMemOffs, (outs), (ins offset8:$src),
                    "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>,
                    Requires<[In32BitMode]>;
-def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
+def MOV16o16a : Ii32 <0xA1, RawFrmMemOffs, (outs), (ins offset16:$src),
                       "mov{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>, OpSize,
                      Requires<[In32BitMode]>;
-def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
+def MOV32o32a : Ii32 <0xA1, RawFrmMemOffs, (outs), (ins offset32:$src),
                       "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>,
                       OpSize16, Requires<[In32BitMode]>;
 
-def MOV8o8a_16 : Ii16 <0xA0, RawFrm, (outs), (ins offset8:$src),
+def MOV8o8a_16 : Ii16 <0xA0, RawFrmMemOffs, (outs), (ins offset8:$src),
                    "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>,
                    AdSize, Requires<[In16BitMode]>;
-def MOV16o16a_16 : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src),
+def MOV16o16a_16 : Ii16 <0xA1, RawFrmMemOffs, (outs), (ins offset16:$src),
                       "mov{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>, OpSize,
                      AdSize, Requires<[In16BitMode]>;
-def MOV32o32a_16 : Ii16 <0xA1, RawFrm, (outs), (ins offset32:$src),
+def MOV32o32a_16 : Ii16 <0xA1, RawFrmMemOffs, (outs), (ins offset32:$src),
                       "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>,
                       AdSize, OpSize16, Requires<[In16BitMode]>;
 }
 let mayStore = 1 in {
-def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+def MOV8ao8 : Ii32 <0xA2, RawFrmMemOffs, (outs offset8:$dst), (ins),
                    "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>,
                   Requires<[In32BitMode]>;
-def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+def MOV16ao16 : Ii32 <0xA3, RawFrmMemOffs, (outs offset16:$dst), (ins),
                       "mov{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>, OpSize,
                      Requires<[In32BitMode]>;
-def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
+def MOV32ao32 : Ii32 <0xA3, RawFrmMemOffs, (outs offset32:$dst), (ins),
                       "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>,
                      OpSize16, Requires<[In32BitMode]>;
 
-def MOV8ao8_16 : Ii16 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+def MOV8ao8_16 : Ii16 <0xA2, RawFrmMemOffs, (outs offset8:$dst), (ins),
                    "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>,
                   AdSize, Requires<[In16BitMode]>;
-def MOV16ao16_16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+def MOV16ao16_16 : Ii16 <0xA3, RawFrmMemOffs, (outs offset16:$dst), (ins),
                       "mov{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>, OpSize,
                      AdSize, Requires<[In16BitMode]>;
-def MOV32ao32_16 : Ii16 <0xA3, RawFrm, (outs offset32:$dst), (ins),
+def MOV32ao32_16 : Ii16 <0xA3, RawFrmMemOffs, (outs offset32:$dst), (ins),
                       "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>,
                      OpSize16, AdSize, Requires<[In16BitMode]>;
 }
@@ -1192,31 +1196,31 @@
 // These forms all have full 64-bit absolute addresses in their instructions
 // and use the movabs mnemonic to indicate this specific form.
 let mayLoad = 1 in {
-def MOV64o8a : RIi64_NOREX<0xA0, RawFrm, (outs), (ins offset8:$src),
+def MOV64o8a : RIi64_NOREX<0xA0, RawFrmMemOffs, (outs), (ins offset8:$src),
                      "movabs{b}\t{$src, %al|al, $src}", []>,
                      Requires<[In64BitMode]>;
-def MOV64o16a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset16:$src),
+def MOV64o16a : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset16:$src),
                      "movabs{w}\t{$src, %ax|ax, $src}", []>, OpSize,
                      Requires<[In64BitMode]>;
-def MOV64o32a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset32:$src),
+def MOV64o32a : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset32:$src),
                      "movabs{l}\t{$src, %eax|eax, $src}", []>,
                      Requires<[In64BitMode]>;
-def MOV64o64a : RIi64<0xA1, RawFrm, (outs), (ins offset64:$src),
+def MOV64o64a : RIi64<0xA1, RawFrmMemOffs, (outs), (ins offset64:$src),
                      "movabs{q}\t{$src, %rax|rax, $src}", []>,
                      Requires<[In64BitMode]>;
 }
 
 let mayStore = 1 in {
-def MOV64ao8 : RIi64_NOREX<0xA2, RawFrm, (outs offset8:$dst), (ins),
+def MOV64ao8 : RIi64_NOREX<0xA2, RawFrmMemOffs, (outs offset8:$dst), (ins),
                      "movabs{b}\t{%al, $dst|$dst, al}", []>,
                      Requires<[In64BitMode]>;
-def MOV64ao16 : RIi64_NOREX<0xA3, RawFrm, (outs offset16:$dst), (ins),
+def MOV64ao16 : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs offset16:$dst), (ins),
                      "movabs{w}\t{%ax, $dst|$dst, ax}", []>, OpSize,
                      Requires<[In64BitMode]>;
-def MOV64ao32 : RIi64_NOREX<0xA3, RawFrm, (outs offset32:$dst), (ins),
+def MOV64ao32 : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs offset32:$dst), (ins),
                      "movabs{l}\t{%eax, $dst|$dst, eax}", []>,
                      Requires<[In64BitMode]>;
-def MOV64ao64 : RIi64<0xA3, RawFrm, (outs offset64:$dst), (ins),
+def MOV64ao64 : RIi64<0xA3, RawFrmMemOffs, (outs offset64:$dst), (ins),
                      "movabs{q}\t{%rax, $dst|$dst, rax}", []>,
                      Requires<[In64BitMode]>;
 }