Support explicit argument forms for the X86 string instructions.
For now, only the default segments are supported.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127875 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8fe549b..e0989b0 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -53,6 +53,14 @@
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                                MCStreamer &Out);
 
+  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
+  /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
+  bool isSrcOp(X86Operand &Op);
+
+  /// isDstOp - Returns true if operand is %es:(%rdi) in 64bit mode or
+  /// %es:(%edi) in 32bit mode. The %es segment must be present.
+  bool isDstOp(X86Operand &Op);
+
   /// @name Auto-generated Matcher Functions
   /// {
 
@@ -356,6 +364,24 @@
 
 } // end anonymous namespace.
 
+bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
+  unsigned basereg = Is64Bit ? X86::RSI : X86::ESI;
+  // Accept only a zero-displacement memory reference through basereg.
+  return (Op.isMem() && // checked first, before any Op.Mem field is read
+    (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && // SegReg 0 or %ds
+    isa<MCConstantExpr>(Op.Mem.Disp) && // displacement must be constant...
+    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && // ...and zero
+    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0); // IndexReg must be 0
+}
+
+bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
+  unsigned basereg = Is64Bit ? X86::RDI : X86::EDI;
+  // Accept only a zero-displacement %es reference through basereg.
+  return Op.isMem() && Op.Mem.SegReg == X86::ES && // SegReg 0 is rejected
+    isa<MCConstantExpr>(Op.Mem.Disp) && // displacement must be constant...
+    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && // ...and zero
+    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; // IndexReg must be 0
+}
 
 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
                                     SMLoc &StartLoc, SMLoc &EndLoc) {
@@ -788,7 +814,106 @@
       delete &Op;
     }
   }
-  
+  // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
+  if (Name.startswith("ins") && Operands.size() == 3 &&
+      (Name == "insb" || Name == "insw" || Name == "insl")) {
+    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+    if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
+      Operands.pop_back();
+      Operands.pop_back();
+      delete &Op;
+      delete &Op2;
+    }
+  }
+
+  // Transform "outs[bwl] %ds:(%esi), %dx" into "outs[bwl]"
+  if (Name.startswith("outs") && Operands.size() == 3 &&
+      (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
+    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+    if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
+      Operands.pop_back();
+      Operands.pop_back();
+      delete &Op;
+      delete &Op2;
+    }
+  }
+
+  // Transform "movs[bwlq] %ds:(%esi), %es:(%edi)" into "movs[bwlq]"
+  if (Name.startswith("movs") && Operands.size() == 3 &&
+      (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
+       (Is64Bit && Name == "movsq"))) {
+    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+    if (isSrcOp(Op) && isDstOp(Op2)) {
+      Operands.pop_back();
+      Operands.pop_back();
+      delete &Op;
+      delete &Op2;
+    }
+  }
+  // Transform "lods[bwlq] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwlq]"
+  if (Name.startswith("lods") && Operands.size() == 3 &&
+      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
+       Name == "lodsl" || (Is64Bit && Name == "lodsq"))) {
+    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+    if (isSrcOp(*Op1) && Op2->isReg()) {
+      const char *ins;
+      unsigned reg = Op2->getReg();
+      bool isLods = Name == "lods";
+      if (reg == X86::AL && (isLods || Name == "lodsb"))
+        ins = "lodsb";
+      else if (reg == X86::AX && (isLods || Name == "lodsw"))
+        ins = "lodsw";
+      else if (reg == X86::EAX && (isLods || Name == "lodsl"))
+        ins = "lodsl";
+      else if (reg == X86::RAX && (isLods || Name == "lodsq"))
+        ins = "lodsq";
+      else
+        ins = NULL;
+      if (ins != NULL) {
+        Operands.pop_back();
+        Operands.pop_back();
+        delete Op1;
+        delete Op2;
+        if (Name != ins)
+          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+      }
+    }
+  }
+  // Transform "stos[bwlq] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwlq]"
+  if (Name.startswith("stos") && Operands.size() == 3 &&
+      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
+       Name == "stosl" || (Is64Bit && Name == "stosq"))) {
+    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+    if (isDstOp(*Op2) && Op1->isReg()) {
+      const char *ins;
+      unsigned reg = Op1->getReg();
+      bool isStos = Name == "stos";
+      if (reg == X86::AL && (isStos || Name == "stosb"))
+        ins = "stosb";
+      else if (reg == X86::AX && (isStos || Name == "stosw"))
+        ins = "stosw";
+      else if (reg == X86::EAX && (isStos || Name == "stosl"))
+        ins = "stosl";
+      else if (reg == X86::RAX && (isStos || Name == "stosq"))
+        ins = "stosq";
+      else
+        ins = NULL;
+      if (ins != NULL) {
+        Operands.pop_back();
+        Operands.pop_back();
+        delete Op1;
+        delete Op2;
+        if (Name != ins)
+          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+      }
+    }
+  }
+
   // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
   // "shift <op>".
   if ((Name.startswith("shr") || Name.startswith("sar") ||