Method prologues and epilogues; add missing x86 functionality.

Enables compiling and running a number of JNI internal managed code
methods on the host.

Change-Id: I56fceb813d0cb24637bc784ba57f2d1d16911d48
diff --git a/src/assembler.h b/src/assembler.h
index ae29a24..af9f94d 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -318,7 +318,8 @@
 
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& callee_save_regs) = 0;
+                          const std::vector<ManagedRegister>& callee_save_regs,
+                          const std::vector<ManagedRegister>& entry_spills) = 0;
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
@@ -364,7 +365,7 @@
                                     ThreadOffset offs) = 0;
 
   // Copying routines
-  virtual void Move(ManagedRegister dest, ManagedRegister src) = 0;
+  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
 
   virtual void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset thr_offs,
                                     ManagedRegister scratch) = 0;
diff --git a/src/assembler_arm.cc b/src/assembler_arm.cc
index d452a46..e25e656 100644
--- a/src/assembler_arm.cc
+++ b/src/assembler_arm.cc
@@ -1436,8 +1436,10 @@
 }
 
 void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& callee_save_regs) {
+                              const std::vector<ManagedRegister>& callee_save_regs,
+                              const std::vector<ManagedRegister>& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK_EQ(entry_spills.size(), 0u);
   CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
 
   // Push callee saves and link register
@@ -1656,7 +1658,7 @@
   StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value());
 }
 
-void ArmAssembler::Move(ManagedRegister mdest, ManagedRegister msrc) {
+void ArmAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
   ArmManagedRegister dest = mdest.AsArm();
   ArmManagedRegister src = msrc.AsArm();
   if (!dest.Equals(src)) {
diff --git a/src/assembler_arm.h b/src/assembler_arm.h
index 5b9c32d..c557a36 100644
--- a/src/assembler_arm.h
+++ b/src/assembler_arm.h
@@ -438,7 +438,8 @@
 
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& callee_save_regs);
+                          const std::vector<ManagedRegister>& callee_save_regs,
+                          const std::vector<ManagedRegister>& entry_spills);
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
@@ -484,7 +485,7 @@
                                     ThreadOffset offs);
 
   // Copying routines
-  virtual void Move(ManagedRegister dest, ManagedRegister src);
+  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size);
 
   virtual void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset thr_offs,
                                     ManagedRegister scratch);
diff --git a/src/assembler_x86.cc b/src/assembler_x86.cc
index d2fc708..97663a7 100644
--- a/src/assembler_x86.cc
+++ b/src/assembler_x86.cc
@@ -1395,13 +1395,18 @@
 }
 
 void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& spill_regs) {
+                              const std::vector<ManagedRegister>& spill_regs,
+                              const std::vector<ManagedRegister>& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   CHECK_EQ(0u, spill_regs.size());  // no spilled regs on x86
   // return address then method on stack
   addl(ESP, Immediate(-frame_size + kPointerSize /*method*/ +
                       kPointerSize /*return address*/));
   pushl(method_reg.AsX86().AsCpuRegister());
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    movl(Address(ESP, frame_size + kPointerSize + (i * kPointerSize)),
+         entry_spills.at(i).AsX86().AsCpuRegister());
+  }
 }
 
 void X86Assembler::RemoveFrame(size_t frame_size,
@@ -1575,12 +1580,25 @@
   fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs));
 }
 
-void X86Assembler::Move(ManagedRegister mdest, ManagedRegister msrc) {
+void X86Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
   X86ManagedRegister dest = mdest.AsX86();
   X86ManagedRegister src = msrc.AsX86();
   if (!dest.Equals(src)) {
     if (dest.IsCpuRegister() && src.IsCpuRegister()) {
       movl(dest.AsCpuRegister(), src.AsCpuRegister());
+    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
+      // Pass via stack and pop X87 register
+      subl(ESP, Immediate(16));
+      if (size == 4) {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        fstps(Address(ESP, 0));
+        movss(dest.AsXmmRegister(), Address(ESP, 0));
+      } else {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        fstpl(Address(ESP, 0));
+        movsd(dest.AsXmmRegister(), Address(ESP, 0));
+      }
+      addl(ESP, Immediate(16));
     } else {
       // TODO: x87, SSE
       UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
diff --git a/src/assembler_x86.h b/src/assembler_x86.h
index 2fc486d..24a84e7 100644
--- a/src/assembler_x86.h
+++ b/src/assembler_x86.h
@@ -468,7 +468,8 @@
 
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& callee_save_regs);
+                          const std::vector<ManagedRegister>& callee_save_regs,
+                          const std::vector<ManagedRegister>& entry_spills);
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
@@ -516,7 +517,7 @@
                                     ThreadOffset offs);
 
   // Copying routines
-  virtual void Move(ManagedRegister dest, ManagedRegister src);
+  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size);
 
   virtual void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset thr_offs,
                                     ManagedRegister scratch);
diff --git a/src/calling_convention.h b/src/calling_convention.h
index 09a47e5..3a0eb5a 100644
--- a/src/calling_convention.h
+++ b/src/calling_convention.h
@@ -180,6 +180,9 @@
 
   virtual ~ManagedRuntimeCallingConvention() {}
 
+  // Registers to spill to caller's out registers on entry.
+  virtual const std::vector<ManagedRegister>& EntrySpills() = 0;
+
  protected:
   ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) :
       CallingConvention(is_static, is_synchronized, shorty) {}
diff --git a/src/calling_convention_arm.cc b/src/calling_convention_arm.cc
index 0ada85a..924b291 100644
--- a/src/calling_convention_arm.cc
+++ b/src/calling_convention_arm.cc
@@ -55,6 +55,8 @@
 
 // Managed runtime calling convention
 
+std::vector<ManagedRegister> ArmManagedRuntimeCallingConvention::entry_spills_;
+
 ManagedRegister ArmManagedRuntimeCallingConvention::MethodRegister() {
   return ArmManagedRegister::FromCoreRegister(R0);
 }
diff --git a/src/calling_convention_arm.h b/src/calling_convention_arm.h
index 4f6d4aa..f03429f 100644
--- a/src/calling_convention_arm.h
+++ b/src/calling_convention_arm.h
@@ -36,8 +36,13 @@
   virtual bool IsCurrentParamOnStack();
   virtual ManagedRegister CurrentParamRegister();
   virtual FrameOffset CurrentParamStackOffset();
-
+  virtual const std::vector<ManagedRegister>& EntrySpills() {
+    DCHECK(entry_spills_.empty());
+    return entry_spills_;
+  }
  private:
+  static std::vector<ManagedRegister> entry_spills_;
+
   DISALLOW_COPY_AND_ASSIGN(ArmManagedRuntimeCallingConvention);
 };
 
diff --git a/src/calling_convention_x86.cc b/src/calling_convention_x86.cc
index 9d5bca9..ee002c5 100644
--- a/src/calling_convention_x86.cc
+++ b/src/calling_convention_x86.cc
@@ -36,9 +36,13 @@
   return ManagedRegister::NoRegister();  // No free regs, so assembler uses push/pop
 }
 
-static ManagedRegister ReturnRegisterForShorty(const char* shorty) {
+static ManagedRegister ReturnRegisterForShorty(const char* shorty, bool jni) {
   if (shorty[0] == 'F' || shorty[0] == 'D') {
-    return X86ManagedRegister::FromX87Register(ST0);
+    if (jni) {
+      return X86ManagedRegister::FromX87Register(ST0);
+    } else {
+      return X86ManagedRegister::FromXmmRegister(XMM0);
+    }
   } else if (shorty[0] == 'J') {
     return X86ManagedRegister::FromRegisterPair(EAX_EDX);
   } else if (shorty[0] == 'V') {
@@ -49,11 +53,11 @@
 }
 
 ManagedRegister X86ManagedRuntimeCallingConvention::ReturnRegister() {
-  return ReturnRegisterForShorty(GetShorty());
+  return ReturnRegisterForShorty(GetShorty(), false);
 }
 
 ManagedRegister X86JniCallingConvention::ReturnRegister() {
-  return ReturnRegisterForShorty(GetShorty());
+  return ReturnRegisterForShorty(GetShorty(), true);
 }
 
 // Managed runtime calling convention
@@ -81,6 +85,21 @@
                      (itr_slots_ * kPointerSize));  // offset into in args
 }
 
+const std::vector<ManagedRegister>& X86ManagedRuntimeCallingConvention::EntrySpills() {
+  // We spill the argument registers on X86 to free them up for scratch use; we then assume
+  // all arguments are on the stack.
+  if (entry_spills_.size() == 0) {
+    size_t num_spills = NumArgs() + NumLongOrDoubleArgs();
+    if (num_spills > 0) {
+      entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(EDX));
+      if (num_spills > 1) {
+        entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(ECX));
+      }
+    }
+  }
+  return entry_spills_;
+}
+
 // JNI calling convention
 
 std::vector<ManagedRegister> X86JniCallingConvention::callee_save_regs_;
@@ -103,11 +122,11 @@
 }
 
 bool X86JniCallingConvention::IsCurrentParamInRegister() {
-  return false;  // Everything is passed by stack
+  return false;  // Everything is passed by stack.
 }
 
 bool X86JniCallingConvention::IsCurrentParamOnStack() {
-  return true;  // Everything is passed by stack
+  return true;  // Everything is passed by stack.
 }
 
 ManagedRegister X86JniCallingConvention::CurrentParamRegister() {
diff --git a/src/calling_convention_x86.h b/src/calling_convention_x86.h
index cb2b89a..4bca318 100644
--- a/src/calling_convention_x86.h
+++ b/src/calling_convention_x86.h
@@ -37,8 +37,9 @@
   virtual bool IsCurrentParamOnStack();
   virtual ManagedRegister CurrentParamRegister();
   virtual FrameOffset CurrentParamStackOffset();
-
+  virtual const std::vector<ManagedRegister>& EntrySpills();
  private:
+  std::vector<ManagedRegister> entry_spills_;
   DISALLOW_COPY_AND_ASSIGN(X86ManagedRuntimeCallingConvention);
 };
 
diff --git a/src/compiled_method.cc b/src/compiled_method.cc
index b998b3c..259ee44 100644
--- a/src/compiled_method.cc
+++ b/src/compiled_method.cc
@@ -35,7 +35,12 @@
     : instruction_set_(instruction_set), frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask) {
   CHECK_NE(code.size(), 0U);
-  CHECK_GE(vmap_table.size(), 1U);  // should always contain an entry for LR
+  if (instruction_set != kX86) {
+    CHECK_GE(vmap_table.size(), 1U);  // should always contain an entry for LR
+  }
+  DCHECK_EQ(vmap_table.size(),
+            static_cast<uint32_t>(__builtin_popcount(core_spill_mask)
+                                  + __builtin_popcount(fp_spill_mask)));
   CHECK_LE(vmap_table.size(), (1U << 16) - 1); // length must fit in 2^16-1
 
   size_t code_byte_count = code.size() * sizeof(code[0]);
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 4f071b1..611d1df 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -134,7 +134,7 @@
     struct LIR* prev;
     struct LIR* target;
     int opcode;
-    int operands[4];            // [0..3] = [dest, src1, src2, extra]
+    int operands[5];            // [0..4] = [dest, src1, src2, extra, extra2]
     struct {
         bool isNop:1;           // LIR is optimized away
         bool pcRelFixup:1;      // May need pc-relative fixup
@@ -450,22 +450,24 @@
 std::ostream& operator<<(std::ostream& os, const OpKind& kind);
 
 enum ConditionCode {
-    kCondEq,
-    kCondNe,
-    kCondCs,
-    kCondCc,
-    kCondMi,
-    kCondPl,
-    kCondVs,
-    kCondVc,
-    kCondHi,
-    kCondLs,
-    kCondGe,
-    kCondLt,
-    kCondGt,
-    kCondLe,
-    kCondAl,
-    kCondNv,
+    kCondEq,  // equal
+    kCondNe,  // not equal
+    kCondCs,  // carry set (unsigned less than)
+    kCondUlt = kCondCs,
+    kCondCc,  // carry clear (unsigned greater than or same)
+    kCondUge = kCondCc,
+    kCondMi,  // minus
+    kCondPl,  // plus, positive or zero
+    kCondVs,  // overflow
+    kCondVc,  // no overflow
+    kCondHi,  // unsigned greater than
+    kCondLs,  // unsigned lower or same
+    kCondGe,  // signed greater than or equal
+    kCondLt,  // signed less than
+    kCondGt,  // signed greater than
+    kCondLe,  // signed less than or equal
+    kCondAl,  // always
+    kCondNv,  // never
 };
 
 enum ThrowKind {
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 4271722..1b8bcc1 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -1002,18 +1002,15 @@
     for (size_t i = 0 ; i < cUnit->coreVmapTable.size(); i++) {
         vmapTable.push_back(cUnit->coreVmapTable[i]);
     }
-    // Add a marker to take place of lr
-    vmapTable.push_back(INVALID_VREG);
+    if (cUnit->instructionSet != kX86) {
+        // Add a marker to take place of lr
+        vmapTable.push_back(INVALID_VREG);
+    }
     // Combine vmap tables - core regs, then fp regs
     for (uint32_t i = 0; i < cUnit->fpVmapTable.size(); i++) {
         vmapTable.push_back(cUnit->fpVmapTable[i]);
     }
-    DCHECK_EQ(vmapTable.size(),
-              static_cast<uint32_t>(__builtin_popcount(cUnit->coreSpillMask)
-                                    + __builtin_popcount(cUnit->fpSpillMask)));
-    DCHECK_GE(vmapTable.size(), 1U);  // should always at least one INVALID_VREG for lr
-
-    CompiledMethod* result = new CompiledMethod(kThumb2, cUnit->codeBuffer,
+    CompiledMethod* result = new CompiledMethod(cUnit->instructionSet, cUnit->codeBuffer,
                                                 cUnit->frameSize, cUnit->coreSpillMask,
                                                 cUnit->fpSpillMask, cUnit->mappingTable,
                                                 vmapTable);
diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc
index 5176edc..2d85812 100644
--- a/src/compiler/Ralloc.cc
+++ b/src/compiler/Ralloc.cc
@@ -305,7 +305,7 @@
 /*
  * Simple register allocation.  Some Dalvik virtual registers may
  * be promoted to physical registers.  Most of the work for temp
- * allocation is done on the fly.  We also do some initilization and
+ * allocation is done on the fly.  We also do some initialization and
  * type inference here.
  */
 void oatSimpleRegAlloc(CompilationUnit* cUnit)
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index 406c037..27433ca 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -51,19 +51,18 @@
 }
 
 /*
- * Mark load/store instructions that access Dalvik registers through r5FP +
- * offset.
+ * Mark load/store instructions that access Dalvik registers through the stack.
  */
-void annotateDalvikRegAccess(LIR* lir, int regId, bool isLoad)
+void annotateDalvikRegAccess(LIR* lir, int regId, bool isLoad, bool is64bit)
 {
     setMemRefType(lir, isLoad, kDalvikReg);
 
     /*
-     * Store the Dalvik register id in aliasInfo. Mark he MSB if it is a 64-bit
+     * Store the Dalvik register id in aliasInfo. Mark the MSB if it is a 64-bit
      * access.
      */
     lir->aliasInfo = regId;
-    if (DOUBLEREG(lir->operands[0])) {
+    if (is64bit) {
         lir->aliasInfo |= 0x80000000;
     }
 }
@@ -416,7 +415,7 @@
 
 
 LIR* rawLIR(CompilationUnit* cUnit, int dalvikOffset, int opcode, int op0,
-            int op1, int op2, int op3, LIR* target)
+            int op1, int op2, int op3, int op4, LIR* target)
 {
     LIR* insn = (LIR* ) oatNew(cUnit, sizeof(LIR), true, kAllocLIR);
     insn->dalvikOffset = dalvikOffset;
@@ -425,6 +424,7 @@
     insn->operands[1] = op1;
     insn->operands[2] = op2;
     insn->operands[3] = op3;
+    insn->operands[4] = op4;
     insn->target = target;
     oatSetupResourceMasks(insn);
     if (opcode == kPseudoTargetLabel) {
@@ -440,7 +440,10 @@
  */
 LIR* newLIR0(CompilationUnit* cUnit, int opcode)
 {
-    DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & NO_OPERAND));
+    DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & NO_OPERAND))
+                << EncodingMap[opcode].name << " " << (int)opcode << " "
+                << PrettyMethod(cUnit->method_idx, *cUnit->dex_file) << " "
+                << cUnit->currentDalvikOffset;
     LIR* insn = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode);
     oatAppendLIR(cUnit, (LIR*) insn);
     return insn;
@@ -449,7 +452,10 @@
 LIR* newLIR1(CompilationUnit* cUnit, int opcode,
                            int dest)
 {
-    DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_UNARY_OP));
+    DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_UNARY_OP))
+                << EncodingMap[opcode].name << " " << (int)opcode << " "
+                << PrettyMethod(cUnit->method_idx, *cUnit->dex_file) << " "
+                << cUnit->currentDalvikOffset;
     LIR* insn = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, dest);
     oatAppendLIR(cUnit, (LIR*) insn);
     return insn;
@@ -458,8 +464,10 @@
 LIR* newLIR2(CompilationUnit* cUnit, int opcode,
                            int dest, int src1)
 {
-    DCHECK(isPseudoOpcode(opcode) ||
-           (EncodingMap[opcode].flags & IS_BINARY_OP));
+    DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_BINARY_OP))
+                << EncodingMap[opcode].name << " " << (int)opcode << " "
+                << PrettyMethod(cUnit->method_idx, *cUnit->dex_file) << " "
+                << cUnit->currentDalvikOffset;
     LIR* insn = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, dest, src1);
     oatAppendLIR(cUnit, (LIR*) insn);
     return insn;
@@ -468,11 +476,10 @@
 LIR* newLIR3(CompilationUnit* cUnit, int opcode,
                            int dest, int src1, int src2)
 {
-    DCHECK(isPseudoOpcode(opcode) ||
-           (EncodingMap[opcode].flags & IS_TERTIARY_OP))
-            << (int)opcode << " "
-            << PrettyMethod(cUnit->method_idx, *cUnit->dex_file) << " "
-            << cUnit->currentDalvikOffset;
+    DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_TERTIARY_OP))
+                << EncodingMap[opcode].name << " " << (int)opcode << " "
+                << PrettyMethod(cUnit->method_idx, *cUnit->dex_file) << " "
+                << cUnit->currentDalvikOffset;
     LIR* insn = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, dest, src1,
                        src2);
     oatAppendLIR(cUnit, (LIR*) insn);
@@ -480,16 +487,31 @@
 }
 
 LIR* newLIR4(CompilationUnit* cUnit, int opcode,
-                           int dest, int src1, int src2, int info)
+            int dest, int src1, int src2, int info)
 {
-    DCHECK(isPseudoOpcode(opcode) ||
-           (EncodingMap[opcode].flags & IS_QUAD_OP));
+    DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_QUAD_OP))
+                << EncodingMap[opcode].name << " " << (int)opcode << " "
+                << PrettyMethod(cUnit->method_idx, *cUnit->dex_file) << " "
+                << cUnit->currentDalvikOffset;
     LIR* insn = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, dest, src1,
                        src2, info);
     oatAppendLIR(cUnit, (LIR*) insn);
     return insn;
 }
 
+LIR* newLIR5(CompilationUnit* cUnit, int opcode,
+             int dest, int src1, int src2, int info1, int info2)
+{
+    DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_QUIN_OP))
+                << EncodingMap[opcode].name << " " << (int)opcode << " "
+                << PrettyMethod(cUnit->method_idx, *cUnit->dex_file) << " "
+                << cUnit->currentDalvikOffset;
+    LIR* insn = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, dest, src1,
+                       src2, info1, info2);
+    oatAppendLIR(cUnit, (LIR*) insn);
+    return insn;
+}
+
 /*
  * Search the existing constants in the literal pool for an exact or close match
  * within specified delta (greater or equal to 0).
diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h
index 9537e46..571d6da 100644
--- a/src/compiler/codegen/CompilerCodegen.h
+++ b/src/compiler/codegen/CompilerCodegen.h
@@ -22,7 +22,7 @@
 namespace art {
 
 LIR* rawLIR(CompilationUnit* cUnit, int dalvikOffset, int opcode, int op0 = 0,
-            int op1 = 0, int op2 = 0, int op3 = 0, LIR* target = NULL);
+            int op1 = 0, int op2 = 0, int op3 = 0, int op4 = 0, LIR* target = NULL);
 
 int oatGetInsnSize(LIR* lir);
 
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index c43d8ff..e2c306d 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -94,7 +94,7 @@
 
 /* Perform check on two registers */
 LIR* genRegRegCheck(CompilationUnit* cUnit, ConditionCode cCode,
-                        int reg1, int reg2, MIR* mir, ThrowKind kind)
+                     int reg1, int reg2, MIR* mir, ThrowKind kind)
 {
     LIR* tgt = rawLIR(cUnit, 0, kPseudoThrowTarget, kind,
                       mir ? mir->offset : 0, reg1, reg2);
@@ -325,11 +325,15 @@
 #endif
         // Set up source pointer
         RegLocation rlFirst = oatGetSrc(cUnit, mir, 0);
+#if defined(TARGET_X86)
+        UNIMPLEMENTED(FATAL);
+#else
         opRegRegImm(cUnit, kOpAdd, rSrc, rSP,
                     oatSRegOffset(cUnit, rlFirst.sRegLow));
         // Set up the target pointer
         opRegRegImm(cUnit, kOpAdd, rDst, rRET0,
                     Array::DataOffset(component_size).Int32Value());
+#endif
         // Set up the loop counter (known to be > 0)
         loadConstant(cUnit, rIdx, dInsn->vA - 1);
         // Generate the copy loop.  Going backwards for convenience
@@ -365,9 +369,6 @@
 void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
              bool isLongOrDouble, bool isObject)
 {
-#if defined(TARGET_X86)
-    UNIMPLEMENTED(WARNING) << "genSput";
-#else
     int fieldOffset;
     int ssbIndex;
     bool isVolatile;
@@ -461,13 +462,16 @@
         int rTgt = loadHelper(cUnit, setterOffset);
         loadConstant(cUnit, rARG0, fieldIdx);
         if (isLongOrDouble) {
+#if defined(TARGET_X86)
+            UNIMPLEMENTED(FATAL);
+#else
             loadValueDirectWideFixed(cUnit, rlSrc, rARG2, rARG3);
+#endif
         } else {
             loadValueDirect(cUnit, rlSrc, rARG1);
         }
         callRuntimeHelper(cUnit, rTgt);
     }
-#endif
 }
 
 void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
@@ -598,9 +602,6 @@
 
 void handleSuspendLaunchpads(CompilationUnit *cUnit)
 {
-#if defined(TARGET_X86)
-    UNIMPLEMENTED(WARNING);
-#else
     LIR** suspendLabel =
         (LIR **) cUnit->suspendLaunchpads.elemList;
     int numElems = cUnit->suspendLaunchpads.numUsed;
@@ -616,7 +617,6 @@
         opReg(cUnit, kOpBlx, rTgt);
         opUnconditionalBranch(cUnit, resumeLab);
     }
-#endif
 }
 
 void handleThrowLaunchpads(CompilationUnit *cUnit)
@@ -723,6 +723,15 @@
         if (isLongOrDouble) {
             DCHECK(rlDest.wide);
             genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir);/* null? */
+#if defined(TARGET_X86)
+            rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+            genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir);/* null? */
+            loadBaseDispWide(cUnit, mir, rlObj.lowReg, fieldOffset, rlResult.lowReg,
+                             rlResult.highReg, rlObj.sRegLow);
+            if (isVolatile) {
+                oatGenMemBarrier(cUnit, kSY);
+            }
+#else
             int regPtr = oatAllocTemp(cUnit);
             opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
             rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
@@ -731,6 +740,7 @@
                 oatGenMemBarrier(cUnit, kSY);
             }
             oatFreeTemp(cUnit, regPtr);
+#endif
             storeValueWide(cUnit, rlDest, rlResult);
         } else {
             rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
@@ -763,9 +773,6 @@
 void genIPut(CompilationUnit* cUnit, MIR* mir, OpSize size, RegLocation rlSrc,
              RegLocation rlObj, bool isLongOrDouble, bool isObject)
 {
-#if defined(TARGET_X86)
-    UNIMPLEMENTED(WARNING);
-#else
     int fieldOffset;
     bool isVolatile;
     uint32_t fieldIdx = mir->dalvikInsn.vC;
@@ -813,14 +820,17 @@
         int rTgt = loadHelper(cUnit, setterOffset);
         loadValueDirect(cUnit, rlObj, rARG1);
         if (isLongOrDouble) {
+#if defined(TARGET_X86)
+            UNIMPLEMENTED(FATAL);
+#else
             loadValueDirectWide(cUnit, rlSrc, rARG2, rARG3);
+#endif
         } else {
             loadValueDirect(cUnit, rlSrc, rARG2);
         }
         loadConstant(cUnit, rARG0, fieldIdx);
         callRuntimeHelper(cUnit, rTgt);
     }
-#endif
 }
 
 void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
@@ -1091,7 +1101,7 @@
     /* rARG1 now contains object->clazz */
     int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
                           pCheckCastFromCode));
-#if defined(TARGET_MIPS)
+#if defined(TARGET_MIPS) || defined(TARGET_X86)
     LIR* branch2 = opCmpBranch(cUnit, kCondEq, rARG1, classReg, NULL);
 #else
     opRegReg(cUnit, kOpCmp, rARG1, classReg);
@@ -1188,7 +1198,6 @@
     RegLocation rlResult;
     rlArray = loadValue(cUnit, rlArray, kCoreReg);
     rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
-    int regPtr;
 
     if (size == kLong || size == kDouble) {
       dataOffset = Array::DataOffset(sizeof(int64_t)).Int32Value();
@@ -1199,14 +1208,36 @@
     /* null object? */
     genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, mir);
 
-    regPtr = oatAllocTemp(cUnit);
+#if defined(TARGET_X86)
+    if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+        /* if (rlIndex >= [rlArray + lenOffset]) goto kThrowArrayBounds */
+        genRegMemCheck(cUnit, kCondUge, rlIndex.lowReg, rlArray.lowReg,
+                       lenOffset, mir, kThrowArrayBounds);
+    }
+    if ((size == kLong) || (size == kDouble)) {
+        rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+        loadBaseIndexedDisp(cUnit, NULL, rlArray.lowReg, rlIndex.lowReg, scale, dataOffset,
+                            rlResult.lowReg, rlResult.highReg, size, INVALID_SREG);
 
+        storeValueWide(cUnit, rlDest, rlResult);
+    } else {
+        rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+
+        loadBaseIndexedDisp(cUnit, NULL, rlArray.lowReg, rlIndex.lowReg, scale, dataOffset,
+                            rlResult.lowReg, INVALID_REG, size, INVALID_SREG);
+
+        storeValue(cUnit, rlDest, rlResult);
+    }
+#else
+    int regPtr = oatAllocTemp(cUnit);
     if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
         int regLen = oatAllocTemp(cUnit);
         /* Get len */
         loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
         /* regPtr -> array data */
         opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
+        // TODO: change kCondCS to a more meaningful name, is the sense of
+        // carry-set/clear flipped?
         genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
                        kThrowArrayBounds);
         oatFreeTemp(cUnit, regLen);
@@ -1240,6 +1271,7 @@
         oatFreeTemp(cUnit, regPtr);
         storeValue(cUnit, rlDest, rlResult);
     }
+#endif
 }
 
 /*
@@ -1405,11 +1437,10 @@
     bool callOut = false;
     bool checkZero = false;
     bool unary = false;
-    int retReg = rRET0;
-    int funcOffset;
     RegLocation rlResult;
     bool shiftOp = false;
-
+    int funcOffset;
+    int retReg = rRET0;
     switch (mir->dalvikInsn.opcode) {
         case Instruction::NEG_INT:
             op = kOpNeg;
@@ -1433,16 +1464,18 @@
             break;
         case Instruction::DIV_INT:
         case Instruction::DIV_INT_2ADDR:
-            callOut = true;
             checkZero = true;
+            op = kOpDiv;
+            callOut = true;
             funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
             retReg = rRET0;
             break;
         /* NOTE: returns in rARG1 */
         case Instruction::REM_INT:
         case Instruction::REM_INT_2ADDR:
-            callOut = true;
             checkZero = true;
+            op = kOpRem;
+            callOut = true;
             funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
             retReg = rRET1;
             break;
@@ -1485,6 +1518,11 @@
                      rlSrc1.lowReg);
         } else {
             rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
+#if defined(TARGET_X86)
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            opRegRegReg(cUnit, op, rlResult.lowReg,
+                        rlSrc1.lowReg, rlSrc2.lowReg);
+#else
             if (shiftOp) {
                 int tReg = oatAllocTemp(cUnit);
                 opRegRegImm(cUnit, kOpAnd, tReg, rlSrc2.lowReg, 31);
@@ -1497,6 +1535,7 @@
                 opRegRegReg(cUnit, op, rlResult.lowReg,
                             rlSrc1.lowReg, rlSrc2.lowReg);
             }
+#endif
         }
         storeValue(cUnit, rlDest, rlResult);
     } else {
@@ -1782,10 +1821,6 @@
 bool genArithOpLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
                     RegLocation rlSrc1, RegLocation rlSrc2)
 {
-#if defined(TARGET_X86)
-    UNIMPLEMENTED(WARNING) << "genArithOpLong";
-    return false;
-#else
     RegLocation rlResult;
     OpKind firstOp = kOpBkpt;
     OpKind secondOp = kOpBkpt;
@@ -1879,7 +1914,11 @@
         int rTgt;
         oatFlushAllRegs(cUnit);   /* Send everything to home location */
         if (checkZero) {
+#if defined(TARGET_X86)
+            UNIMPLEMENTED(FATAL);
+#else
             loadValueDirectWideFixed(cUnit, rlSrc2, rARG2, rARG3);
+#endif
             rTgt = loadHelper(cUnit, funcOffset);
             loadValueDirectWideFixed(cUnit, rlSrc1, rARG0, rARG1);
             int tReg = oatAllocTemp(cUnit);
@@ -1888,14 +1927,22 @@
             oatFreeTemp(cUnit, tReg);
             genCheck(cUnit, kCondEq, mir, kThrowDivZero);
 #else
+#if defined(TARGET_X86)
+            UNIMPLEMENTED(FATAL);
+#else
             opRegRegReg(cUnit, kOpOr, tReg, rARG2, rARG3);
+#endif
             genImmedCheck(cUnit, kCondEq, tReg, 0, mir, kThrowDivZero);
             oatFreeTemp(cUnit, tReg);
 #endif
         } else {
             rTgt = loadHelper(cUnit, funcOffset);
             loadValueDirectWideFixed(cUnit, rlSrc1, rARG0, rARG1);
+#if defined(TARGET_X86)
+            UNIMPLEMENTED(FATAL);
+#else
             loadValueDirectWideFixed(cUnit, rlSrc2, rARG2, rARG3);
+#endif
         }
         callRuntimeHelper(cUnit, rTgt);
         // Adjust return regs in to handle case of rem returning rARG2/rARG3
@@ -1906,7 +1953,6 @@
         storeValueWide(cUnit, rlDest, rlResult);
     }
     return false;
-#endif
 }
 
 bool genConversionCall(CompilationUnit* cUnit, MIR* mir, int funcOffset,
@@ -1993,11 +2039,6 @@
                               RegLocation rlDest, RegLocation rlSrc1,
                               RegLocation rlSrc2)
 {
-#if defined(TARGET_X86)
-//NOTE: probably don't need the portable versions for x86
-    UNIMPLEMENTED(WARNING) << "genArithOpDoublePortable";
-    return false;
-#else
     RegLocation rlResult;
     int funcOffset;
 
@@ -2032,12 +2073,15 @@
     oatFlushAllRegs(cUnit);   /* Send everything to home location */
     int rTgt = loadHelper(cUnit, funcOffset);
     loadValueDirectWideFixed(cUnit, rlSrc1, rARG0, rARG1);
+#if defined(TARGET_X86)
+    UNIMPLEMENTED(FATAL);
+#else
     loadValueDirectWideFixed(cUnit, rlSrc2, rARG2, rARG3);
+#endif
     callRuntimeHelper(cUnit, rTgt);
     rlResult = oatGetReturnWide(cUnit);
     storeValueWide(cUnit, rlDest, rlResult);
     return false;
-#endif
 }
 
 bool genConversionPortable(CompilationUnit* cUnit, MIR* mir)
@@ -2092,9 +2136,6 @@
  */
 void genDebuggerUpdate(CompilationUnit* cUnit, int32_t offset)
 {
-#if defined(TARGET_X86)
-    UNIMPLEMENTED(WARNING);
-#else
     // Following DCHECK verifies that dPC is in range of single load immediate
     DCHECK((offset == DEBUGGER_METHOD_ENTRY) ||
            (offset == DEBUGGER_METHOD_EXIT) || ((offset & 0xffff) == offset));
@@ -2104,6 +2145,8 @@
     opIT(cUnit, kArmCondNe, "T");
     loadConstant(cUnit, rARG2, offset);     // arg2 <- Entry code
     opReg(cUnit, kOpBlx, rSUSPEND);
+#elif defined(TARGET_X86)
+    UNIMPLEMENTED(FATAL);
 #else
     LIR* branch = opCmpImmBranch(cUnit, kCondEq, rSUSPEND, 0, NULL);
     loadConstant(cUnit, rARG2, offset);
@@ -2112,15 +2155,11 @@
     branch->target = (LIR*)target;
 #endif
     oatFreeTemp(cUnit, rARG2);
-#endif
 }
 
 /* Check if we need to check for pending suspend request */
 void genSuspendTest(CompilationUnit* cUnit, MIR* mir)
 {
-#if defined(TARGET_X86)
-    UNIMPLEMENTED(WARNING) << "genSuspendTest";
-#else
     if (NO_SUSPEND || (mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK)) {
         return;
     }
@@ -2131,15 +2170,23 @@
                               pTestSuspendFromCode));
         opReg(cUnit, kOpBlx, rTgt);
         // Refresh rSUSPEND
+#if defined(TARGET_X86)
+        UNIMPLEMENTED(FATAL);
+#else
+
         loadWordDisp(cUnit, rSELF,
                      OFFSETOF_MEMBER(Thread, pUpdateDebuggerFromCode),
                      rSUSPEND);
+#endif
     } else {
-        LIR* branch;
+        LIR* branch = NULL;
 #if defined(TARGET_ARM)
         // In non-debug case, only check periodically
         newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
         branch = opCondBranch(cUnit, kCondEq, NULL);
+#elif defined(TARGET_X86)
+        newLIR2(cUnit, kX86Cmp32TI, Thread::SuspendCountOffset().Int32Value(), 0);
+        branch = opCondBranch(cUnit, kCondNe, NULL);
 #else
         opRegImm(cUnit, kOpSub, rSUSPEND, 1);
         branch = opCmpImmBranch(cUnit, kCondEq, rSUSPEND, 0, NULL);
@@ -2150,7 +2197,6 @@
         branch->target = (LIR*)target;
         oatInsertGrowableList(cUnit, &cUnit->suspendLaunchpads, (intptr_t)target);
     }
-#endif
 }
 
 }  // namespace art
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 2f332f5..e3370df 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -689,6 +689,7 @@
 #define IS_BINARY_OP    (1 << kIsBinaryOp)
 #define IS_TERTIARY_OP  (1 << kIsTertiaryOp)
 #define IS_QUAD_OP      (1 << kIsQuadOp)
+#define IS_QUIN_OP      0
 #define IS_IT           (1 << kIsIT)
 #define SETS_CCODES     (1 << kSetsCCodes)
 #define USES_CCODES     (1 << kUsesCCodes)
diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc
index e012c4e..8901bf3 100644
--- a/src/compiler/codegen/arm/Assemble.cc
+++ b/src/compiler/codegen/arm/Assemble.cc
@@ -1052,7 +1052,7 @@
 
                     // Add new Adr to generate the address
                     LIR* newAdr = rawLIR(cUnit, lir->dalvikOffset, kThumb2Adr,
-                                         baseReg, 0, 0, 0, lir->target);
+                                         baseReg, 0, 0, 0, 0, lir->target);
                     oatInsertLIRBefore((LIR*)lir, (LIR*)newAdr);
 
                     // Convert to normal load
@@ -1086,7 +1086,7 @@
                     LIR* newInst =
                         rawLIR(cUnit, lir->dalvikOffset, kThumbBCond, 0,
                                (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe,
-                               0, 0, lir->target);
+                               0, 0, 0, lir->target);
                     oatInsertLIRAfter((LIR *)lir, (LIR *)newInst);
                     /* Convert the cb[n]z to a cmp rx, #0 ] */
                     lir->opcode = kThumbCmpRI8;
@@ -1206,12 +1206,12 @@
                     LIR *newMov16L =
                         rawLIR(cUnit, lir->dalvikOffset, kThumb2MovImm16LST,
                                lir->operands[0], 0, (intptr_t)lir, (intptr_t)tabRec,
-                               lir->target);
+                               0, lir->target);
                     oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16L);
                     LIR *newMov16H =
                         rawLIR(cUnit, lir->dalvikOffset, kThumb2MovImm16HST,
                                lir->operands[0], 0, (intptr_t)lir, (intptr_t)tabRec,
-                               lir->target);
+                               0, lir->target);
                     oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16H);
                     lir->opcode = kThumb2AddRRR;
                     lir->operands[1] = rPC;
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index ad6e40d..fdf0ca2 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -69,7 +69,7 @@
         dataTarget = addWordData(cUnit, &cUnit->literalList, value);
     }
     LIR* loadPcRel = rawLIR(cUnit, cUnit->currentDalvikOffset, kThumb2Vldrs,
-                            rDest, r15pc, 0, 0, dataTarget);
+                            rDest, r15pc, 0, 0, 0, dataTarget);
     setMemRefType(loadPcRel, true, kLiteral);
     loadPcRel->aliasInfo = (intptr_t)dataTarget;
     oatAppendLIR(cUnit, (LIR* ) loadPcRel);
@@ -172,7 +172,7 @@
         dataTarget = addWordData(cUnit, &cUnit->literalList, value);
     }
     LIR* loadPcRel = rawLIR(cUnit, cUnit->currentDalvikOffset,
-                            kThumb2LdrPcRel12, rDest, 0, 0, 0, dataTarget);
+                            kThumb2LdrPcRel12, rDest, 0, 0, 0, 0, dataTarget);
     setMemRefType(loadPcRel, true, kLiteral);
     loadPcRel->aliasInfo = (intptr_t)dataTarget;
     res = loadPcRel;
@@ -627,7 +627,7 @@
             }
             LIR* loadPcRel = rawLIR(cUnit, cUnit->currentDalvikOffset,
                                     kThumb2Vldrd, S2D(rDestLo, rDestHi),
-                                    r15pc, 0, 0, dataTarget);
+                                    r15pc, 0, 0, 0, dataTarget);
             setMemRefType(loadPcRel, true, kLiteral);
             loadPcRel->aliasInfo = (intptr_t)dataTarget;
             oatAppendLIR(cUnit, (LIR* ) loadPcRel);
@@ -784,10 +784,11 @@
     bool thumb2Form = (displacement < 4092 && displacement >= 0);
     bool allLowRegs = (LOWREG(rBase) && LOWREG(rDest));
     int encodedDisp = displacement;
-
+    bool is64bit = false;
     switch (size) {
         case kDouble:
         case kLong:
+            is64bit = true;
             if (FPREG(rDest)) {
                 if (SINGLEREG(rDest)) {
                     DCHECK(FPREG(rDestHi));
@@ -883,7 +884,7 @@
 
     // TODO: in future may need to differentiate Dalvik accesses w/ spills
     if (rBase == rSP) {
-        annotateDalvikRegAccess(load, displacement >> 2, true /* isLoad */);
+        annotateDalvikRegAccess(load, displacement >> 2, true /* isLoad */, is64bit);
     }
     return load;
 }
@@ -912,10 +913,11 @@
     bool thumb2Form = (displacement < 4092 && displacement >= 0);
     bool allLowRegs = (LOWREG(rBase) && LOWREG(rSrc));
     int encodedDisp = displacement;
-
+    bool is64bit = false;
     switch (size) {
         case kLong:
         case kDouble:
+            is64bit = true;
             if (!FPREG(rSrc)) {
                 res = storeBaseDispBody(cUnit, rBase, displacement, rSrc,
                                         -1, kWord);
@@ -990,7 +992,7 @@
 
     // TODO: In future, may need to differentiate Dalvik & spill accesses
     if (rBase == rSP) {
-        annotateDalvikRegAccess(store, displacement >> 2, false /* isLoad */);
+        annotateDalvikRegAccess(store, displacement >> 2, false /* isLoad */, is64bit);
     }
     return res;
 }
diff --git a/src/compiler/codegen/mips/Assemble.cc b/src/compiler/codegen/mips/Assemble.cc
index e064da9..07a9975 100644
--- a/src/compiler/codegen/mips/Assemble.cc
+++ b/src/compiler/codegen/mips/Assemble.cc
@@ -488,18 +488,18 @@
     if (!unconditional) {
         hopTarget = rawLIR(cUnit, dalvikOffset, kPseudoTargetLabel);
         LIR* hopBranch = rawLIR(cUnit, dalvikOffset, opcode, lir->operands[0],
-                                lir->operands[1], 0, 0, hopTarget);
+                                lir->operands[1], 0, 0, 0, hopTarget);
         oatInsertLIRBefore(lir, hopBranch);
     }
     LIR* currPC = rawLIR(cUnit, dalvikOffset, kMipsCurrPC);
     oatInsertLIRBefore(lir, currPC);
     LIR* anchor = rawLIR(cUnit, dalvikOffset, kPseudoTargetLabel);
     LIR* deltaHi = rawLIR(cUnit, dalvikOffset, kMipsDeltaHi, r_AT, 0,
-                          (uintptr_t)anchor, 0, lir->target);
+                          (uintptr_t)anchor, 0, 0, lir->target);
     oatInsertLIRBefore(lir, deltaHi);
     oatInsertLIRBefore(lir, anchor);
     LIR* deltaLo = rawLIR(cUnit, dalvikOffset, kMipsDeltaLo, r_AT, 0,
-                          (uintptr_t)anchor, 0, lir->target);
+                          (uintptr_t)anchor, 0, 0, lir->target);
     oatInsertLIRBefore(lir, deltaLo);
     LIR* addu = rawLIR(cUnit, dalvikOffset, kMipsAddu, r_AT, r_AT, r_RA);
     oatInsertLIRBefore(lir, addu);
@@ -557,12 +557,12 @@
                     LIR *newDeltaHi =
                           rawLIR(cUnit, lir->dalvikOffset, kMipsDeltaHi,
                                  lir->operands[0], 0, lir->operands[2],
-                                 lir->operands[3], lir->target);
+                                 lir->operands[3], 0, lir->target);
                     oatInsertLIRBefore((LIR*)lir, (LIR*)newDeltaHi);
                     LIR *newDeltaLo =
                           rawLIR(cUnit, lir->dalvikOffset, kMipsDeltaLo,
                                  lir->operands[0], 0, lir->operands[2],
-                                 lir->operands[3], lir->target);
+                                 lir->operands[3], 0, lir->target);
                     oatInsertLIRBefore((LIR*)lir, (LIR*)newDeltaLo);
                     lir->flags.isNop = true;
                     res = kRetryAll;
diff --git a/src/compiler/codegen/mips/Mips32/Factory.cc b/src/compiler/codegen/mips/Mips32/Factory.cc
index 2868710..0ee936e 100644
--- a/src/compiler/codegen/mips/Mips32/Factory.cc
+++ b/src/compiler/codegen/mips/Mips32/Factory.cc
@@ -619,12 +619,12 @@
     }
 
     if (rBase == rSP) {
-        if (load != NULL)
-            annotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                                    true /* isLoad */);
-        if (load2 != NULL)
+        annotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
+                                true /* isLoad */, pair /* is64bit */);
+        if (pair) {
             annotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
-                                    true /* isLoad */);
+                                    true /* isLoad */, pair /* is64bit */);
+        }
     }
     return load;
 }
@@ -720,12 +720,12 @@
     }
 
     if (rBase == rSP) {
-        if (store != NULL)
-            annotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                                    false /* isLoad */);
-        if (store2 != NULL)
+        annotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
+                                false /* isLoad */, pair /* is64bit */);
+        if (pair) {
             annotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
-                                    false /* isLoad */);
+                                    false /* isLoad */, pair /* is64bit */);
+        }
     }
 
     return res;
diff --git a/src/compiler/codegen/mips/MipsLIR.h b/src/compiler/codegen/mips/MipsLIR.h
index 1e7a803..8fdfe59 100644
--- a/src/compiler/codegen/mips/MipsLIR.h
+++ b/src/compiler/codegen/mips/MipsLIR.h
@@ -496,6 +496,7 @@
 #define IS_BINARY_OP    (1 << kIsBinaryOp)
 #define IS_TERTIARY_OP  (1 << kIsTertiaryOp)
 #define IS_QUAD_OP      (1 << kIsQuadOp)
+#define IS_QUIN_OP      0
 #define IS_IT           (1 << kIsIT)
 #define SETS_CCODES     (1 << kSetsCCodes)
 #define USES_CCODES     (1 << kUsesCCodes)
diff --git a/src/compiler/codegen/x86/ArchFactory.cc b/src/compiler/codegen/x86/ArchFactory.cc
index 1bf0a72..2d15318 100644
--- a/src/compiler/codegen/x86/ArchFactory.cc
+++ b/src/compiler/codegen/x86/ArchFactory.cc
@@ -128,18 +128,14 @@
     if (cUnit->numCoreSpills == 0) {
         return;
     }
-    UNIMPLEMENTED(WARNING) << "spillCoreRegs";
-#if 0
     uint32_t mask = cUnit->coreSpillMask;
-    int offset = cUnit->numCoreSpills * 4;
-    opRegImm(cUnit, kOpSub, rSP, offset);
+    int offset = cUnit->frameSize - 4;
     for (int reg = 0; mask; mask >>= 1, reg++) {
         if (mask & 0x1) {
             offset -= 4;
             storeWordDisp(cUnit, rSP, offset, reg);
         }
     }
-#endif
 }
 
 void unSpillCoreRegs(CompilationUnit* cUnit)
@@ -147,35 +143,42 @@
     if (cUnit->numCoreSpills == 0) {
         return;
     }
-    UNIMPLEMENTED(WARNING) << "unSpillCoreRegs";
-#if 0
     uint32_t mask = cUnit->coreSpillMask;
-    int offset = cUnit->frameSize;
+    int offset = cUnit->frameSize - 4;
     for (int reg = 0; mask; mask >>= 1, reg++) {
         if (mask & 0x1) {
             offset -= 4;
             loadWordDisp(cUnit, rSP, offset, reg);
         }
     }
-    opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize);
-#endif
+}
+
+void opRegThreadMem(CompilationUnit* cUnit, OpKind op, int rDest, int threadOffset) {
+  X86OpCode opcode = kX86Bkpt;
+  switch (op) {
+    case kOpCmp: opcode = kX86Cmp32RT;  break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+  }
+  DCHECK((EncodingMap[opcode].flags & IS_BINARY_OP) != 0);
+  newLIR2(cUnit, opcode, rDest, threadOffset);
 }
 
 void genEntrySequence(CompilationUnit* cUnit, BasicBlock* bb)
 {
-    UNIMPLEMENTED(WARNING) << "genEntrySequence";
-#if 0
-    int spillCount = cUnit->numCoreSpills + cUnit->numFPSpills;
     /*
-     * On entry, rARG0, rARG1, rARG2 & rARG3 are live.  Let the register
+     * On entry, rARG0, rARG1, rARG2 are live.  Let the register
      * allocation mechanism know so it doesn't try to use any of them when
      * expanding the frame or flushing.  This leaves the utility
-     * code with a single temp: r12.  This should be enough.
+     * code with no spare temps.
      */
     oatLockTemp(cUnit, rARG0);
     oatLockTemp(cUnit, rARG1);
     oatLockTemp(cUnit, rARG2);
-    oatLockTemp(cUnit, rARG3);
+
+    /* Build frame, return address already on stack */
+    opRegImm(cUnit, kOpSub, rSP, cUnit->frameSize - 4);
 
     /*
      * We can safely skip the stack overflow check if we're
@@ -185,63 +188,54 @@
                               ((size_t)cUnit->frameSize <
                               Thread::kStackOverflowReservedBytes));
     newLIR0(cUnit, kPseudoMethodEntry);
-    int checkReg = oatAllocTemp(cUnit);
-    int newSP = oatAllocTemp(cUnit);
-    if (!skipOverflowCheck) {
-        /* Load stack limit */
-        loadWordDisp(cUnit, rSELF,
-                     Thread::StackEndOffset().Int32Value(), checkReg);
-    }
     /* Spill core callee saves */
     spillCoreRegs(cUnit);
     /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */
     DCHECK_EQ(cUnit->numFPSpills, 0);
     if (!skipOverflowCheck) {
-        opRegRegImm(cUnit, kOpSub, newSP, rSP,
-                    cUnit->frameSize - (spillCount * 4));
-        genRegRegCheck(cUnit, kCondCc, newSP, checkReg, NULL,
-                       kThrowStackOverflow);
-        opRegCopy(cUnit, rSP, newSP);         // Establish stack
-    } else {
-        opRegImm(cUnit, kOpSub, rSP,
-                 cUnit->frameSize - (spillCount * 4));
+        // cmp rSP, fs:[stack_end_]; jcc throw_launchpad
+        LIR* tgt = rawLIR(cUnit, 0, kPseudoThrowTarget, kThrowStackOverflow, 0, 0, 0, 0);
+        opRegThreadMem(cUnit, kOpCmp, rSP, Thread::StackEndOffset().Int32Value());
+        opCondBranch(cUnit, kCondUlt, tgt);
+        // Remember branch target - will process later
+        oatInsertGrowableList(cUnit, &cUnit->throwLaunchpads, (intptr_t)tgt);
     }
+    /* Spill Method* */
     storeBaseDisp(cUnit, rSP, 0, rARG0, kWord);
     flushIns(cUnit);
 
     if (cUnit->genDebugger) {
         // Refresh update debugger callout
+        UNIMPLEMENTED(WARNING) << "genDebugger";
+#if 0
         loadWordDisp(cUnit, rSELF,
                      OFFSETOF_MEMBER(Thread, pUpdateDebuggerFromCode), rSUSPEND);
         genDebuggerUpdate(cUnit, DEBUGGER_METHOD_ENTRY);
+#endif
     }
 
     oatFreeTemp(cUnit, rARG0);
     oatFreeTemp(cUnit, rARG1);
     oatFreeTemp(cUnit, rARG2);
-    oatFreeTemp(cUnit, rARG3);
-#endif
 }
 
-void genExitSequence(CompilationUnit* cUnit, BasicBlock* bb)
-{
-    UNIMPLEMENTED(WARNING) << "genExitSequence";
-#if 0
-    /*
-     * In the exit path, rRET0/rRET1 are live - make sure they aren't
-     * allocated by the register utilities as temps.
-     */
-    oatLockTemp(cUnit, rRET0);
-    oatLockTemp(cUnit, rRET1);
+void genExitSequence(CompilationUnit* cUnit, BasicBlock* bb) {
+  /*
+   * In the exit path, rRET0/rRET1 are live - make sure they aren't
+   * allocated by the register utilities as temps.
+   */
+  oatLockTemp(cUnit, rRET0);
+  oatLockTemp(cUnit, rRET1);
 
-    newLIR0(cUnit, kPseudoMethodExit);
-    /* If we're compiling for the debugger, generate an update callout */
-    if (cUnit->genDebugger) {
-        genDebuggerUpdate(cUnit, DEBUGGER_METHOD_EXIT);
-    }
-    unSpillCoreRegs(cUnit);
-    opReg(cUnit, kOpBx, r_RA);
-#endif
+  newLIR0(cUnit, kPseudoMethodExit);
+  /* If we're compiling for the debugger, generate an update callout */
+  if (cUnit->genDebugger) {
+    genDebuggerUpdate(cUnit, DEBUGGER_METHOD_EXIT);
+  }
+  unSpillCoreRegs(cUnit);
+  /* Remove frame except for return address */
+  opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - 4);
+  newLIR0(cUnit, kX86Ret);
 }
 
 /*
@@ -249,43 +243,39 @@
  * Note: new redundant branches may be inserted later, and we'll
  * use a check in final instruction assembly to nop those out.
  */
-void removeRedundantBranches(CompilationUnit* cUnit)
-{
-    UNIMPLEMENTED(WARNING) << "removeRedundantBranches";
-#if 0
-    LIR* thisLIR;
+void removeRedundantBranches(CompilationUnit* cUnit) {
+  LIR* thisLIR;
 
-    for (thisLIR = (LIR*) cUnit->firstLIRInsn;
-         thisLIR != (LIR*) cUnit->lastLIRInsn;
-         thisLIR = NEXT_LIR(thisLIR)) {
+  for (thisLIR = (LIR*) cUnit->firstLIRInsn;
+      thisLIR != (LIR*) cUnit->lastLIRInsn;
+      thisLIR = NEXT_LIR(thisLIR)) {
 
-        /* Branch to the next instruction */
-        if (thisLIR->opcode == kX86B) {
-            LIR* nextLIR = thisLIR;
+    /* Branch to the next instruction */
+    if (thisLIR->opcode == kX86Jmp) {
+      LIR* nextLIR = thisLIR;
 
-            while (true) {
-                nextLIR = NEXT_LIR(nextLIR);
+      while (true) {
+        nextLIR = NEXT_LIR(nextLIR);
 
-                /*
-                 * Is the branch target the next instruction?
-                 */
-                if (nextLIR == (LIR*) thisLIR->target) {
-                    thisLIR->flags.isNop = true;
-                    break;
-                }
-
-                /*
-                 * Found real useful stuff between the branch and the target.
-                 * Need to explicitly check the lastLIRInsn here because it
-                 * might be the last real instruction.
-                 */
-                if (!isPseudoOpcode(nextLIR->opcode) ||
-                    (nextLIR = (LIR*) cUnit->lastLIRInsn))
-                    break;
-            }
+        /*
+         * Is the branch target the next instruction?
+         */
+        if (nextLIR == (LIR*) thisLIR->target) {
+          thisLIR->flags.isNop = true;
+          break;
         }
+
+        /*
+         * Found real useful stuff between the branch and the target.
+         * Need to explicitly check the lastLIRInsn here because it
+         * might be the last real instruction.
+         */
+        if (!isPseudoOpcode(nextLIR->opcode) ||
+            (nextLIR == (LIR*) cUnit->lastLIRInsn))
+          break;
+      }
     }
-#endif
+  }
 }
 
 
diff --git a/src/compiler/codegen/x86/ArchUtility.cc b/src/compiler/codegen/x86/ArchUtility.cc
index 23cee14..9830e13 100644
--- a/src/compiler/codegen/x86/ArchUtility.cc
+++ b/src/compiler/codegen/x86/ArchUtility.cc
@@ -23,110 +23,80 @@
 namespace art {
 
 /* For dumping instructions */
-#define X86_REG_COUNT 16
-static const char *x86RegName[X86_REG_COUNT] = {
+static const char* x86RegName[] = {
     "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
     "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
 };
 
+static const char* x86CondName[] = {
+    "O",
+    "NO",
+    "B/NAE/C",
+    "NB/AE/NC",
+    "Z/EQ",
+    "NZ/NE",
+    "BE/NA",
+    "NBE/A",
+    "S",
+    "NS",
+    "P/PE",
+    "NP/PO",
+    "L/NGE",
+    "NL/GE",
+    "LE/NG",
+    "NLE/G"
+};
+
 /*
  * Interpret a format string and build a string no longer than size
  * See format key in Assemble.c.
  */
-std::string buildInsnString(const char *fmt, LIR *lir, unsigned char* baseAddr)
-{
-    std::string buf;
-    int i;
-    const char *fmtEnd = &fmt[strlen(fmt)];
-    char tbuf[256];
-    char nc;
-    while (fmt < fmtEnd) {
-        int operand;
-        if (*fmt == '!') {
-            fmt++;
-            DCHECK_LT(fmt, fmtEnd);
-            nc = *fmt++;
-            if (nc=='!') {
-                strcpy(tbuf, "!");
+std::string buildInsnString(const char *fmt, LIR *lir, unsigned char* baseAddr) {
+  std::string buf;
+  size_t i = 0;
+  size_t fmt_len = strlen(fmt);
+  while(i < fmt_len) {
+    if (fmt[i] != '!') {
+      buf += fmt[i];
+      i++;
+    } else {
+      i++;
+      DCHECK_LT(i, fmt_len);
+      char operand_number_ch = fmt[i];
+      i++;
+      if (operand_number_ch == '!') {
+        buf += "!";
+      } else {
+        int operand_number = operand_number_ch - '0';
+        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
+        DCHECK_LT(i, fmt_len);
+        int operand = lir->operands[operand_number];
+        switch(fmt[i]) {
+          case 'd':
+            buf += StringPrintf("%d", operand);
+            break;
+          case 'r':
+            if (FPREG(operand) || DOUBLEREG(operand)) {
+              int fp_reg = operand & FP_REG_MASK;
+              buf += StringPrintf("xmm%d", fp_reg);
             } else {
-               DCHECK_LT(fmt, fmtEnd);
-               DCHECK_LT((unsigned)(nc-'0'), 4u);
-               operand = lir->operands[nc-'0'];
-               switch(*fmt++) {
-                   case 'b':
-                       strcpy(tbuf,"0000");
-                       for (i=3; i>= 0; i--) {
-                           tbuf[i] += operand & 1;
-                           operand >>= 1;
-                       }
-                       break;
-                   case 's':
-                       sprintf(tbuf,"$f%d",operand & FP_REG_MASK);
-                       break;
-                   case 'S':
-                       DCHECK_EQ(((operand & FP_REG_MASK) & 1), 0);
-                       sprintf(tbuf,"$f%d",operand & FP_REG_MASK);
-                       break;
-                   case 'h':
-                       sprintf(tbuf,"%04x", operand);
-                       break;
-                   case 'M':
-                   case 'd':
-                       sprintf(tbuf,"%d", operand);
-                       break;
-                   case 'D':
-                       sprintf(tbuf,"%d", operand+1);
-                       break;
-                   case 'E':
-                       sprintf(tbuf,"%d", operand*4);
-                       break;
-                   case 'F':
-                       sprintf(tbuf,"%d", operand*2);
-                       break;
-                   case 't':
-                       sprintf(tbuf,"0x%08x (L%p)",
-                               (int) baseAddr + lir->offset + 4 +
-                               (operand << 2),
-                               lir->target);
-                       break;
-                   case 'T':
-                       sprintf(tbuf,"0x%08x",
-                               (int) (operand << 2));
-                       break;
-                   case 'u': {
-                       int offset_1 = lir->operands[0];
-                       int offset_2 = NEXT_LIR(lir)->operands[0];
-                       intptr_t target =
-                           ((((intptr_t) baseAddr + lir->offset + 4) &
-                            ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) &
-                           0xfffffffc;
-                       sprintf(tbuf, "%p", (void *) target);
-                       break;
-                    }
-
-                   /* Nothing to print for BLX_2 */
-                   case 'v':
-                       strcpy(tbuf, "see above");
-                       break;
-                   case 'r':
-                       DCHECK(operand >= 0 && operand < X86_REG_COUNT);
-                       strcpy(tbuf, x86RegName[operand]);
-                       break;
-                   case 'N':
-                       // Placeholder for delay slot handling
-                       strcpy(tbuf, ";    nop");
-                       break;
-                   default:
-                       strcpy(tbuf,"DecodeError");
-                       break;
-               }
-               buf += tbuf;
+              DCHECK_LT(static_cast<size_t>(operand), sizeof(x86RegName) / sizeof(x86RegName[0]));
+              buf += x86RegName[operand];
             }
-        } else {
-           buf += *fmt++;
+            break;
+          case 'c':
+            DCHECK_LT(static_cast<size_t>(operand), sizeof(x86CondName) / sizeof(x86CondName[0]));
+            buf += x86CondName[operand];
+            break;
+          default:
+            buf += StringPrintf("DecodeError '%c'", fmt[i]);
+            break;
         }
+        i++;
+      }
     }
-    return buf;
+  }
+  return buf;
 }
 
 void oatDumpResourceMask(LIR *lir, u8 mask, const char *prefix)
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index 8223db0..cb06776 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -24,187 +24,636 @@
 
 #define MAX_ASSEMBLER_RETRIES 50
 
-#define BINARY_ENCODING_MAP(opcode, \
-                            rm8_r8, rm32_r32, \
-                            r8_rm8, r32_rm32, \
-                            rax8_i8, rax32_i32, \
-                            rm8_i8_opcode, rm8_i8_modrm_opcode, \
-                            rm32_i32_opcode, rm32_i32_modrm_opcode, \
-                            rm32_i8_opcode, rm32_i8_modrm_opcode) \
-{ kOp ## opcode ## RI, \
-  kRegImm, \
-  0, \
-  { RegMem_Immediate: { rax8_i8, rax32_i32, \
-                       {rm8_i8_opcode,   rm8_i8_modrm_opcode}, \
-                       {rm32_i32_opcode, rm32_i32_modrm_opcode}, \
-                       {rm32_i8_opcode,  rm32_i8_modrm_opcode} } }, \
-  #opcode "RI", "" \
-}, \
-{ kOp ## opcode ## MI, \
-  kMemImm, \
-  0, \
-  { RegMem_Immediate: { rax8_i8, rax32_i32, \
-                       {rm8_i8_opcode, rm8_i8_modrm_opcode}, \
-                       {rm32_i32_opcode, rm32_i32_modrm_opcode}, \
-                       {rm32_i8_opcode, rm32_i8_modrm_opcode} } }, \
-  #opcode "MI", "" \
-}, \
-{ kOp ## opcode ## AI, \
-  kArrayImm, \
-  0, \
-  { RegMem_Immediate: { rax8_i8, rax32_i32, \
-                       {rm8_i8_opcode, rm8_i8_modrm_opcode}, \
-                       {rm32_i32_opcode, rm32_i32_modrm_opcode}, \
-                       {rm32_i8_opcode, rm32_i8_modrm_opcode} } }, \
-  #opcode "AI", "" \
-}, \
-{ kOp ## opcode ## RR, \
-  kRegReg, \
-  0, \
-  { Reg_RegMem: {r8_rm8, r32_rm32} }, \
-  #opcode "RR", "" \
-}, \
-{ kOp ## opcode ## RM, \
-  kRegMem, \
-  0, \
-  { Reg_RegMem: {r8_rm8, r32_rm32} }, \
-  #opcode "RM", "" \
-}, \
-{ kOp ## opcode ## RA, \
-  kRegArray, \
-  0, \
-  { Reg_RegMem: {r8_rm8, r32_rm32} }, \
-  #opcode "RA", "" \
-}, \
-{ kOp ## opcode ## MR, \
-  kMemReg, \
-  0, \
-  { RegMem_Reg: {rm8_r8, rm32_r32} }, \
-  #opcode "MR", "" \
-}, \
-{ kOp ## opcode ## AR, \
-  kArrayReg, \
-  0, \
-  { RegMem_Reg: {rm8_r8, rm32_r32} }, \
-  #opcode "AR", "" \
-}
-
 X86EncodingMap EncodingMap[kX86Last] = {
-  { kX8632BitData, kData, 0 /* flags - TODO */, { unused: 0 }, "data", "" },
-BINARY_ENCODING_MAP(Add,
+  { kX8632BitData, kData,    IS_UNARY_OP,            { 0, 0, 0x00, 0, 0, 0, 0, 4 }, "data",  "0x!0d" },
+  { kX86Bkpt,      kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 4 }, "int 3", "" },
+  { kX86Nop,       kNop,     IS_UNARY_OP,            { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop",   "" },
+
+#define ENCODING_MAP(opname, \
+                     rm8_r8, rm32_r32, \
+                     r8_rm8, r32_rm32, \
+                     ax8_i8, ax32_i32, \
+                     rm8_i8, rm8_i8_modrm, \
+                     rm32_i32, rm32_i32_modrm, \
+                     rm32_i8, rm32_i8_modrm) \
+{ kX86 ## opname ## 8MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | SETS_CCODES, { 0, 0,             rm8_r8, 0, 0, 0,            0,      0 }, #opname "8MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 8AR, kArrayReg, IS_STORE | IS_QUIN_OP     | SETS_CCODES, { 0, 0,             rm8_r8, 0, 0, 0,            0,      0 }, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 8TR, kThreadReg,IS_STORE | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0,            0,      0 }, #opname "8TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 8RR, kRegReg,              IS_BINARY_OP   | SETS_CCODES, { 0, 0,             r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 8RM, kRegMem,    IS_LOAD | IS_TERTIARY_OP | SETS_CCODES, { 0, 0,             r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 8RA, kRegArray,  IS_LOAD | IS_QUIN_OP     | SETS_CCODES, { 0, 0,             r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 8RI, kRegImm,              IS_BINARY_OP   | SETS_CCODES, { 0, 0,             rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1 }, #opname "8RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 8MI, kMemImm,   IS_STORE | IS_TERTIARY_OP | SETS_CCODES, { 0, 0,             rm8_i8, 0, 0, rm8_i8_modrm, 0,      1 }, #opname "8MI", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 8AI, kArrayImm, IS_STORE | IS_QUIN_OP     | SETS_CCODES, { 0, 0,             rm8_i8, 0, 0, rm8_i8_modrm, 0,      1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 8TI, kThreadImm,IS_STORE | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1 }, #opname "8TI", "fs:[!0d],!1r" }, \
+  \
+{ kX86 ## opname ## 16MR,  kMemReg,   IS_STORE | IS_TERTIARY_OP | SETS_CCODES, { 0x66, 0,             rm32_r32, 0, 0, 0,              0,        0 }, #opname "16MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 16AR,  kArrayReg, IS_STORE | IS_QUIN_OP     | SETS_CCODES, { 0x66, 0,             rm32_r32, 0, 0, 0,              0,        0 }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 16TR,  kThreadReg,IS_STORE | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0x66, rm32_r32, 0, 0, 0,              0,        0 }, #opname "16TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 16RR,  kRegReg,              IS_BINARY_OP   | SETS_CCODES, { 0x66, 0,             r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 16RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | SETS_CCODES, { 0x66, 0,             r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 16RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | SETS_CCODES, { 0x66, 0,             r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 16RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0x66, r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 16RI,  kRegImm,              IS_BINARY_OP   | SETS_CCODES, { 0x66, 0,             rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 2 }, #opname "16RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 16MI,  kMemImm,   IS_STORE | IS_TERTIARY_OP | SETS_CCODES, { 0x66, 0,             rm32_i32, 0, 0, rm32_i32_modrm, 0,        2 }, #opname "16MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 16AI,  kArrayImm, IS_STORE | IS_QUIN_OP     | SETS_CCODES, { 0x66, 0,             rm32_i32, 0, 0, rm32_i32_modrm, 0,        2 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 16TI,  kThreadImm,IS_STORE | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0x66, rm32_i32, 0, 0, rm32_i32_modrm, 0,        2 }, #opname "16TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 16RI8, kRegImm,              IS_BINARY_OP   | SETS_CCODES, { 0x66, 0,             rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 16MI8, kMemImm,   IS_STORE | IS_TERTIARY_OP | SETS_CCODES, { 0x66, 0,             rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16MI8", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 16AI8, kArrayImm, IS_STORE | IS_QUIN_OP     | SETS_CCODES, { 0x66, 0,             rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 16TI8, kThreadImm,IS_STORE | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0x66, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16TI8", "fs:[!0d],!1d" }, \
+  \
+{ kX86 ## opname ## 32MR,  kMemReg,   IS_STORE | IS_TERTIARY_OP | SETS_CCODES, { 0, 0,             rm32_r32, 0, 0, 0,              0,        0 }, #opname "32MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 32AR,  kArrayReg, IS_STORE | IS_QUIN_OP     | SETS_CCODES, { 0, 0,             rm32_r32, 0, 0, 0,              0,        0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 32TR,  kThreadReg,IS_STORE | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 32RR,  kRegReg,              IS_BINARY_OP   | SETS_CCODES, { 0, 0,             r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 32RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | SETS_CCODES, { 0, 0,             r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 32RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | SETS_CCODES, { 0, 0,             r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 32RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 32RI,  kRegImm,              IS_BINARY_OP   | SETS_CCODES, { 0, 0,             rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 32MI,  kMemImm,   IS_STORE | IS_TERTIARY_OP | SETS_CCODES, { 0, 0,             rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32MI", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 32AI,  kArrayImm, IS_STORE | IS_QUIN_OP     | SETS_CCODES, { 0, 0,             rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 32TI,  kThreadImm,IS_STORE | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 32RI8, kRegImm,              IS_BINARY_OP   | SETS_CCODES, { 0, 0,             rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 32MI8, kMemImm,   IS_STORE | IS_TERTIARY_OP | SETS_CCODES, { 0, 0,             rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 32AI8, kArrayImm, IS_STORE | IS_QUIN_OP     | SETS_CCODES, { 0, 0,             rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 32TI8, kThreadImm,IS_STORE | IS_BINARY_OP   | SETS_CCODES, { THREAD_PREFIX, 0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32TI8", "fs:[!0d],!1d" }
+
+ENCODING_MAP(Add,
   0x00 /* RegMem8/Reg8 */,     0x01 /* RegMem32/Reg32 */,
   0x02 /* Reg8/RegMem8 */,     0x03 /* Reg32/RegMem32 */,
   0x04 /* Rax8/imm8 opcode */, 0x05 /* Rax32/imm32 */,
   0x80, 0x0 /* RegMem8/imm8 */,
   0x81, 0x0 /* RegMem32/imm32 */, 0x83, 0x0 /* RegMem32/imm8 */),
-BINARY_ENCODING_MAP(Or,
+ENCODING_MAP(Or,
   0x08 /* RegMem8/Reg8 */,     0x09 /* RegMem32/Reg32 */,
   0x0A /* Reg8/RegMem8 */,     0x0B /* Reg32/RegMem32 */,
   0x0C /* Rax8/imm8 opcode */, 0x0D /* Rax32/imm32 */,
   0x80, 0x1 /* RegMem8/imm8 */,
   0x81, 0x1 /* RegMem32/imm32 */, 0x83, 0x1 /* RegMem32/imm8 */),
-BINARY_ENCODING_MAP(Adc,
+ENCODING_MAP(Adc,
   0x10 /* RegMem8/Reg8 */,     0x11 /* RegMem32/Reg32 */,
   0x12 /* Reg8/RegMem8 */,     0x13 /* Reg32/RegMem32 */,
   0x14 /* Rax8/imm8 opcode */, 0x15 /* Rax32/imm32 */,
   0x80, 0x2 /* RegMem8/imm8 */,
   0x81, 0x2 /* RegMem32/imm32 */, 0x83, 0x2 /* RegMem32/imm8 */),
-BINARY_ENCODING_MAP(Sbb,
+ENCODING_MAP(Sbb,
   0x18 /* RegMem8/Reg8 */,     0x19 /* RegMem32/Reg32 */,
   0x1A /* Reg8/RegMem8 */,     0x1B /* Reg32/RegMem32 */,
   0x1C /* Rax8/imm8 opcode */, 0x1D /* Rax32/imm32 */,
   0x80, 0x3 /* RegMem8/imm8 */,
   0x81, 0x3 /* RegMem32/imm32 */, 0x83, 0x3 /* RegMem32/imm8 */),
-BINARY_ENCODING_MAP(And,
+ENCODING_MAP(And,
   0x20 /* RegMem8/Reg8 */,     0x21 /* RegMem32/Reg32 */,
   0x22 /* Reg8/RegMem8 */,     0x23 /* Reg32/RegMem32 */,
   0x24 /* Rax8/imm8 opcode */, 0x25 /* Rax32/imm32 */,
   0x80, 0x4 /* RegMem8/imm8 */,
   0x81, 0x4 /* RegMem32/imm32 */, 0x83, 0x4 /* RegMem32/imm8 */),
-BINARY_ENCODING_MAP(Sub,
+ENCODING_MAP(Sub,
   0x28 /* RegMem8/Reg8 */,     0x29 /* RegMem32/Reg32 */,
   0x2A /* Reg8/RegMem8 */,     0x2B /* Reg32/RegMem32 */,
   0x2C /* Rax8/imm8 opcode */, 0x2D /* Rax32/imm32 */,
   0x80, 0x5 /* RegMem8/imm8 */,
   0x81, 0x5 /* RegMem32/imm32 */, 0x83, 0x5 /* RegMem32/imm8 */),
-BINARY_ENCODING_MAP(Xor,
+ENCODING_MAP(Xor,
   0x30 /* RegMem8/Reg8 */,     0x31 /* RegMem32/Reg32 */,
   0x32 /* Reg8/RegMem8 */,     0x33 /* Reg32/RegMem32 */,
   0x34 /* Rax8/imm8 opcode */, 0x35 /* Rax32/imm32 */,
   0x80, 0x6 /* RegMem8/imm8 */,
   0x81, 0x6 /* RegMem32/imm32 */, 0x83, 0x6 /* RegMem32/imm8 */),
-BINARY_ENCODING_MAP(Cmp,
+ENCODING_MAP(Cmp,
   0x38 /* RegMem8/Reg8 */,     0x39 /* RegMem32/Reg32 */,
   0x3A /* Reg8/RegMem8 */,     0x3B /* Reg32/RegMem32 */,
   0x3C /* Rax8/imm8 opcode */, 0x3D /* Rax32/imm32 */,
   0x80, 0x7 /* RegMem8/imm8 */,
   0x81, 0x7 /* RegMem32/imm32 */, 0x83, 0x7 /* RegMem32/imm8 */),
-  { kOpMovRI, kUnimplemented, 0 /* flags - TODO */ , { unused: 0 }, "MovRI", "" },
-  { kOpMovMI, kUnimplemented, 0 /* flags - TODO */ , { unused: 0 }, "MovMI", "" },
-  { kOpMovAI, kUnimplemented, 0 /* flags - TODO */ , { unused: 0 }, "MovAI", "" },
-  { kOpMovRR, kRegReg,   0 /* flags - TODO */, { Reg_RegMem: {0x8A, 0x8B} }, "MovRR", "" },
-  { kOpMovRM, kRegMem,   0 /* flags - TODO */, { Reg_RegMem: {0x8A, 0x8B} }, "MovRM", "" },
-  { kOpMovRA, kRegArray, 0 /* flags - TODO */, { Reg_RegMem: {0x8A, 0x8B} }, "MovRA", "" },
-  { kOpMovMR, kMemReg,   0 /* flags - TODO */, { RegMem_Reg: {0x88, 0x89} }, "MovMR", "" },
-  { kOpMovAR, kArrayReg, 0 /* flags - TODO */, { RegMem_Reg: {0x88, 0x89} }, "MovAR", "" }
+#undef ENCODING_MAP
+
+  { kX86Imul16RRI,   kRegRegImm,             IS_TERTIARY_OP | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RRI", "" },
+  { kX86Imul16RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RMI", "" },
+  { kX86Imul16RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RAI", "" },
+
+  { kX86Imul32RRI,   kRegRegImm,             IS_TERTIARY_OP | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul32RRI", "" },
+  { kX86Imul32RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul32RMI", "" },
+  { kX86Imul32RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul32RAI", "" },
+  { kX86Imul32RRI8,  kRegRegImm,             IS_TERTIARY_OP | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RRI8", "" },
+  { kX86Imul32RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RMI8", "" },
+  { kX86Imul32RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RAI8", "" },
+
+  { kX86Mov8MR, kMemReg,    IS_STORE | IS_TERTIARY_OP, { 0, 0,             0x88, 0, 0, 0, 0, 0 }, "Mov8MR", "[!0r+!1d],!2r" },
+  { kX86Mov8AR, kArrayReg,  IS_STORE | IS_QUIN_OP,     { 0, 0,             0x88, 0, 0, 0, 0, 0 }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP,   { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8TR", "fs:[!0d],!1r" },
+  { kX86Mov8RR, kRegReg,               IS_BINARY_OP,   { 0, 0,             0x8A, 0, 0, 0, 0, 0 }, "Mov8RR", "!0r,!1r" },
+  { kX86Mov8RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP, { 0, 0,             0x8A, 0, 0, 0, 0, 0 }, "Mov8RM", "!0r,[!1r+!2d]" },
+  { kX86Mov8RA, kRegArray,  IS_LOAD  | IS_QUIN_OP,     { 0, 0,             0x8A, 0, 0, 0, 0, 0 }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov8RT, kRegThread, IS_LOAD  | IS_BINARY_OP,   { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RT", "!0r,fs:[!1d]" },
+  { kX86Mov8RI, kMovRegImm,            IS_BINARY_OP,   { 0, 0,             0xB0, 0, 0, 0, 0, 1 }, "Mov8RI", "!0r,!1d" },
+  { kX86Mov8MI, kMemImm,    IS_STORE | IS_TERTIARY_OP, { 0, 0,             0xC6, 0, 0, 0, 0, 1 }, "Mov8MI", "[!0r+!1d],!2r" },
+  { kX86Mov8AI, kArrayImm,  IS_STORE | IS_QUIN_OP,     { 0, 0,             0xC6, 0, 0, 0, 0, 1 }, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP,   { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8TI", "fs:[!0d],!1d" },
+
+  { kX86Mov16MR, kMemReg,    IS_STORE | IS_TERTIARY_OP, { 0x66, 0,             0x89, 0, 0, 0, 0, 0 }, "Mov16MR", "[!0r+!1d],!2r" },
+  { kX86Mov16AR, kArrayReg,  IS_STORE | IS_QUIN_OP,     { 0x66, 0,             0x89, 0, 0, 0, 0, 0 }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov16TR, kThreadReg, IS_STORE | IS_BINARY_OP,   { THREAD_PREFIX, 0x66, 0x89, 0, 0, 0, 0, 0 }, "Mov16TR", "fs:[!0d],!1r" },
+  { kX86Mov16RR, kRegReg,               IS_BINARY_OP,   { 0x66, 0,             0x8B, 0, 0, 0, 0, 0 }, "Mov16RR", "!0r,!1r" },
+  { kX86Mov16RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP, { 0x66, 0,             0x8B, 0, 0, 0, 0, 0 }, "Mov16RM", "!0r,[!1r+!2d]" },
+  { kX86Mov16RA, kRegArray,  IS_LOAD  | IS_QUIN_OP,     { 0x66, 0,             0x8B, 0, 0, 0, 0, 0 }, "Mov16RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov16RT, kRegThread, IS_LOAD  | IS_BINARY_OP,   { THREAD_PREFIX, 0x66, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RT", "!0r,fs:[!1d]" },
+  { kX86Mov16RI, kMovRegImm,            IS_BINARY_OP,   { 0x66, 0,             0xB8, 0, 0, 0, 0, 2 }, "Mov16RI", "!0r,!1d" },
+  { kX86Mov16MI, kMemImm,    IS_STORE | IS_TERTIARY_OP, { 0x66, 0,             0xC7, 0, 0, 0, 0, 2 }, "Mov16MI", "[!0r+!1d],!2r" },
+  { kX86Mov16AI, kArrayImm,  IS_STORE | IS_QUIN_OP,     { 0x66, 0,             0xC7, 0, 0, 0, 0, 2 }, "Mov16AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP,   { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" },
+
+  { kX86Mov32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP, { 0, 0,             0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" },
+  { kX86Mov32AR, kArrayReg,  IS_STORE | IS_QUIN_OP,     { 0, 0,             0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP,   { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" },
+  { kX86Mov32RR, kRegReg,               IS_BINARY_OP,   { 0, 0,             0x8B, 0, 0, 0, 0, 0 }, "Mov32RR", "!0r,!1r" },
+  { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP, { 0, 0,             0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" },
+  { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP,     { 0, 0,             0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP,   { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" },
+  { kX86Mov32RI, kMovRegImm,            IS_BINARY_OP,   { 0, 0,             0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" },
+  { kX86Mov32MI, kMemImm,    IS_STORE | IS_TERTIARY_OP, { 0, 0,             0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", "[!0r+!1d],!2r" },
+  { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP,     { 0, 0,             0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,   { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" },
+
+  { kX86Lea32RA, kRegArray, IS_QUIN_OP, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+
+#define SHIFT_ENCODING_MAP(opname, modrm_opcode) \
+{ kX86 ## opname ## 8RI, kShiftRegImm,   IS_BINARY_OP   | SETS_CCODES, { 0, 0,    0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 8MI, kShiftMemImm,   IS_TERTIARY_OP | SETS_CCODES, { 0, 0,    0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8MI", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 8AI, kShiftArrayImm, IS_QUIN_OP     | SETS_CCODES, { 0, 0,    0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 8RC, kShiftRegCl,    IS_BINARY_OP   | SETS_CCODES, { 0, 0,    0xD2, 0, 0, modrm_opcode, 0,    1 }, #opname "8RC", "" }, \
+{ kX86 ## opname ## 8MC, kShiftMemCl,    IS_TERTIARY_OP | SETS_CCODES, { 0, 0,    0xD2, 0, 0, modrm_opcode, 0,    1 }, #opname "8MC", "" }, \
+{ kX86 ## opname ## 8AC, kShiftArrayCl,  IS_QUIN_OP     | SETS_CCODES, { 0, 0,    0xD2, 0, 0, modrm_opcode, 0,    1 }, #opname "8AC", "" }, \
+  \
+{ kX86 ## opname ## 16RI, kShiftRegImm,   IS_BINARY_OP   | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 16MI, kShiftMemImm,   IS_TERTIARY_OP | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16MI", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 16AI, kShiftArrayImm, IS_QUIN_OP     | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 16RC, kShiftRegCl,    IS_BINARY_OP   | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "16RC", "" }, \
+{ kX86 ## opname ## 16MC, kShiftMemCl,    IS_TERTIARY_OP | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "16MC", "" }, \
+{ kX86 ## opname ## 16AC, kShiftArrayCl,  IS_QUIN_OP     | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "16AC", "" }, \
+  \
+{ kX86 ## opname ## 32RI, kShiftRegImm,   IS_BINARY_OP   | SETS_CCODES, { 0, 0,    0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 32MI, kShiftMemImm,   IS_TERTIARY_OP | SETS_CCODES, { 0, 0,    0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32MI", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 32AI, kShiftArrayImm, IS_QUIN_OP     | SETS_CCODES, { 0, 0,    0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 32RC, kShiftRegCl,    IS_BINARY_OP   | SETS_CCODES, { 0, 0,    0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "32RC", "" }, \
+{ kX86 ## opname ## 32MC, kShiftMemCl,    IS_TERTIARY_OP | SETS_CCODES, { 0, 0,    0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "32MC", "" }, \
+{ kX86 ## opname ## 32AC, kShiftArrayCl,  IS_QUIN_OP     | SETS_CCODES, { 0, 0,    0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "32AC", "" }
+
+  SHIFT_ENCODING_MAP(Rol, 0x0),
+  SHIFT_ENCODING_MAP(Ror, 0x1),
+  SHIFT_ENCODING_MAP(Rcl, 0x2),
+  SHIFT_ENCODING_MAP(Rcr, 0x3),
+  SHIFT_ENCODING_MAP(Sal, 0x4),
+  SHIFT_ENCODING_MAP(Shl, 0x5),
+  SHIFT_ENCODING_MAP(Shr, 0x6),
+  SHIFT_ENCODING_MAP(Sar, 0x7),
+#undef SHIFT_ENCODING_MAP
+
+#define UNARY_ENCODING_MAP(opname, modrm, \
+                           reg, reg_kind, reg_flags, \
+                           mem, mem_kind, mem_flags, \
+                           arr, arr_kind, arr_flags, imm) \
+{ kX86 ## opname ## 8 ## reg,  reg_kind,           reg_flags, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #reg, "" }, \
+{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | mem_flags, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #mem, "" }, \
+{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | arr_flags, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #arr, "" }, \
+{ kX86 ## opname ## 16 ## reg, reg_kind,           reg_flags, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #reg, "" }, \
+{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | mem_flags, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #mem, "" }, \
+{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | arr_flags, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, "" }, \
+{ kX86 ## opname ## 32 ## reg, reg_kind,           reg_flags, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, "" }, \
+{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | mem_flags, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, "" }, \
+{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | arr_flags, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, "" }
+
+  UNARY_ENCODING_MAP(Test,    0x0, RI, kRegImm, IS_BINARY_OP, MI, kMemImm, IS_TERTIARY_OP, AI, kArrayImm, IS_QUIN_OP, 1),
+  UNARY_ENCODING_MAP(Not,     0x2, R, kReg, IS_UNARY_OP, M, kMem, IS_BINARY_OP, A, kArray, IS_QUAD_OP, 0),
+  UNARY_ENCODING_MAP(Neg,     0x3, R, kReg, IS_UNARY_OP, M, kMem, IS_BINARY_OP, A, kArray, IS_QUAD_OP, 0),
+  UNARY_ENCODING_MAP(Mul,     0x4, DaR, kRegRegReg, IS_TERTIARY_OP, DaM, kRegRegMem, IS_QUAD_OP, DaA, kRegRegArray, IS_SEXTUPLE_OP, 0),
+  UNARY_ENCODING_MAP(Imul,    0x5, DaR, kRegRegReg, IS_TERTIARY_OP, DaM, kRegRegMem, IS_QUAD_OP, DaA, kRegRegArray, IS_SEXTUPLE_OP, 0),
+  UNARY_ENCODING_MAP(Divmod,  0x6, DaR, kRegRegReg, IS_TERTIARY_OP, DaM, kRegRegMem, IS_QUAD_OP, DaA, kRegRegArray, IS_SEXTUPLE_OP, 0),
+  UNARY_ENCODING_MAP(Idivmod, 0x7, DaR, kRegRegReg, IS_TERTIARY_OP, DaM, kRegRegMem, IS_QUAD_OP, DaA, kRegRegArray, IS_SEXTUPLE_OP, 0),
+#undef UNARY_ENCODING_MAP
+
+#define EXT_0F_ENCODING_MAP(opname, prefix, opcode) \
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP,   { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP,     { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+
+  EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10),
+  { kX86MovsdMR, kMemReg,   IS_STORE | IS_TERTIARY_OP, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdMR", "[!0r+!1d],!2r" },
+  { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP,     { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  EXT_0F_ENCODING_MAP(Movss, 0xF3, 0x10),
+  { kX86MovssMR, kMemReg,   IS_STORE | IS_TERTIARY_OP, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssMR", "[!0r+!1d],!2r" },
+  { kX86MovssAR, kArrayReg, IS_STORE | IS_QUIN_OP,     { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  EXT_0F_ENCODING_MAP(Cvtsi2sd,  0xF2, 0x2A),
+  EXT_0F_ENCODING_MAP(Cvtsi2ss,  0xF3, 0x2A),
+  EXT_0F_ENCODING_MAP(Cvttsd2si, 0xF2, 0x2C),
+  EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C),
+  EXT_0F_ENCODING_MAP(Cvtsd2si,  0xF2, 0x2D),
+  EXT_0F_ENCODING_MAP(Cvtss2si,  0xF3, 0x2D),
+  EXT_0F_ENCODING_MAP(Ucomisd,   0x66, 0x2E),
+  EXT_0F_ENCODING_MAP(Ucomiss,   0x00, 0x2E),
+  EXT_0F_ENCODING_MAP(Comisd,    0x66, 0x2F),
+  EXT_0F_ENCODING_MAP(Comiss,    0x00, 0x2F),
+  EXT_0F_ENCODING_MAP(Addsd,     0xF2, 0x58),
+  EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58),
+  EXT_0F_ENCODING_MAP(Mulsd,     0xF2, 0x59),
+  EXT_0F_ENCODING_MAP(Mulss,     0xF3, 0x59),
+  EXT_0F_ENCODING_MAP(Cvtss2sd,  0xF2, 0x5A),
+  EXT_0F_ENCODING_MAP(Cvtsd2ss,  0xF3, 0x5A),
+  EXT_0F_ENCODING_MAP(Subsd,     0xF2, 0x5C),
+  EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C),
+  EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E),
+  EXT_0F_ENCODING_MAP(Divss,     0xF3, 0x5E),
+
+  EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E),
+  EXT_0F_ENCODING_MAP(Movdrx,    0x66, 0x7E),
+
+  { kX86Set8R, kRegCond,              IS_BINARY_OP,   { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8R", "!1c !0r" },
+  { kX86Set8M, kMemCond,   IS_STORE | IS_TERTIARY_OP, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" },
+  { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP,     { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" },
+
+  EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF),
+  EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF),
+  EXT_0F_ENCODING_MAP(Movzx8,  0x00, 0xB6),
+  EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7),
+  EXT_0F_ENCODING_MAP(Movsx8,  0x00, 0xBE),
+  EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF),
+#undef EXT_0F_ENCODING_MAP
+
+  { kX86Jcc,   kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0x70, 0, 0, 0, 0, 0 }, "Jcc", "!1c" },
+  { kX86Jmp,   kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp", "" },
+  { kX86CallR, kCall, IS_UNARY_OP  | IS_BRANCH,               { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "" },
+  { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH,               { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "" },
+  { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH,               { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "" },
+  { kX86Ret,   kNullary,NO_OPERAND | IS_BRANCH,               { 0, 0, 0xC3, 0, 0, 0, 0, 0 }, "Ret", "" },
 };
 
-int oatGetInsnSize(LIR* lir)
-{
-  switch (EncodingMap[lir->opcode].kind) {
-    case kData:
-      return 4;
-    case kRegImm: {
-      int reg = lir->operands[0];
-      int imm = lir->operands[1];
-      return (reg == rAX ? 1 : 2) +  // AX opcodes don't require the modrm byte
-             (IS_SIMM8(imm) ? 1 : 4);  // 1 or 4 byte immediate
-      break;
+static size_t computeSize(X86EncodingMap* entry, int displacement, bool has_sib) {
+  size_t size = 0;
+  if (entry->skeleton.prefix1 > 0) {
+    ++size;
+    if (entry->skeleton.prefix2 > 0) {
+      ++size;
     }
-    case kMemImm: {
-      // int base = lir->operands[0];
-      int disp = lir->operands[1];
-      int imm  = lir->operands[2];
-      return 2 +  // opcode and modrm bytes
-          (disp == 0 ? 0 : (IS_SIMM8(disp) ? 1 : 4)) +  // 0, 1 or 4 byte displacement
-          (IS_SIMM8(imm) ? 1 : 4);  // 1 or 4 byte immediate
-      break;
-    }
-    case kArrayImm:
-      UNIMPLEMENTED(FATAL);
-      return 0;
-    case kRegReg:
-      return 2;  // opcode and modrm
-    case kRegMem: {
-      // int reg =  lir->operands[0];
-      // int base = lir->operands[1];
-      int disp = lir->operands[2];
-      return 2 +  // opcode and modrm bytes
-          (disp == 0 ? 0 : (IS_SIMM8(disp) ? 1 : 4));  // 0, 1 or 4 byte displacement
-      break;
-    }
-    case kRegArray:
-      UNIMPLEMENTED(FATAL);
-      return 0;
-    case kMemReg: {
-      // int base =  lir->operands[0];
-      int disp = lir->operands[1];
-      // int reg = lir->operands[2];
-      return 2 +  // opcode and modrm bytes
-          (disp == 0 ? 0 : (IS_SIMM8(disp) ? 1 : 4));  // 0, 1 or 4 byte displacement
-      break;
-    }
-    case kArrayReg:
-      UNIMPLEMENTED(FATAL);
-      return 0;
-    case kUnimplemented:
-      UNIMPLEMENTED(FATAL);
-      return 0;
   }
-  UNIMPLEMENTED(FATAL);  // unreachable
+  ++size;  // opcode
+  if (entry->skeleton.opcode == 0x0F) {
+    ++size;
+    if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) {
+      ++size;
+    }
+  }
+  ++size;  // modrm
+  if (has_sib) {
+    ++size;
+  }
+  if (displacement != 0) {
+    if (entry->opcode != kX86Lea32RA) {
+      DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0);
+    }
+    size += IS_SIMM8(displacement) ? 1 : 4;
+  }
+  size += entry->skeleton.immediate_bytes;
+  return size;
+}
+
+int oatGetInsnSize(LIR* lir) {
+  X86EncodingMap* entry = &EncodingMap[lir->opcode];
+  switch (entry->kind) {
+    case kData:
+      return 4;  // 4 bytes of data
+    case kNop:
+      return lir->operands[0];  // length of nop is sole operand
+    case kNullary:
+      return 1;  // 1 byte of opcode
+    case kReg:  // lir operands - 0: reg
+      return computeSize(entry, 0, false);
+    case kMem: { // lir operands - 0: base, 1: disp
+      int base = lir->operands[0];
+      // SP requires a special extra SIB byte
+      return computeSize(entry, lir->operands[1], false) + (base == rSP ? 1 : 0);
+    }
+    case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
+      return computeSize(entry, lir->operands[3], true);
+    case kMemReg: { // lir operands - 0: base, 1: disp, 2: reg
+      int base = lir->operands[0];
+      // SP requires a special extra SIB byte
+      return computeSize(entry, lir->operands[1], false) + (base == rSP ? 1 : 0);
+    }
+    case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
+      return computeSize(entry, lir->operands[3], true);
+    case kThreadReg:  // lir operands - 0: disp, 1: reg
+      return computeSize(entry, lir->operands[0], false);
+    case kRegReg:
+      return computeSize(entry, 0, false);
+    case kRegMem: { // lir operands - 0: reg, 1: base, 2: disp
+      int base = lir->operands[1];
+      return computeSize(entry, lir->operands[2], false) + (base == rSP ? 1 : 0);
+    }
+    case kRegArray:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
+      return computeSize(entry, lir->operands[4], true);
+    case kRegThread:  // lir operands - 0: reg, 1: disp
+      return computeSize(entry, lir->operands[1], false);
+    case kRegImm: {  // lir operands - 0: reg, 1: immediate
+      int reg = lir->operands[0];
+      // AX opcodes don't require the modrm byte.
+      return computeSize(entry, 0, false) - (reg == rAX ? 1 : 0);
+    }
+    case kMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
+      CHECK_NE(lir->operands[0], static_cast<int>(rSP));  // TODO: add extra SIB byte
+      return computeSize(entry, lir->operands[1], false);
+    case kArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
+      return computeSize(entry, lir->operands[3], true);
+    case kThreadImm:  // lir operands - 0: disp, 1: imm
+      return computeSize(entry, lir->operands[0], false);
+    case kRegRegImm:  // lir operands - 0: reg, 1: reg, 2: imm
+      return computeSize(entry, 0, false);
+    case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
+      CHECK_NE(lir->operands[1], static_cast<int>(rSP));  // TODO: add extra SIB byte
+      return computeSize(entry, lir->operands[2], false);
+    case kRegArrayImm:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm
+      return computeSize(entry, lir->operands[4], true);
+    case kMovRegImm:  // lir operands - 0: reg, 1: immediate
+      return 1 + entry->skeleton.immediate_bytes;
+    case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
+      // Shift by immediate one has a shorter opcode.
+      return computeSize(entry, 0, false) - (lir->operands[1] == 1 ? 1 : 0);
+    case kShiftMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
+      CHECK_NE(lir->operands[0], static_cast<int>(rSP));  // TODO: add extra SIB byte
+      // Shift by immediate one has a shorter opcode.
+      return computeSize(entry, lir->operands[1], false) - (lir->operands[2] == 1 ? 1 : 0);
+    case kShiftArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
+      // Shift by immediate one has a shorter opcode.
+      return computeSize(entry, lir->operands[3], true) - (lir->operands[4] == 1 ? 1 : 0);
+    case kShiftRegCl:
+      return computeSize(entry, 0, false);
+    case kShiftMemCl:  // lir operands - 0: base, 1: disp, 2: cl
+      CHECK_NE(lir->operands[0], static_cast<int>(rSP));  // TODO: add extra SIB byte
+      return computeSize(entry, lir->operands[1], false);
+    case kShiftArrayCl:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
+      return computeSize(entry, lir->operands[3], true);
+    case kRegCond:  // lir operands - 0: reg, 1: cond
+      return computeSize(entry, 0, false);
+    case kMemCond:  // lir operands - 0: base, 1: disp, 2: cond
+      CHECK_NE(lir->operands[0], static_cast<int>(rSP));  // TODO: add extra SIB byte
+      return computeSize(entry, lir->operands[1], false);
+    case kArrayCond:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond
+      return computeSize(entry, lir->operands[3], true);
+    case kJcc: case kJmp:
+      // Jumps only return the short form length, the correct length will be assigned to LIR
+      // flags.size during assembly.
+      return 2;
+    case kCall:
+      switch(lir->opcode) {
+        case kX86CallR: return 2;  // opcode modrm
+        case kX86CallM:  // lir operands - 0: base, 1: disp
+          return computeSize(entry, lir->operands[1], false);
+        case kX86CallA:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
+          return computeSize(entry, lir->operands[3], true);
+        default:
+          break;
+      }
+      break;
+    default:
+      break;
+  }
+  UNIMPLEMENTED(FATAL) << "Unimplemented size encoding for: " << entry->name;
   return 0;
 }
 
+static uint8_t modrmForDisp(int disp) {
+  if (disp == 0) {
+    return 0;
+  } else if (IS_SIMM8(disp)) {
+    return 1;
+  } else {
+    return 2;
+  }
+}
+
+static void emitDisp(CompilationUnit* cUnit, int disp) {
+  if (disp == 0) {
+    return;
+  } else if (IS_SIMM8(disp)) {
+    cUnit->codeBuffer.push_back(disp & 0xFF);
+  } else {
+    cUnit->codeBuffer.push_back(disp & 0xFF);
+    cUnit->codeBuffer.push_back((disp >> 8) & 0xFF);
+    cUnit->codeBuffer.push_back((disp >> 16) & 0xFF);
+    cUnit->codeBuffer.push_back((disp >> 24) & 0xFF);
+  }
+}
+
+static void emitOpReg(CompilationUnit* cUnit, const X86EncodingMap* entry, uint8_t reg) {
+  if (entry->skeleton.prefix1 != 0) {
+    cUnit->codeBuffer.push_back(entry->skeleton.prefix1);
+    if (entry->skeleton.prefix2 != 0) {
+      cUnit->codeBuffer.push_back(entry->skeleton.prefix2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.prefix2);
+  }
+  cUnit->codeBuffer.push_back(entry->skeleton.opcode);
+  if (entry->skeleton.opcode == 0x0F) {
+    cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode1);
+    if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) {
+      cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode2);
+    } else {
+      DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  }
+  DCHECK_LT(reg, 8);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
+  cUnit->codeBuffer.push_back(modrm);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
+static void emitOpMem(CompilationUnit* cUnit, const X86EncodingMap* entry, uint8_t base, int disp) {
+  if (entry->skeleton.prefix1 != 0) {
+    cUnit->codeBuffer.push_back(entry->skeleton.prefix1);
+    if (entry->skeleton.prefix2 != 0) {
+      cUnit->codeBuffer.push_back(entry->skeleton.prefix2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.prefix2);
+  }
+  cUnit->codeBuffer.push_back(entry->skeleton.opcode);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  DCHECK_LT(entry->skeleton.modrm_opcode, 8);
+  DCHECK_LT(base, 8);
+  uint8_t modrm = (modrmForDisp(disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base;
+  cUnit->codeBuffer.push_back(modrm);
+  emitDisp(cUnit, disp);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
+static void emitMemReg(CompilationUnit* cUnit, const X86EncodingMap* entry,
+                       uint8_t base, int disp, uint8_t reg) {
+  if (entry->skeleton.prefix1 != 0) {
+    cUnit->codeBuffer.push_back(entry->skeleton.prefix1);
+    if (entry->skeleton.prefix2 != 0) {
+      cUnit->codeBuffer.push_back(entry->skeleton.prefix2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.prefix2);
+  }
+  cUnit->codeBuffer.push_back(entry->skeleton.opcode);
+  if (entry->skeleton.opcode == 0x0F) {
+    cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode1);
+    if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) {
+      cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode2);
+    } else {
+      DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  }
+  DCHECK_LT(reg, 8);
+  DCHECK_LT(base, 8);
+  uint8_t modrm = (modrmForDisp(disp) << 6) | (reg << 3) | base;
+  cUnit->codeBuffer.push_back(modrm);
+  if (base == rSP) {
+    // Special SIB for SP base
+    cUnit->codeBuffer.push_back(0 << 6 | (rSP << 3) | rSP);
+  }
+  emitDisp(cUnit, disp);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
+static void emitRegMem(CompilationUnit* cUnit, const X86EncodingMap* entry,
+                       uint8_t reg, uint8_t base, int disp) {
+  // Opcode will flip operands.
+  emitMemReg(cUnit, entry, base, disp, reg);
+}
+
+static void emitRegArray(CompilationUnit* cUnit, const X86EncodingMap* entry, uint8_t reg,
+                         uint8_t base, uint8_t index, int scale, int disp) {
+  if (entry->skeleton.prefix1 != 0) {
+    cUnit->codeBuffer.push_back(entry->skeleton.prefix1);
+    if (entry->skeleton.prefix2 != 0) {
+      cUnit->codeBuffer.push_back(entry->skeleton.prefix2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.prefix2);
+  }
+  cUnit->codeBuffer.push_back(entry->skeleton.opcode);
+  if (entry->skeleton.opcode == 0x0F) {
+    cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode1);
+    if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) {
+      cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode2);
+    } else {
+      DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  }
+  DCHECK_LT(reg, 8);
+  uint8_t modrm = (modrmForDisp(disp) << 6) | (reg << 3) | rSP;
+  cUnit->codeBuffer.push_back(modrm);
+  DCHECK_LT(scale, 4);
+  DCHECK_LT(index, 8);
+  DCHECK_LT(base, 8);
+  uint8_t sib = (scale << 6) | (index << 3) | base;
+  cUnit->codeBuffer.push_back(sib);
+  emitDisp(cUnit, disp);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
+static void emitRegReg(CompilationUnit* cUnit, const X86EncodingMap* entry,
+                       uint8_t reg1, uint8_t reg2) {
+  if (entry->skeleton.prefix1 != 0) {
+    cUnit->codeBuffer.push_back(entry->skeleton.prefix1);
+    if (entry->skeleton.prefix2 != 0) {
+      cUnit->codeBuffer.push_back(entry->skeleton.prefix2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.prefix2);
+  }
+  cUnit->codeBuffer.push_back(entry->skeleton.opcode);
+  if (entry->skeleton.opcode == 0x0F) {
+    cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode1);
+    if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) {
+      cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode2);
+    } else {
+      DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  }
+  uint8_t modrm = (3 << 6) | (reg1 << 3) | reg2;
+  cUnit->codeBuffer.push_back(modrm);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
+static void emitRegImm(CompilationUnit* cUnit, const X86EncodingMap* entry,
+                       uint8_t reg, int imm) {
+  if (entry->skeleton.prefix1 != 0) {
+    cUnit->codeBuffer.push_back(entry->skeleton.prefix1);
+    if (entry->skeleton.prefix2 != 0) {
+      cUnit->codeBuffer.push_back(entry->skeleton.prefix2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.prefix2);
+  }
+  if (reg == rAX && entry->skeleton.ax_opcode != 0) {
+    cUnit->codeBuffer.push_back(entry->skeleton.ax_opcode);
+  } else {
+    cUnit->codeBuffer.push_back(entry->skeleton.opcode);
+    if (entry->skeleton.opcode == 0x0F) {
+      cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode1);
+      if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) {
+        cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode2);
+      } else {
+        DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+      }
+    } else {
+      DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+      DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+    }
+    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
+    cUnit->codeBuffer.push_back(modrm);
+  }
+  switch (entry->skeleton.immediate_bytes) {
+    case 1:
+      DCHECK(IS_SIMM8(imm));
+      cUnit->codeBuffer.push_back(imm & 0xFF);
+      break;
+    case 2:
+      DCHECK(IS_SIMM16(imm));
+      cUnit->codeBuffer.push_back(imm & 0xFF);
+      cUnit->codeBuffer.push_back((imm >> 8) & 0xFF);
+      break;
+    case 4:
+      cUnit->codeBuffer.push_back(imm & 0xFF);
+      cUnit->codeBuffer.push_back((imm >> 8) & 0xFF);
+      cUnit->codeBuffer.push_back((imm >> 16) & 0xFF);
+      cUnit->codeBuffer.push_back((imm >> 24) & 0xFF);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected immediate bytes (" << entry->skeleton.immediate_bytes
+          << ") for instruction: " << entry->name;
+      break;
+  }
+}
+
 /*
  * Assemble the LIR into binary instruction format.  Note that we may
  * discover that pc-relative displacements may not fit the selected
@@ -212,196 +661,75 @@
  * sequence or request that the trace be shortened and retried.
  */
 AssemblerStatus oatAssembleInstructions(CompilationUnit *cUnit,
-                                        intptr_t startAddr)
-{
-    UNIMPLEMENTED(WARNING) << "oatAssembleInstructions";
-    return kSuccess;
-#if 0
-    LIR *lir;
-    AssemblerStatus res = kSuccess;  // Assume success
+                                        intptr_t startAddr) {
+  LIR *lir;
+  AssemblerStatus res = kSuccess;  // Assume success
 
-    for (lir = (LIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
-        if (lir->opcode < 0) {
-            continue;
-        }
-
-
-        if (lir->flags.isNop) {
-            continue;
-        }
-
-        if (lir->flags.pcRelFixup) {
-            if (lir->opcode == kX86Delta) {
-                /*
-                 * The "Delta" pseudo-ops load the difference between
-                 * two pc-relative locations into a the target register
-                 * found in operands[0].  The delta is determined by
-                 * (label2 - label1), where label1 is a standard
-                 * kPseudoTargetLabel and is stored in operands[2].
-                 * If operands[3] is null, then label2 is a kPseudoTargetLabel
-                 * and is found in lir->target.  If operands[3] is non-NULL,
-                 * then it is a Switch/Data table.
-                 */
-                int offset1 = ((LIR*)lir->operands[2])->offset;
-                SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
-                int offset2 = tabRec ? tabRec->offset : lir->target->offset;
-                int delta = offset2 - offset1;
-                if ((delta & 0xffff) == delta) {
-                    // Fits
-                    lir->operands[1] = delta;
-                } else {
-                    // Doesn't fit - must expand to kX86Delta[Hi|Lo] pair
-                    LIR *newDeltaHi =
-                          rawLIR(cUnit, lir->dalvikOffset, kX86DeltaHi,
-                                 lir->operands[0], 0, lir->operands[2],
-                                 lir->operands[3], lir->target);
-                    oatInsertLIRBefore((LIR*)lir, (LIR*)newDeltaHi);
-                    LIR *newDeltaLo =
-                          rawLIR(cUnit, lir->dalvikOffset, kX86DeltaLo,
-                                 lir->operands[0], 0, lir->operands[2],
-                                 lir->operands[3], lir->target);
-                    oatInsertLIRBefore((LIR*)lir, (LIR*)newDeltaLo);
-                    lir->flags.isNop = true;
-                    res = kRetryAll;
-                }
-            } else if (lir->opcode == kX86DeltaLo) {
-                int offset1 = ((LIR*)lir->operands[2])->offset;
-                SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
-                int offset2 = tabRec ? tabRec->offset : lir->target->offset;
-                int delta = offset2 - offset1;
-                lir->operands[1] = delta & 0xffff;
-            } else if (lir->opcode == kX86DeltaHi) {
-                int offset1 = ((LIR*)lir->operands[2])->offset;
-                SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
-                int offset2 = tabRec ? tabRec->offset : lir->target->offset;
-                int delta = offset2 - offset1;
-                lir->operands[1] = (delta >> 16) & 0xffff;
-            } else if (lir->opcode == kX86B || lir->opcode == kX86Bal) {
-                LIR *targetLIR = (LIR *) lir->target;
-                intptr_t pc = lir->offset + 4;
-                intptr_t target = targetLIR->offset;
-                int delta = target - pc;
-                if (delta & 0x3) {
-                    LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
-                }
-                if (delta > 131068 || delta < -131069) {
-                    res = kRetryAll;
-                    convertShortToLongBranch(cUnit, lir);
-                } else {
-                    lir->operands[0] = delta >> 2;
-                }
-            } else if (lir->opcode >= kX86Beqz && lir->opcode <= kX86Bnez) {
-                LIR *targetLIR = (LIR *) lir->target;
-                intptr_t pc = lir->offset + 4;
-                intptr_t target = targetLIR->offset;
-                int delta = target - pc;
-                if (delta & 0x3) {
-                    LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
-                }
-                if (delta > 131068 || delta < -131069) {
-                    res = kRetryAll;
-                    convertShortToLongBranch(cUnit, lir);
-                } else {
-                    lir->operands[1] = delta >> 2;
-                }
-            } else if (lir->opcode == kX86Beq || lir->opcode == kX86Bne) {
-                LIR *targetLIR = (LIR *) lir->target;
-                intptr_t pc = lir->offset + 4;
-                intptr_t target = targetLIR->offset;
-                int delta = target - pc;
-                if (delta & 0x3) {
-                    LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
-                }
-                if (delta > 131068 || delta < -131069) {
-                    res = kRetryAll;
-                    convertShortToLongBranch(cUnit, lir);
-                } else {
-                    lir->operands[2] = delta >> 2;
-                }
-            } else if (lir->opcode == kX86Jal) {
-                intptr_t curPC = (startAddr + lir->offset + 4) & ~3;
-                intptr_t target = lir->operands[0];
-                /* ensure PC-region branch can be used */
-                DCHECK_EQ((curPC & 0xF0000000), (target & 0xF0000000));
-                if (target & 0x3) {
-                    LOG(FATAL) << "Jump target not multiple of 4: " << target;
-                }
-                lir->operands[0] =  target >> 2;
-            } else if (lir->opcode == kX86Lahi) { /* ld address hi (via lui) */
-                LIR *targetLIR = (LIR *) lir->target;
-                intptr_t target = startAddr + targetLIR->offset;
-                lir->operands[1] = target >> 16;
-            } else if (lir->opcode == kX86Lalo) { /* ld address lo (via ori) */
-                LIR *targetLIR = (LIR *) lir->target;
-                intptr_t target = startAddr + targetLIR->offset;
-                lir->operands[2] = lir->operands[2] + target;
-            }
-        }
-
-        /*
-         * If one of the pc-relative instructions expanded we'll have
-         * to make another pass.  Don't bother to fully assemble the
-         * instruction.
-         */
-        if (res != kSuccess) {
-            continue;
-        }
-        const X86EncodingMap *encoder = &EncodingMap[lir->opcode];
-        u4 bits = encoder->skeleton;
-        int i;
-        for (i = 0; i < 4; i++) {
-            u4 operand;
-            u4 value;
-            operand = lir->operands[i];
-            switch(encoder->fieldLoc[i].kind) {
-                case kFmtUnused:
-                    break;
-                case kFmtBitBlt:
-                    if (encoder->fieldLoc[i].start == 0 && encoder->fieldLoc[i].end == 31) {
-                        value = operand;
-                    } else {
-                        value = (operand << encoder->fieldLoc[i].start) &
-                                ((1 << (encoder->fieldLoc[i].end + 1)) - 1);
-                    }
-                    bits |= value;
-                    break;
-                case kFmtBlt5_2:
-                    value = (operand & 0x1f);
-                    bits |= (value << encoder->fieldLoc[i].start);
-                    bits |= (value << encoder->fieldLoc[i].end);
-                    break;
-                case kFmtDfp: {
-                    DCHECK(DOUBLEREG(operand));
-                    DCHECK((operand & 0x1) == 0);
-                    value = ((operand & FP_REG_MASK) << encoder->fieldLoc[i].start) &
-                            ((1 << (encoder->fieldLoc[i].end + 1)) - 1);
-                    bits |= value;
-                    break;
-                }
-                case kFmtSfp:
-                    DCHECK(SINGLEREG(operand));
-                    value = ((operand & FP_REG_MASK) << encoder->fieldLoc[i].start) &
-                            ((1 << (encoder->fieldLoc[i].end + 1)) - 1);
-                    bits |= value;
-                    break;
-                default:
-                    LOG(FATAL) << "Bad encoder format: "
-                               << (int)encoder->fieldLoc[i].kind;
-            }
-        }
-        // FIXME: need multi-endian handling here
-        cUnit->codeBuffer.push_back((bits >> 16) & 0xffff);
-        cUnit->codeBuffer.push_back(bits & 0xffff);
-        // TUNING: replace with proper delay slot handling
-        if (encoder->size == 8) {
-            const X86EncodingMap *encoder = &EncodingMap[kX86Nop];
-            u4 bits = encoder->skeleton;
-            cUnit->codeBuffer.push_back((bits >> 16) & 0xffff);
-            cUnit->codeBuffer.push_back(bits & 0xffff);
-        }
+  for (lir = (LIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
+    if (lir->opcode < 0) {
+      continue;
     }
-    return res;
-#endif
+
+
+    if (lir->flags.isNop) {
+      continue;
+    }
+
+    if (lir->flags.pcRelFixup) {
+      UNIMPLEMENTED(WARNING) << "PC relative fix up";
+    }
+
+    /*
+     * If one of the pc-relative instructions expanded we'll have
+     * to make another pass.  Don't bother to fully assemble the
+     * instruction.
+     */
+    if (res != kSuccess) {
+      continue;
+    }
+    const X86EncodingMap *entry = &EncodingMap[lir->opcode];
+    switch(entry->kind) {
+      case kData:  // 4 bytes of data
+        cUnit->codeBuffer.push_back(lir->operands[0]);
+        break;
+      case kNullary:  // 1 byte of opcode
+        DCHECK_EQ(0, entry->skeleton.prefix1);
+        DCHECK_EQ(0, entry->skeleton.prefix2);
+        cUnit->codeBuffer.push_back(entry->skeleton.opcode);
+        DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+        DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+        DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+        DCHECK_EQ(0, entry->skeleton.ax_opcode);
+        DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+        break;
+      case kReg:  // lir operands - 0: reg
+        emitOpReg(cUnit, entry, lir->operands[0]);
+        break;
+      case kMem:  // lir operands - 0: base, 1: disp
+        emitOpMem(cUnit, entry, lir->operands[0], lir->operands[1]);
+        break;
+      case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
+        emitMemReg(cUnit, entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
+      case kRegMem:  // lir operands - 0: reg, 1: base, 2: disp
+        emitRegMem(cUnit, entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
+      case kRegArray:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
+        emitRegArray(cUnit, entry, lir->operands[0], lir->operands[1], lir->operands[2],
+                     lir->operands[3], lir->operands[4]);
+        break;
+      case kRegReg:  // lir operands - 0: reg1, 1: reg2
+        emitRegReg(cUnit, entry, lir->operands[0], lir->operands[1]);
+        break;
+      case kRegImm:  // lir operands - 0: reg, 1: immediate
+        emitRegImm(cUnit, entry, lir->operands[0], lir->operands[1]);
+        break;
+      default:
+        UNIMPLEMENTED(FATAL) << "Unimplemented encoding for: " << entry->name;
+        break;
+    }
+  }
+  return res;
 }
 
 /*
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 5e59b56..52b4fc4 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -17,213 +17,210 @@
 namespace art {
 
 bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest,
-                     RegLocation rlSrc1, RegLocation rlSrc2)
-{
-    UNIMPLEMENTED(WARNING) << "genArithOpFloat";
-    return false;
-#if 0
-    int op = kX86Nop;
-    RegLocation rlResult;
+                     RegLocation rlSrc1, RegLocation rlSrc2) {
+  X86OpCode op = kX86Nop;
+  RegLocation rlResult;
 
-    /*
-     * Don't attempt to optimize register usage since these opcodes call out to
-     * the handlers.
-     */
-    switch (mir->dalvikInsn.opcode) {
-        case Instruction::ADD_FLOAT_2ADDR:
-        case Instruction::ADD_FLOAT:
-            op = kX86Fadds;
-            break;
-        case Instruction::SUB_FLOAT_2ADDR:
-        case Instruction::SUB_FLOAT:
-            op = kX86Fsubs;
-            break;
-        case Instruction::DIV_FLOAT_2ADDR:
-        case Instruction::DIV_FLOAT:
-            op = kX86Fdivs;
-            break;
-        case Instruction::MUL_FLOAT_2ADDR:
-        case Instruction::MUL_FLOAT:
-            op = kX86Fmuls;
-            break;
-        case Instruction::REM_FLOAT_2ADDR:
-        case Instruction::REM_FLOAT:
-        case Instruction::NEG_FLOAT: {
-            return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
-        }
-        default:
-            return true;
+  /*
+   * Don't attempt to optimize register usage since these opcodes call out to
+   * the handlers.
+   */
+  switch (mir->dalvikInsn.opcode) {
+    case Instruction::ADD_FLOAT_2ADDR:
+    case Instruction::ADD_FLOAT:
+      op = kX86AddssRR;
+      break;
+    case Instruction::SUB_FLOAT_2ADDR:
+    case Instruction::SUB_FLOAT:
+      op = kX86SubssRR;
+      break;
+    case Instruction::DIV_FLOAT_2ADDR:
+    case Instruction::DIV_FLOAT:
+      op = kX86DivssRR;
+      break;
+    case Instruction::MUL_FLOAT_2ADDR:
+    case Instruction::MUL_FLOAT:
+      op = kX86MulssRR;
+      break;
+    case Instruction::NEG_FLOAT:
+      UNIMPLEMENTED(WARNING) << "inline fneg"; // pxor xmm, [0x80000000]
+                                                             // fall-through
+    case Instruction::REM_FLOAT_2ADDR:
+    case Instruction::REM_FLOAT: {
+      return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
     }
-    rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
-    rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
-    rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-    newLIR3(cUnit, (X86OpCode)op, rlResult.lowReg, rlSrc1.lowReg,
-                    rlSrc2.lowReg);
-    storeValue(cUnit, rlDest, rlResult);
+    default:
+      return true;
+  }
+  rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+  rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+  rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+  int rDest = rlResult.lowReg;
+  int rSrc1 = rlSrc1.lowReg;
+  int rSrc2 = rlSrc2.lowReg;
+  // TODO: at least CHECK_NE(rDest, rSrc2);
+  opRegCopy(cUnit, rDest, rSrc1);
+  newLIR2(cUnit, op, rDest, rSrc2);
+  storeValue(cUnit, rlDest, rlResult);
 
-    return false;
-#endif
+  return false;
 }
 
 static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir,
                              RegLocation rlDest, RegLocation rlSrc1,
-                             RegLocation rlSrc2)
-{
-    UNIMPLEMENTED(WARNING) << "genArithOpDouble";
-    return false;
-#if 0
-    int op = kX86Nop;
-    RegLocation rlResult;
+                             RegLocation rlSrc2) {
+  X86OpCode op = kX86Nop;
+  RegLocation rlResult;
 
-    switch (mir->dalvikInsn.opcode) {
-        case Instruction::ADD_DOUBLE_2ADDR:
-        case Instruction::ADD_DOUBLE:
-            op = kX86Faddd;
-            break;
-        case Instruction::SUB_DOUBLE_2ADDR:
-        case Instruction::SUB_DOUBLE:
-            op = kX86Fsubd;
-            break;
-        case Instruction::DIV_DOUBLE_2ADDR:
-        case Instruction::DIV_DOUBLE:
-            op = kX86Fdivd;
-            break;
-        case Instruction::MUL_DOUBLE_2ADDR:
-        case Instruction::MUL_DOUBLE:
-            op = kX86Fmuld;
-            break;
-        case Instruction::REM_DOUBLE_2ADDR:
-        case Instruction::REM_DOUBLE:
-        case Instruction::NEG_DOUBLE: {
-            return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
-        }
-        default:
-            return true;
+  switch (mir->dalvikInsn.opcode) {
+    case Instruction::ADD_DOUBLE_2ADDR:
+    case Instruction::ADD_DOUBLE:
+      op = kX86AddsdRR;
+      break;
+    case Instruction::SUB_DOUBLE_2ADDR:
+    case Instruction::SUB_DOUBLE:
+      op = kX86SubsdRR;
+      break;
+    case Instruction::DIV_DOUBLE_2ADDR:
+    case Instruction::DIV_DOUBLE:
+      op = kX86DivsdRR;
+      break;
+    case Instruction::MUL_DOUBLE_2ADDR:
+    case Instruction::MUL_DOUBLE:
+      op = kX86MulsdRR;
+      break;
+    case Instruction::NEG_DOUBLE:
+    case Instruction::REM_DOUBLE_2ADDR:
+    case Instruction::REM_DOUBLE: {
+      return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
     }
-    rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
-    DCHECK(rlSrc1.wide);
-    rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
-    DCHECK(rlSrc2.wide);
-    rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-    DCHECK(rlDest.wide);
-    DCHECK(rlResult.wide);
-    newLIR3(cUnit, (X86OpCode)op, S2D(rlResult.lowReg, rlResult.highReg),
-            S2D(rlSrc1.lowReg, rlSrc1.highReg),
-            S2D(rlSrc2.lowReg, rlSrc2.highReg));
-    storeValueWide(cUnit, rlDest, rlResult);
-    return false;
-#endif
+    default:
+      return true;
+  }
+  rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+  DCHECK(rlSrc1.wide);
+  rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
+  DCHECK(rlSrc2.wide);
+  rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+  DCHECK(rlDest.wide);
+  DCHECK(rlResult.wide);
+  int rDest = S2D(rlResult.lowReg, rlResult.highReg);
+  int rSrc1 = S2D(rlSrc1.lowReg, rlSrc1.highReg);
+  int rSrc2 = S2D(rlSrc2.lowReg, rlSrc2.highReg);
+  // TODO: at least CHECK_NE(rDest, rSrc2);
+  opRegCopy(cUnit, rDest, rSrc1);
+  newLIR2(cUnit, op, rDest, rSrc2);
+  storeValueWide(cUnit, rlDest, rlResult);
+  return false;
 }
 
-static bool genConversion(CompilationUnit *cUnit, MIR *mir)
-{
-    UNIMPLEMENTED(WARNING) << "genConversion";
-    return false;
-#if 0
-    Instruction::Code opcode = mir->dalvikInsn.opcode;
-    bool longSrc = false;
-    bool longDest = false;
-    RegLocation rlSrc;
-    RegLocation rlDest;
-    int op = kX86Nop;
-    int srcReg;
-    RegLocation rlResult;
-    switch (opcode) {
-        case Instruction::INT_TO_FLOAT:
-            longSrc = false;
-            longDest = false;
-            op = kX86Fcvtsw;
-            break;
-        case Instruction::DOUBLE_TO_FLOAT:
-            longSrc = true;
-            longDest = false;
-            op = kX86Fcvtsd;
-            break;
-        case Instruction::FLOAT_TO_DOUBLE:
-            longSrc = false;
-            longDest = true;
-            op = kX86Fcvtds;
-            break;
-        case Instruction::INT_TO_DOUBLE:
-            longSrc = false;
-            longDest = true;
-            op = kX86Fcvtdw;
-            break;
-        case Instruction::FLOAT_TO_INT:
-        case Instruction::DOUBLE_TO_INT:
-        case Instruction::LONG_TO_DOUBLE:
-        case Instruction::FLOAT_TO_LONG:
-        case Instruction::LONG_TO_FLOAT:
-        case Instruction::DOUBLE_TO_LONG:
-            return genConversionPortable(cUnit, mir);
-        default:
-            return true;
-    }
-    if (longSrc) {
-        rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
-        rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
-        srcReg = S2D(rlSrc.lowReg, rlSrc.highReg);
-    } else {
-        rlSrc = oatGetSrc(cUnit, mir, 0);
-        rlSrc = loadValue(cUnit, rlSrc, kFPReg);
-        srcReg = rlSrc.lowReg;
-    }
-    if (longDest) {
-        rlDest = oatGetDestWide(cUnit, mir, 0, 1);
-        rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-        newLIR2(cUnit, (X86OpCode)op, S2D(rlResult.lowReg, rlResult.highReg), srcReg);
-        storeValueWide(cUnit, rlDest, rlResult);
-    } else {
-        rlDest = oatGetDest(cUnit, mir, 0);
-        rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-        newLIR2(cUnit, (X86OpCode)op, rlResult.lowReg, srcReg);
-        storeValue(cUnit, rlDest, rlResult);
-    }
-    return false;
-#endif
+static bool genConversion(CompilationUnit *cUnit, MIR *mir) {  // lower int/float/double conversion ops to SSE scalar converts
+  Instruction::Code opcode = mir->dalvikInsn.opcode;
+  bool longSrc = false;
+  bool longDest = false;
+  RegLocation rlSrc;
+  RegLocation rlDest;
+  X86OpCode op = kX86Nop;
+  int srcReg;
+  RegLocation rlResult;
+  switch (opcode) {
+    case Instruction::INT_TO_FLOAT:
+      longSrc = false;
+      longDest = false;
+      op = kX86Cvtsi2ssRR;  // cvtsi2ss: int32 -> single
+      break;
+    case Instruction::DOUBLE_TO_FLOAT:
+      longSrc = true;
+      longDest = false;
+      op = kX86Cvtsd2ssRR;  // cvtsd2ss: double -> single
+      break;
+    case Instruction::FLOAT_TO_DOUBLE:
+      longSrc = false;
+      longDest = true;
+      op = kX86Cvtss2sdRR;  // cvtss2sd: single -> double
+      break;
+    case Instruction::INT_TO_DOUBLE:
+      longSrc = false;
+      longDest = true;
+      op = kX86Cvtsi2sdRR;  // cvtsi2sd: int32 -> double
+      break;
+    case Instruction::FLOAT_TO_INT:
+    case Instruction::DOUBLE_TO_INT:
+      // These are easy to implement inline except when the src is > MAX_INT/LONG the result
+      // needs to be changed from 0x80000000 to 0x7FFFFFF (requires an in memory float/double
+      // literal constant to compare against).
+      UNIMPLEMENTED(WARNING) << "inline [df]2i";  // deliberate fall-through to the portable path below
+    case Instruction::LONG_TO_DOUBLE:
+    case Instruction::FLOAT_TO_LONG:
+    case Instruction::LONG_TO_FLOAT:
+    case Instruction::DOUBLE_TO_LONG:
+      return genConversionPortable(cUnit, mir);  // out-of-line/helper-based conversion
+    default:
+      return true;  // not a conversion opcode handled here
+  }
+  if (longSrc) {
+    rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
+    rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
+    srcReg = S2D(rlSrc.lowReg, rlSrc.highReg);  // treat the register pair as one XMM double
+  } else {
+    rlSrc = oatGetSrc(cUnit, mir, 0);
+    rlSrc = loadValue(cUnit, rlSrc, kFPReg);
+    srcReg = rlSrc.lowReg;
+  }
+  if (longDest) {
+    rlDest = oatGetDestWide(cUnit, mir, 0, 1);
+    rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+    newLIR2(cUnit, op, S2D(rlResult.lowReg, rlResult.highReg), srcReg);
+    storeValueWide(cUnit, rlDest, rlResult);
+  } else {
+    rlDest = oatGetDest(cUnit, mir, 0);
+    rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+    newLIR2(cUnit, op, rlResult.lowReg, srcReg);
+    storeValue(cUnit, rlDest, rlResult);
+  }
+  return false;
+}
 
 static bool genCmpFP(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest,
-                     RegLocation rlSrc1, RegLocation rlSrc2)
-{
-    UNIMPLEMENTED(WARNING) << "genCmpFP";
-#if 0
-    bool wide = true;
-    int offset;
-
-    switch(mir->dalvikInsn.opcode) {
-        case Instruction::CMPL_FLOAT:
-            offset = OFFSETOF_MEMBER(Thread, pCmplFloat);
-            wide = false;
-            break;
-        case Instruction::CMPG_FLOAT:
-            offset = OFFSETOF_MEMBER(Thread, pCmpgFloat);
-            wide = false;
-            break;
-        case Instruction::CMPL_DOUBLE:
-            offset = OFFSETOF_MEMBER(Thread, pCmplDouble);
-            break;
-        case Instruction::CMPG_DOUBLE:
-            offset = OFFSETOF_MEMBER(Thread, pCmpgDouble);
-            break;
-        default:
-            return true;
-    }
-    oatFlushAllRegs(cUnit);
-    oatLockCallTemps(cUnit);
-    if (wide) {
-        loadValueDirectWideFixed(cUnit, rlSrc1, rARG0, rARG1);
-        loadValueDirectWideFixed(cUnit, rlSrc2, rARG2, rARG3);
-    } else {
-        loadValueDirectFixed(cUnit, rlSrc1, rARG0);
-        loadValueDirectFixed(cUnit, rlSrc2, rARG1);
-    }
-    int rTgt = loadHelper(cUnit, offset);
-    opReg(cUnit, kOpBlx, rTgt);
-    RegLocation rlResult = oatGetReturn(cUnit);
-    storeValue(cUnit, rlDest, rlResult);
-#endif
-    return false;
+                     RegLocation rlSrc1, RegLocation rlSrc2) {
+  // Lower cmpl/cmpg-float/double inline using ucomiss/ucomisd.
+  // Result: -1 if src1 < src2, 0 if equal, 1 if src1 > src2; on NaN the
+  // cmpg forms produce 1 and the cmpl forms produce -1.
+  Instruction::Code code = mir->dalvikInsn.opcode;
+  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
+  bool unorderedGt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
+  int srcReg1;
+  int srcReg2;
+  if (single) {
+    rlSrc1 = oatGetSrc(cUnit, mir, 0);
+    rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+    srcReg1 = rlSrc1.lowReg;
+    rlSrc2 = oatGetSrc(cUnit, mir, 1);  // was index 0 / rlSrc1: compared a value with itself
+    rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+    srcReg2 = rlSrc2.lowReg;
+  } else {
+    rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+    srcReg1 = S2D(rlSrc1.lowReg, rlSrc1.highReg);
+    rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3);  // second wide operand occupies srcs 2..3
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
+    srcReg2 = S2D(rlSrc2.lowReg, rlSrc2.highReg);
+  }
+  rlDest = oatGetDest(cUnit, mir, 0);
+  // The comparison result is an int: it must live in a core register
+  // (set8 below requires a byte-addressable GPR, not an XMM reg).
+  RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+  opRegImm(cUnit, kOpMov, rlResult.lowReg, unorderedGt ? 1 : 0);
+  if (single) {
+    newLIR2(cUnit, kX86UcomissRR, srcReg1, srcReg2);
+  } else {
+    newLIR2(cUnit, kX86UcomisdRR, srcReg1, srcReg2);
+  }
+  LIR* branch = NULL;
+  if (unorderedGt) {
+    branch = newLIR2(cUnit, kX86Jcc, 0, kX86CondPE);  // PF set => unordered (NaN): keep the preloaded 1
+  }
+  newLIR2(cUnit, kX86Set8R, rlResult.lowReg, kX86CondA /* above - unsigned > */);
+  newLIR2(cUnit, kX86Sbb32RI, rlResult.lowReg, 0);  // CF set (below, or cmpl-NaN) turns 0 into -1
+  if (unorderedGt) {
+    branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+  }
+  storeValue(cUnit, rlDest, rlResult);  // was missing: the result was never written back
+  return false;
 }
 
 } //  namespace art
diff --git a/src/compiler/codegen/x86/GenInvoke.cc b/src/compiler/codegen/x86/GenInvoke.cc
index 2f095f1..e19afdd 100644
--- a/src/compiler/codegen/x86/GenInvoke.cc
+++ b/src/compiler/codegen/x86/GenInvoke.cc
@@ -31,17 +31,13 @@
                             uint32_t methodIdx);
 /*
  * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform intial
+ * to a callee-save register, flush them to the frame.  Perform initial
  * assignment of promoted arguments.
  */
 void flushIns(CompilationUnit* cUnit)
 {
-    UNIMPLEMENTED(WARNING) << "flushIns";
-#if 0
     if (cUnit->numIns == 0)
         return;
-    int firstArgReg = rARG1;
-    int lastArgReg = rARG3;
     int startVReg = cUnit->numDalvikRegisters - cUnit->numIns;
     /*
      * Arguments passed in registers should be flushed
@@ -56,17 +52,17 @@
      */
     for (int i = 0; i < cUnit->numIns; i++) {
         PromotionMap vMap = cUnit->promotionMap[startVReg + i];
-        if (i <= (lastArgReg - firstArgReg)) {
+        if (i == 0 || i == 1) {
             // If arriving in register
             if (vMap.coreLocation == kLocPhysReg) {
-                opRegCopy(cUnit, vMap.coreReg, firstArgReg + i);
+                opRegCopy(cUnit, vMap.coreReg, i == 0 ? rARG1 : rARG2);
             }
             if (vMap.fpLocation == kLocPhysReg) {
-                opRegCopy(cUnit, vMap.fpReg, firstArgReg + i);
+                opRegCopy(cUnit, vMap.fpReg, i == 0 ? rARG1 : rARG2);
             }
             // Also put a copy in memory in case we're partially promoted
             storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
-                          firstArgReg + i, kWord);
+                          i == 0 ? rARG1 : rARG2, kWord);
         } else {
             // If arriving in frame & promoted
             if (vMap.coreLocation == kLocPhysReg) {
@@ -79,7 +75,6 @@
             }
         }
     }
-#endif
 }
 
 /*
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index 9746707..c28bae9 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -25,13 +25,26 @@
  */
 
 //FIXME: restore "static" when usage uncovered
-/*static*/ int coreRegs[] = {rAX, rCX, rDX, rBX, rSP, rBP, rSI, rDI};
+/*static*/ int coreRegs[] = {
+    rAX, rCX, rDX, rBX, rSP, rBP, rSI, rDI,  // trailing comma needed before the optional REX block
+#ifdef TARGET_REX_SUPPORT
+    r8, r9, r10, r11, r12, r13, r14, r15
+#endif
+};
 /*static*/ int reservedRegs[] = {rSP};
 /*static*/ int coreTemps[] = {rAX, rCX, rDX};
-/*static*/ int fpRegs[] = {fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7, fr8, fr9,
-                       fr10, fr11, fr12, fr13, fr14, fr15};
-/*static*/ int fpTemps[] = {fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7, fr8, fr9,
-                        fr10, fr11, fr12, fr13, fr14, fr15};
+/*static*/ int fpRegs[] = {
+    fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
+#ifdef TARGET_REX_SUPPORT
+    fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15
+#endif
+};
+/*static*/ int fpTemps[] = {
+    fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
+#ifdef TARGET_REX_SUPPORT
+    fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15
+#endif
+};
 
 void genBarrier(CompilationUnit *cUnit);
 void storePair(CompilationUnit *cUnit, int base, int lowReg,
@@ -45,36 +58,29 @@
 
 LIR *fpRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
 {
-    UNIMPLEMENTED(WARNING) << "fpRegCopy";
-    return NULL;
-#if 0
     int opcode;
     /* must be both DOUBLE or both not DOUBLE */
     DCHECK_EQ(DOUBLEREG(rDest),DOUBLEREG(rSrc));
     if (DOUBLEREG(rDest)) {
-        opcode = kX86Fmovd;
+        opcode = kX86MovsdRR;  // movsd xmm, xmm
     } else {
         if (SINGLEREG(rDest)) {
             if (SINGLEREG(rSrc)) {
-                opcode = kX86Fmovs;
-            } else {
-                /* note the operands are swapped for the mtc1 instr */
-                int tOpnd = rSrc;
-                rSrc = rDest;
-                rDest = tOpnd;
-                opcode = kX86Mtc1;
+                opcode = kX86MovssRR;  // movss xmm, xmm
+            } else {  // Fpr <- Gpr
+                opcode = kX86MovdxrRR;  // movd xmm, r32
             }
-        } else {
+        } else {  // Gpr <- Fpr
             DCHECK(SINGLEREG(rSrc));
-            opcode = kX86Mfc1;
+            opcode = kX86MovdrxRR;  // movd r32, xmm
         }
     }
+    DCHECK((EncodingMap[opcode].flags & IS_BINARY_OP) != 0);  // every copy form selected above is two-operand
     LIR* res = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, rSrc, rDest);
     if (rDest == rSrc) {
         res->flags.isNop = true;
     }
     return res;
-#endif
 }
 
 /*
@@ -123,277 +129,186 @@
 #endif
 }
 
-LIR *opNone(CompilationUnit *cUnit, OpKind op)
-{
-    UNIMPLEMENTED(WARNING) << "opNone";
-    return NULL;
-#if 0
-    LIR *res;
-    X86OpCode opcode = kX86Nop;
-    switch (op) {
-        case kOpUncondBr:
-            opcode = kX86B;
-            break;
-        default:
-            LOG(FATAL) << "Bad case in opNone";
-    }
-    res = newLIR0(cUnit, opcode);
-    return res;
-#endif
+LIR* opBranchUnconditional(CompilationUnit *cUnit, OpKind op) {  // emit an unconditional jmp; displacement fixed up at assembly
+  CHECK_EQ(op, kOpUncondBr);
+  return newLIR1(cUnit, kX86Jmp, 0 /* offset to be patched */ );
+}
 
 LIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask);
 
-LIR* opBranchUnconditional(CompilationUnit* cUnit, OpKind op)
-{
-    UNIMPLEMENTED(WARNING) << "opBranchUnconditional";
-    return NULL;
-}
-
+X86ConditionCode oatX86ConditionEncoding(ConditionCode cond);  // maps the generic ConditionCode to an x86 cc encoding
 LIR* opCondBranch(CompilationUnit* cUnit, ConditionCode cc, LIR* target)
 {
-    UNIMPLEMENTED(WARNING) << "opCondBranch";
-    return NULL;
+  LIR* branch = newLIR2(cUnit, kX86Jcc, 0 /* offset to be patched */,
+                        oatX86ConditionEncoding(cc));
+  branch->target = target;  // assembler resolves the jcc displacement from this link
+  return branch;
 }
 
-LIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc)
+// Emit a single-register op (neg/not/indirect call) on a core register.
+LIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc) {
+  X86OpCode opcode = kX86Bkpt;
+  switch (op) {
+    case kOpNeg: opcode = kX86Neg32R; break;
+    case kOpNot: opcode = kX86Not32R; break;  // required by opRegReg's kOpMvn expansion, which calls opReg(kOpNot)
+    case kOpBlx: opcode = kX86CallR; break;
+    default:
+      LOG(FATAL) << "Bad case in opReg " << op;
+  }
+  return newLIR1(cUnit, opcode, rDestSrc);
+}
+
+LIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1, int value) {  // rDestSrc1 = rDestSrc1 <op> imm
+  X86OpCode opcode = kX86Bkpt;
+  bool byteImm = IS_SIMM8(value);  // prefer the shorter imm8 encodings where the value fits
+  switch (op) {
+    case kOpLsl: opcode = kX86Sal32RI; break;
+    case kOpLsr: opcode = kX86Shr32RI; break;
+    case kOpAsr: opcode = kX86Sar32RI; break;
+    case kOpAdd: opcode = byteImm ? kX86Add32RI8 : kX86Add32RI; break;
+    case kOpOr:  opcode = byteImm ? kX86Or32RI8  : kX86Or32RI;  break;
+    case kOpAdc: opcode = byteImm ? kX86Adc32RI8 : kX86Adc32RI; break;
+    //case kOpSbb: opcode = kX86Sbb32RI; break;
+    case kOpAnd: opcode = byteImm ? kX86And32RI8 : kX86And32RI; break;
+    case kOpSub: opcode = byteImm ? kX86Sub32RI8 : kX86Sub32RI; break;
+    case kOpXor: opcode = byteImm ? kX86Xor32RI8 : kX86Xor32RI; break;
+    case kOpCmp: opcode = byteImm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
+    case kOpMov: {
+      if (value == 0) {  // turn "mov reg, 0" into "xor reg, reg"; NOTE: unlike mov, xor clobbers EFLAGS
+        opcode = kX86Xor32RR;
+        value = rDestSrc1;  // reused as the second *register* operand of the RR xor
+      } else {
+        opcode = kX86Mov32RI;
+      }
+      break;
+    }
+    case kOpMul:
+      opcode = byteImm ? kX86Imul32RRI8 : kX86Imul32RRI;
+      return newLIR3(cUnit, opcode, rDestSrc1, rDestSrc1, value);  // imul is the three-operand form (dest, src, imm)
+    default:
+      LOG(FATAL) << "Bad case in opRegImm " << op;
+  }
+  return newLIR2(cUnit, opcode, rDestSrc1, value);
+}
+
+LIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1, int rSrc2)
 {
-    UNIMPLEMENTED(WARNING) << "opReg";
-    return NULL;
-#if 0
     X86OpCode opcode = kX86Nop;
     switch (op) {
-        case kOpBlx:
-            opcode = kX86Jalr;
-            break;
-        case kOpBx:
-            return newLIR1(cUnit, kX86Jr, rDestSrc);
-            break;
-        default:
-            LOG(FATAL) << "Bad case in opReg";
-    }
-    return newLIR2(cUnit, opcode, r_RA, rDestSrc);
-#endif
-}
-
-LIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest,
-                     int rSrc1, int value);
-LIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
-                  int value)
-{
-    UNIMPLEMENTED(WARNING) << "opRegImm";
-    return NULL;
-#if 0
-    LIR *res;
-    bool neg = (value < 0);
-    int absValue = (neg) ? -value : value;
-    bool shortForm = (absValue & 0xff) == absValue;
-    X86OpCode opcode = kX86Nop;
-    switch (op) {
-        case kOpAdd:
-            return opRegRegImm(cUnit, op, rDestSrc1, rDestSrc1, value);
-            break;
-        case kOpSub:
-            return opRegRegImm(cUnit, op, rDestSrc1, rDestSrc1, value);
-            break;
-        default:
-            LOG(FATAL) << "Bad case in opRegImm";
-            break;
-    }
-    if (shortForm)
-        res = newLIR2(cUnit, opcode, rDestSrc1, absValue);
-    else {
-        int rScratch = oatAllocTemp(cUnit);
-        res = loadConstant(cUnit, rScratch, value);
-        if (op == kOpCmp)
-            newLIR2(cUnit, opcode, rDestSrc1, rScratch);
-        else
-            newLIR3(cUnit, opcode, rDestSrc1, rDestSrc1, rScratch);
-    }
-    return res;
-#endif
-}
-
-LIR *opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest,
-                           int rSrc1, int rSrc2)
-{
-    UNIMPLEMENTED(WARNING) << "opRegRegReg";
-    return NULL;
-#if 0
-    X86OpCode opcode = kX86Nop;
-    switch (op) {
-        case kOpAdd:
-            opcode = kX86Addu;
-            break;
-        case kOpSub:
-            opcode = kX86Subu;
-            break;
-        case kOpAnd:
-            opcode = kX86And;
-            break;
-        case kOpMul:
-            opcode = kX86Mul;
-            break;
-        case kOpOr:
-            opcode = kX86Or;
-            break;
-        case kOpXor:
-            opcode = kX86Xor;
-            break;
-        case kOpLsl:
-            opcode = kX86Sllv;
-            break;
-        case kOpLsr:
-            opcode = kX86Srlv;
-            break;
-        case kOpAsr:
-            opcode = kX86Srav;
-            break;
-        case kOpAdc:
-        case kOpSbc:
-            LOG(FATAL) << "No carry bit on MIPS";
-            break;
-        default:
-            LOG(FATAL) << "bad case in opRegRegReg";
-            break;
-    }
-    return newLIR3(cUnit, opcode, rDest, rSrc1, rSrc2);
-#endif
-}
-
-LIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest,
-                           int rSrc1, int value)
-{
-    UNIMPLEMENTED(WARNING) << "opRegRegImm";
-    return NULL;
-#if 0
-    LIR *res;
-    X86OpCode opcode = kX86Nop;
-    bool shortForm = true;
-
-    switch(op) {
-        case kOpAdd:
-            if (IS_SIMM16(value)) {
-                opcode = kX86Addiu;
-            }
-            else {
-                shortForm = false;
-                opcode = kX86Addu;
-            }
-            break;
-        case kOpSub:
-            if (IS_SIMM16((-value))) {
-                value = -value;
-                opcode = kX86Addiu;
-            }
-            else {
-                shortForm = false;
-                opcode = kX86Subu;
-            }
-            break;
-        case kOpLsl:
-                DCHECK(value >= 0 && value <= 31);
-                opcode = kX86Sll;
-                break;
-        case kOpLsr:
-                DCHECK(value >= 0 && value <= 31);
-                opcode = kX86Srl;
-                break;
-        case kOpAsr:
-                DCHECK(value >= 0 && value <= 31);
-                opcode = kX86Sra;
-                break;
-        case kOpAnd:
-            if (IS_UIMM16((value))) {
-                opcode = kX86Andi;
-            }
-            else {
-                shortForm = false;
-                opcode = kX86And;
-            }
-            break;
-        case kOpOr:
-            if (IS_UIMM16((value))) {
-                opcode = kX86Ori;
-            }
-            else {
-                shortForm = false;
-                opcode = kX86Or;
-            }
-            break;
-        case kOpXor:
-            if (IS_UIMM16((value))) {
-                opcode = kX86Xori;
-            }
-            else {
-                shortForm = false;
-                opcode = kX86Xor;
-            }
-            break;
-        case kOpMul:
-            shortForm = false;
-            opcode = kX86Mul;
-            break;
-        default:
-            LOG(FATAL) << "Bad case in opRegRegImm";
-            break;
-    }
-
-    if (shortForm)
-        res = newLIR3(cUnit, opcode, rDest, rSrc1, value);
-    else {
-        if (rDest != rSrc1) {
-            res = loadConstant(cUnit, rDest, value);
-            newLIR3(cUnit, opcode, rDest, rSrc1, rDest);
-        } else {
-            int rScratch = oatAllocTemp(cUnit);
-            res = loadConstant(cUnit, rScratch, value);
-            newLIR3(cUnit, opcode, rDest, rSrc1, rScratch);
-        }
-    }
-    return res;
-#endif
-}
-
-LIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
-                  int rSrc2)
-{
-    UNIMPLEMENTED(WARNING) << "opRegReg";
-    return NULL;
-#if 0
-    X86OpCode opcode = kX86Nop;
-    LIR *res;
-    switch (op) {
-        case kOpMov:
-            opcode = kX86Move;
-            break;
-        case kOpMvn:
-            return newLIR3(cUnit, kX86Nor, rDestSrc1, rSrc2, r_ZERO);
-        case kOpNeg:
-            return newLIR3(cUnit, kX86Subu, rDestSrc1, r_ZERO, rSrc2);
-        case kOpAdd:
-        case kOpAnd:
-        case kOpMul:
-        case kOpOr:
-        case kOpSub:
-        case kOpXor:
-            return opRegRegReg(cUnit, op, rDestSrc1, rDestSrc1, rSrc2);
-        case kOp2Byte:
-            res = opRegRegImm(cUnit, kOpLsl, rDestSrc1, rSrc2, 24);
-            opRegRegImm(cUnit, kOpAsr, rDestSrc1, rDestSrc1, 24);
-            return res;
-        case kOp2Short:
-            res = opRegRegImm(cUnit, kOpLsl, rDestSrc1, rSrc2, 16);
-            opRegRegImm(cUnit, kOpAsr, rDestSrc1, rDestSrc1, 16);
-            return res;
-        case kOp2Char:
-             return newLIR3(cUnit, kX86Andi, rDestSrc1, rSrc2, 0xFFFF);
-        default:
-            LOG(FATAL) << "Bad case in opRegReg";
-            break;
+        // X86 unary opcodes
+      case kOpMvn:
+        opRegCopy(cUnit, rDestSrc1, rSrc2);
+        return opReg(cUnit, kOpNot, rDestSrc1);
+      case kOpNeg:
+        opRegCopy(cUnit, rDestSrc1, rSrc2);
+        return opReg(cUnit, kOpNeg, rDestSrc1);
+        // X86 binary opcodes
+      case kOpSub: opcode = kX86Sub32RR; break;
+      case kOpSbc: opcode = kX86Sbb32RR; break;
+      case kOpLsl: opcode = kX86Sal32RC; break;
+      case kOpLsr: opcode = kX86Shr32RC; break;
+      case kOpAsr: opcode = kX86Sar32RC; break;
+      case kOpMov: opcode = kX86Mov32RR; break;
+      case kOpCmp: opcode = kX86Cmp32RR; break;
+      case kOpAdd: opcode = kX86Add32RR; break;
+      case kOpAdc: opcode = kX86Adc32RR; break;
+      case kOpAnd: opcode = kX86And32RR; break;
+      case kOpOr:  opcode = kX86Or32RR; break;
+      case kOpXor: opcode = kX86Xor32RR; break;
+      case kOp2Byte: opcode = kX86Movsx8RR; break;
+      case kOp2Short: opcode = kX86Movsx16RR; break;
+      case kOp2Char: opcode = kX86Movzx16RR; break;
+      case kOpMul: opcode = kX86Imul32RR; break;
+      default:
+        LOG(FATAL) << "Bad case in opRegReg " << op;
+        break;
     }
     return newLIR2(cUnit, opcode, rDestSrc1, rSrc2);
-#endif
+}
+
+// Emit rDest = rDest <op> [rBase + offset] (register-memory ALU/move form).
+LIR* opRegMem(CompilationUnit *cUnit, OpKind op, int rDest, int rBase, int offset) {
+  X86OpCode opcode = kX86Nop;
+  switch (op) {
+      // X86 binary opcodes
+    case kOpSub: opcode = kX86Sub32RM; break;
+    case kOpMov: opcode = kX86Mov32RM; break;
+    case kOpCmp: opcode = kX86Cmp32RM; break;
+    case kOpAdd: opcode = kX86Add32RM; break;
+    case kOpAnd: opcode = kX86And32RM; break;
+    case kOpOr:  opcode = kX86Or32RM; break;
+    case kOpXor: opcode = kX86Xor32RM; break;
+    case kOp2Byte: opcode = kX86Movsx8RM; break;
+    case kOp2Short: opcode = kX86Movsx16RM; break;
+    case kOp2Char: opcode = kX86Movzx16RM; break;
+    case kOpMul:  // no reg-mem multiply wired up; deliberately falls into the fatal default
+    default:
+      LOG(FATAL) << "Bad case in opRegMem " << op;
+      break;
+  }
+  return newLIR3(cUnit, opcode, rDest, rBase, offset);
+}
+
+// Emit rDest = rSrc1 <op> rSrc2, lowering to x86's two-address forms
+// (using lea and register copies when rDest aliases a source).
+LIR* opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest, int rSrc1, int rSrc2) {
+  if (rDest != rSrc1 && rDest != rSrc2) {
+    if (op == kOpAdd) { // lea special case, except can't encode rbp as base
+      if (rSrc1 == rSrc2) {
+        // rDest = rSrc1 + rSrc1 == rSrc1 * 2: must copy the source first,
+        // otherwise the shift below would operate on rDest's stale contents.
+        opRegCopy(cUnit, rDest, rSrc1);
+        return opRegImm(cUnit, kOpLsl, rDest, 1);
+      } else if (rSrc1 != rBP) {
+        return newLIR5(cUnit, kX86Lea32RA, rDest, rSrc1 /* base */,
+                       rSrc2 /* index */, 0 /* scale */, 0 /* disp */);
+      } else {
+        return newLIR5(cUnit, kX86Lea32RA, rDest, rSrc2 /* base */,
+                       rSrc1 /* index */, 0 /* scale */, 0 /* disp */);
+      }
+    } else {
+      opRegCopy(cUnit, rDest, rSrc1);
+      return opRegReg(cUnit, op, rDest, rSrc2);
+    }
+  } else if (rDest == rSrc1) {
+    return opRegReg(cUnit, op, rDest, rSrc2);
+  } else {  // rDest == rSrc2
+    switch (op) {
+      case kOpSub:  // non-commutative
+        opReg(cUnit, kOpNeg, rDest);
+        op = kOpAdd;  // rSrc1 - rSrc2 == rSrc1 + (-rSrc2)
+        break;
+      case kOpSbc:
+      case kOpLsl: case kOpLsr: case kOpAsr: case kOpRor: {
+        // Non-commutative and rDest aliases rSrc2: compute in a temp, then copy.
+        int tReg = oatAllocTemp(cUnit);
+        opRegCopy(cUnit, tReg, rSrc1);
+        opRegReg(cUnit, op, tReg, rSrc2);
+        LIR* res = opRegCopy(cUnit, rDest, tReg);
+        oatFreeTemp(cUnit, tReg);
+        return res;
+      }
+      case kOpAdd:  // commutative
+      case kOpOr:
+      case kOpAdc:
+      case kOpAnd:
+      case kOpXor:
+        break;
+      default:
+        LOG(FATAL) << "Bad case in opRegRegReg " << op;
+    }
+    return opRegReg(cUnit, op, rDest, rSrc1);
+  }
+}
+
+// Emit rDest = rSrc <op> imm, preferring single-instruction lea/imul forms.
+LIR* opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, int rSrc, int value) {
+  if (op == kOpMul) {
+    X86OpCode opcode = IS_SIMM8(value) ? kX86Imul32RRI8 : kX86Imul32RRI;
+    return newLIR3(cUnit, opcode, rDest, rSrc, value);
+  }
+  if (op == kOpLsl && value >= 0 && value <= 3) { // lea shift special case
+    // rDest = rSrc << value: SIB with no base, rSrc as index, scale = value,
+    // disp = 0. (The old operands computed rSrc + value instead of a shift.)
+    return newLIR5(cUnit, kX86Lea32RA, rDest, r5sib_no_base /* base */,
+                   rSrc /* index */, value /* scale */, 0 /* disp */);
+  }
+  if (rDest != rSrc) {
+    if (op == kOpAdd) { // lea add special case
+      return newLIR5(cUnit, kX86Lea32RA, rDest, rSrc /* base */,
+                     r4sib_no_index /* index */, 0 /* scale */, value /* disp */);
+    }
+    opRegCopy(cUnit, rDest, rSrc);
+  }
+  return opRegImm(cUnit, op, rDest, value);
+}
 
 LIR *loadConstantValueWide(CompilationUnit *cUnit, int rDestLo,
@@ -567,209 +482,175 @@
 #endif
 }
 
-LIR *loadBaseDispBody(CompilationUnit *cUnit, MIR *mir, int rBase,
-                                int displacement, int rDest, int rDestHi,
-                                OpSize size, int sReg)
-/*
- * Load value from base + displacement.  Optionally perform null check
- * on base (which must have an associated sReg and MIR).  If not
- * performing null check, incoming MIR can be null. IMPORTANT: this
- * code must not allocate any new temps.  If a new register is needed
- * and base and dest are the same, spill some other register to
- * rlp and then restore.
- */
-{
-    UNIMPLEMENTED(WARNING) << "loadBaseDispBody";
-    return NULL;
-#if 0
-    LIR *res;
-    LIR *load = NULL;
-    LIR *load2 = NULL;
-    X86OpCode opcode = kX86Nop;
-    bool shortForm = IS_SIMM16(displacement);
-    bool pair = false;
-
-    switch (size) {
-        case kLong:
-        case kDouble:
-            pair = true;
-            opcode = kX86Lw;
-            if (FPREG(rDest)) {
-                opcode = kX86Flwc1;
-                if (DOUBLEREG(rDest)) {
-                    rDest = rDest - FP_DOUBLE;
-                } else {
-                    DCHECK(FPREG(rDestHi));
-                    DCHECK(rDest == (rDestHi - 1));
-                }
-                rDestHi = rDest + 1;
-            }
-            shortForm = IS_SIMM16_2WORD(displacement);
-            DCHECK_EQ((displacement & 0x3), 0);
-            break;
-        case kWord:
-        case kSingle:
-            opcode = kX86Lw;
-            if (FPREG(rDest)) {
-                opcode = kX86Flwc1;
-                DCHECK(SINGLEREG(rDest));
-            }
-            DCHECK_EQ((displacement & 0x3), 0);
-            break;
-        case kUnsignedHalf:
-            opcode = kX86Lhu;
-            DCHECK_EQ((displacement & 0x1), 0);
-            break;
-        case kSignedHalf:
-            opcode = kX86Lh;
-            DCHECK_EQ((displacement & 0x1), 0);
-            break;
-        case kUnsignedByte:
-            opcode = kX86Lbu;
-            break;
-        case kSignedByte:
-            opcode = kX86Lb;
-            break;
-        default:
-            LOG(FATAL) << "Bad case in loadBaseIndexedBody";
-    }
-
-    if (shortForm) {
-        if (!pair) {
-            load = res = newLIR3(cUnit, opcode, rDest, displacement, rBase);
+// Load a value of 'size' from [rBase + rIndex*scale + displacement] (indexed
+// when rIndex != INVALID_REG, plain base+disp otherwise) into rDest[/rDestHi].
+// 64-bit values use either one movsd (XMM dest) or a core-register pair.
+LIR* loadBaseIndexedDisp(CompilationUnit *cUnit, MIR *mir,
+                         int rBase, int rIndex, int scale, int displacement,
+                         int rDest, int rDestHi,
+                         OpSize size, int sReg) {
+  LIR *load = NULL;
+  LIR *load2 = NULL;
+  bool isArray = rIndex != INVALID_REG;
+  bool pair = false;
+  bool is64bit = false;
+  X86OpCode opcode = kX86Nop;
+  switch (size) {
+    case kLong:
+    case kDouble:
+      is64bit = true;
+      if (FPREG(rDest)) {
+        opcode = isArray ? kX86MovsdRA : kX86MovsdRM;
+        if (DOUBLEREG(rDest)) {
+          rDest = rDest - FP_DOUBLE;
        } else {
+          DCHECK(FPREG(rDestHi));
+          DCHECK(rDest == (rDestHi - 1));
        }
+        rDestHi = rDest + 1;
+      } else {
+        pair = true;
+        opcode = isArray ? kX86Mov32RA  : kX86Mov32RM;
+      }
+      // TODO: double store is to unaligned address
+      DCHECK_EQ((displacement & 0x3), 0);
+      break;
+    case kWord:
+    case kSingle:
+      opcode = isArray ? kX86Mov32RA : kX86Mov32RM;
+      if (FPREG(rDest)) {
+        opcode = isArray ? kX86MovssRA : kX86MovssRM;
+        DCHECK(SINGLEREG(rDest));
+      }
+      DCHECK_EQ((displacement & 0x3), 0);
+      break;
+    case kUnsignedHalf:
+      opcode = isArray ? kX86Movzx16RA : kX86Movzx16RM;
+      DCHECK_EQ((displacement & 0x1), 0);
+      break;
+    case kSignedHalf:
+      opcode = isArray ? kX86Movsx16RA : kX86Movsx16RM;
+      DCHECK_EQ((displacement & 0x1), 0);
+      break;
+    case kUnsignedByte:
+      opcode = isArray ? kX86Movzx8RA : kX86Movzx8RM;
+      break;
+    case kSignedByte:
+      opcode = isArray ? kX86Movsx8RA : kX86Movsx8RM;
+      break;
+    default:
+      LOG(FATAL) << "Bad case in loadBaseIndexedDispBody";
+  }
+
+  if (!isArray) {
+    if (!pair) {
+      load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
    } else {
-        if (pair) {
-            int rTmp = oatAllocFreeTemp(cUnit);
-            res = opRegRegImm(cUnit, kOpAdd, rTmp, rBase, displacement);
-            load = newLIR3(cUnit, opcode, rDest, LOWORD_OFFSET, rTmp);
-            load2 = newLIR3(cUnit, opcode, rDestHi, HIWORD_OFFSET, rTmp);
-            oatFreeTemp(cUnit, rTmp);
-        } else {
-            int rTmp = (rBase == rDest) ? oatAllocFreeTemp(cUnit)
-                                        : rDest;
-            res = loadConstant(cUnit, rTmp, displacement);
-            load = newLIR3(cUnit, opcode, rDest, rBase, rTmp);
-            if (rTmp != rDest)
-                oatFreeTemp(cUnit, rTmp);
-        }
+      load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
+      load2 = newLIR3(cUnit, opcode, rDestHi, rBase, displacement + HIWORD_OFFSET);
    }
-
    if (rBase == rSP) {
-        if (load != NULL)
-            annotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                                    true /* isLoad */);
-        if (load2 != NULL)
-            annotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
-                                    true /* isLoad */);
+      annotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
+                              true /* isLoad */, is64bit);
+      if (pair) {  // was is64bit: load2 is NULL for FP 64-bit loads (single movsd), which would crash here
+        annotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
+                                true /* isLoad */, is64bit);
+      }
    }
-    return load;
-#endif
+  } else {
+    if (!pair) {
+      load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale, displacement + LOWORD_OFFSET);
+    } else {
+      load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale, displacement + LOWORD_OFFSET);
+      load2 = newLIR5(cUnit, opcode, rDestHi, rBase, rIndex, scale, displacement + HIWORD_OFFSET);
+    }
+  }
+
+  return load;
 }
 
-LIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
-                            int displacement, int rDest, OpSize size,
-                            int sReg)
-{
-    return loadBaseDispBody(cUnit, mir, rBase, displacement, rDest, -1,
-                            size, sReg);
+LIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir,
+                  int rBase, int displacement,
+                  int rDest,
+                  OpSize size, int sReg) {  // thin wrapper: non-indexed, non-wide load via loadBaseIndexedDisp
+  return loadBaseIndexedDisp(cUnit, mir, rBase, INVALID_REG, 0, displacement,
+                             rDest, INVALID_REG, size, sReg);
 }
 
-LIR *loadBaseDispWide(CompilationUnit *cUnit, MIR *mir, int rBase,
-                          int displacement, int rDestLo, int rDestHi,
-                                int sReg)
-{
-    return loadBaseDispBody(cUnit, mir, rBase, displacement, rDestLo, rDestHi,
-                            kLong, sReg);
+LIR *loadBaseDispWide(CompilationUnit *cUnit, MIR *mir,
+                      int rBase, int displacement,
+                      int rDestLo, int rDestHi,
+                      int sReg) {  // thin wrapper: non-indexed 64-bit load via loadBaseIndexedDisp
+  return loadBaseIndexedDisp(cUnit, mir, rBase, INVALID_REG, 0, displacement,
+                             rDestLo, rDestHi, kLong, sReg);
 }
 
 LIR *storeBaseDispBody(CompilationUnit *cUnit, int rBase,
-                                 int displacement, int rSrc, int rSrcHi,
-                                 OpSize size)
+                       int displacement, int rSrc, int rSrcHi,
+                       OpSize size)
 {
-    UNIMPLEMENTED(WARNING) << "storeBaseDispBody";
-    return NULL;
-#if 0
-    LIR *res;
-    LIR *store = NULL;
-    LIR *store2 = NULL;
-    X86OpCode opcode = kX86Nop;
-    bool shortForm = IS_SIMM16(displacement);
-    bool pair = false;
-
-    switch (size) {
-        case kLong:
-        case kDouble:
-            pair = true;
-            opcode = kX86Sw;
-            if (FPREG(rSrc)) {
-                opcode = kX86Fswc1;
-                if (DOUBLEREG(rSrc)) {
-                    rSrc = rSrc - FP_DOUBLE;
-                } else {
-                    DCHECK(FPREG(rSrcHi));
-                    DCHECK_EQ(rSrc, (rSrcHi - 1));
-                }
-                rSrcHi = rSrc + 1;
-            }
-            shortForm = IS_SIMM16_2WORD(displacement);
-            DCHECK_EQ((displacement & 0x3), 0);
-            break;
-        case kWord:
-        case kSingle:
-            opcode = kX86Sw;
-            if (FPREG(rSrc)) {
-                opcode = kX86Fswc1;
-                DCHECK(SINGLEREG(rSrc));
-            }
-            DCHECK_EQ((displacement & 0x3), 0);
-            break;
-        case kUnsignedHalf:
-        case kSignedHalf:
-            opcode = kX86Sh;
-            DCHECK_EQ((displacement & 0x1), 0);
-            break;
-        case kUnsignedByte:
-        case kSignedByte:
-            opcode = kX86Sb;
-            break;
-        default:
-            LOG(FATAL) << "Bad case in storeBaseIndexedBody";
-    }
-
-    if (shortForm) {
-        if (!pair) {
-            store = res = newLIR3(cUnit, opcode, rSrc, displacement, rBase);
+  LIR *res = NULL;
+  LIR *store = NULL;
+  LIR *store2 = NULL;
+  X86OpCode opcode = kX86Bkpt;
+  bool pair = false;
+  bool is64bit = false;
+  switch (size) {
+    case kLong:
+    case kDouble:
+      is64bit = true;
+      if (FPREG(rSrc)) {
+        pair = false;
+        opcode = kX86MovsdMR;
+        if (DOUBLEREG(rSrc)) {
+          rSrc = rSrc - FP_DOUBLE;
         } else {
-            store = res = newLIR3(cUnit, opcode, rSrc, displacement + LOWORD_OFFSET, rBase);
-            store2 = newLIR3(cUnit, opcode, rSrcHi, displacement + HIWORD_OFFSET, rBase);
+          DCHECK(FPREG(rSrcHi));
+          DCHECK_EQ(rSrc, (rSrcHi - 1));
         }
-    } else {
-        int rScratch = oatAllocTemp(cUnit);
-        res = opRegRegImm(cUnit, kOpAdd, rScratch, rBase, displacement);
-        if (!pair) {
-            store =  newLIR3(cUnit, opcode, rSrc, 0, rScratch);
-        } else {
-            store =  newLIR3(cUnit, opcode, rSrc, LOWORD_OFFSET, rScratch);
-            store2 = newLIR3(cUnit, opcode, rSrcHi, HIWORD_OFFSET, rScratch);
-        }
-        oatFreeTemp(cUnit, rScratch);
-    }
+        rSrcHi = rSrc + 1;
+      } else {
+        pair = true;
+        opcode = kX86Mov32MR;
+      }
+      // TODO: double store is to unaligned address
+      DCHECK_EQ((displacement & 0x3), 0);
+      break;
+    case kWord:
+    case kSingle:
+      opcode = kX86Mov32MR;
+      if (FPREG(rSrc)) {
+        opcode = kX86MovssMR;
+        DCHECK(SINGLEREG(rSrc));
+      }
+      DCHECK_EQ((displacement & 0x3), 0);
+      break;
+    case kUnsignedHalf:
+    case kSignedHalf:
+      opcode = kX86Mov16MR;
+      DCHECK_EQ((displacement & 0x1), 0);
+      break;
+    case kUnsignedByte:
+    case kSignedByte:
+      opcode = kX86Mov8MR;
+      break;
+    default:
+      LOG(FATAL) << "Bad case in storeBaseDispBody";
+  }
 
-    if (rBase == rSP) {
-        if (store != NULL)
-            annotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                                    false /* isLoad */);
-        if (store2 != NULL)
-            annotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
-                                    false /* isLoad */);
-    }
+  if (!pair) {
+    store = res = newLIR3(cUnit, opcode, rBase, displacement, rSrc);
+  } else {
+    store = res = newLIR3(cUnit, opcode, rBase, displacement + LOWORD_OFFSET, rSrc);
+    store2 = newLIR3(cUnit, opcode, rBase, displacement + HIWORD_OFFSET, rSrcHi);
+  }
 
-    return res;
-#endif
+  if (rBase == rSP) {
+    annotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
+                            false /* isLoad */, is64bit);
+    if (pair) {
+      annotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
+                              false /* isLoad */, is64bit);
+    }
+  }
+  return res;
 }
 
 LIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index 152574a..f3b8aa0 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -25,6 +25,21 @@
 namespace art {
 
 /*
+ * Perform register memory operation.
+ */
+LIR* genRegMemCheck(CompilationUnit* cUnit, ConditionCode cCode,
+                    int reg1, int base, int offset, MIR* mir, ThrowKind kind)
+{
+    LIR* tgt = rawLIR(cUnit, 0, kPseudoThrowTarget, kind,
+                      mir ? mir->offset : 0, reg1, base, offset);
+    opRegMem(cUnit, kOpCmp, reg1, base, offset);
+    LIR* branch = opCondBranch(cUnit, cCode, tgt);
+    // Remember branch target - will process later
+    oatInsertGrowableList(cUnit, &cUnit->throwLaunchpads, (intptr_t)tgt);
+    return branch;
+}
+
+/*
  * The lack of pc-relative loads on X86 presents somewhat of a challenge
  * for our PIC switch table strategy.  To materialize the current location
  * we'll do a dummy JAL and reference our tables using r_RA as the
@@ -365,130 +380,58 @@
 #endif
 }
 
-LIR* opCmpBranch(CompilationUnit* cUnit, ConditionCode cond, int src1,
-                 int src2, LIR* target)
+X86ConditionCode oatX86ConditionEncoding(ConditionCode cond) {
+  switch(cond) {
+    case kCondEq: return kX86CondEq;
+    case kCondNe: return kX86CondNe;
+    case kCondCs: return kX86CondC;
+    case kCondCc: return kX86CondNc;
+    case kCondMi: return kX86CondS;
+    case kCondPl: return kX86CondNs;
+    case kCondVs: return kX86CondO;
+    case kCondVc: return kX86CondNo;
+    case kCondHi: return kX86CondA;
+    case kCondLs: return kX86CondBe;
+    case kCondGe: return kX86CondGe;
+    case kCondLt: return kX86CondL;
+    case kCondGt: return kX86CondG;
+    case kCondLe: return kX86CondLe;
+    case kCondAl:
+    case kCondNv: LOG(FATAL) << "Should not reach here";
+  }
+  return kX86CondO;
+}
+
+LIR* opCmpBranch(CompilationUnit* cUnit, ConditionCode cond, int src1, int src2, LIR* target)
 {
-    UNIMPLEMENTED(WARNING) << "opCmpBranch";
-    return NULL;
-#if 0
-    LIR* branch;
-    X86OpCode sltOp;
-    X86OpCode brOp;
-    bool cmpZero = false;
-    bool swapped = false;
-    switch(cond) {
-        case kCondEq:
-            brOp = kX86Beq;
-            cmpZero = true;
-            break;
-        case kCondNe:
-            brOp = kX86Bne;
-            cmpZero = true;
-            break;
-        case kCondCc:
-            sltOp = kX86Sltu;
-            brOp = kX86Bnez;
-            break;
-        case kCondCs:
-            sltOp = kX86Sltu;
-            brOp = kX86Beqz;
-            break;
-        case kCondGe:
-            sltOp = kX86Slt;
-            brOp = kX86Beqz;
-            break;
-        case kCondGt:
-            sltOp = kX86Slt;
-            brOp = kX86Bnez;
-            swapped = true;
-            break;
-        case kCondLe:
-            sltOp = kX86Slt;
-            brOp = kX86Beqz;
-            swapped = true;
-            break;
-        case kCondLt:
-            sltOp = kX86Slt;
-            brOp = kX86Bnez;
-            break;
-        case kCondHi:  // Gtu
-            sltOp = kX86Sltu;
-            brOp = kX86Bnez;
-            swapped = true;
-            break;
-        default:
-            LOG(FATAL) << "No support for ConditionCode: " << (int) cond;
-            return NULL;
-    }
-    if (cmpZero) {
-        branch = newLIR2(cUnit, brOp, src1, src2);
-    } else {
-        int tReg = oatAllocTemp(cUnit);
-        if (swapped) {
-            newLIR3(cUnit, sltOp, tReg, src2, src1);
-        } else {
-            newLIR3(cUnit, sltOp, tReg, src1, src2);
-        }
-        branch = newLIR1(cUnit, brOp, tReg);
-        oatFreeTemp(cUnit, tReg);
-    }
-    branch->target = target;
-    return branch;
-#endif
+  newLIR2(cUnit, kX86Cmp32RR, src1, src2);
+  X86ConditionCode cc = oatX86ConditionEncoding(cond);
+  LIR* branch = newLIR2(cUnit, kX86Jcc, 0 /* lir operand for Jcc offset */ , cc);
+  branch->target = target;
+  return branch;
 }
 
 LIR* opCmpImmBranch(CompilationUnit* cUnit, ConditionCode cond, int reg,
                     int checkValue, LIR* target)
 {
-    UNIMPLEMENTED(WARNING) << "opCmpImmBranch";
-    return NULL;
-#if 0
-    LIR* branch;
-    if (checkValue != 0) {
-        // TUNING: handle s16 & kCondLt/Mi case using slti
-        int tReg = oatAllocTemp(cUnit);
-        loadConstant(cUnit, tReg, checkValue);
-        branch = opCmpBranch(cUnit, cond, reg, tReg, target);
-        oatFreeTemp(cUnit, tReg);
-        return branch;
-    }
-    X86OpCode opc;
-    switch(cond) {
-        case kCondEq: opc = kX86Beqz; break;
-        case kCondGe: opc = kX86Bgez; break;
-        case kCondGt: opc = kX86Bgtz; break;
-        case kCondLe: opc = kX86Blez; break;
-        //case KCondMi:
-        case kCondLt: opc = kX86Bltz; break;
-        case kCondNe: opc = kX86Bnez; break;
-        default:
-            // Tuning: use slti when applicable
-            int tReg = oatAllocTemp(cUnit);
-            loadConstant(cUnit, tReg, checkValue);
-            branch = opCmpBranch(cUnit, cond, reg, tReg, target);
-            oatFreeTemp(cUnit, tReg);
-            return branch;
-    }
-    branch = newLIR1(cUnit, opc, reg);
-    branch->target = target;
-    return branch;
-#endif
+  // TODO: when checkValue == 0 and reg is rCX, use the jcxz/nz opcode
+  newLIR2(cUnit, kX86Cmp32RI, reg, checkValue);
+  X86ConditionCode cc = oatX86ConditionEncoding(cond);
+  LIR* branch = newLIR2(cUnit, kX86Jcc, 0 /* lir operand for Jcc offset */ , cc);
+  branch->target = target;
+  return branch;
 }
 
 LIR* opRegCopyNoInsert(CompilationUnit *cUnit, int rDest, int rSrc)
 {
-    UNIMPLEMENTED(WARNING) << "opRegCopyNoInsert";
-    return NULL;
-#if 0
     if (FPREG(rDest) || FPREG(rSrc))
         return fpRegCopy(cUnit, rDest, rSrc);
-    LIR* res = rawLIR(cUnit, cUnit->currentDalvikOffset, kX86Move,
+    LIR* res = rawLIR(cUnit, cUnit->currentDalvikOffset, kX86Mov32RR,
                       rDest, rSrc);
     if (rDest == rSrc) {
         res->flags.isNop = true;
     }
     return res;
-#endif
 }
 
 LIR* opRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
diff --git a/src/compiler/codegen/x86/X86/Ralloc.cc b/src/compiler/codegen/x86/X86/Ralloc.cc
index 86625c6..c7985e5 100644
--- a/src/compiler/codegen/x86/X86/Ralloc.cc
+++ b/src/compiler/codegen/x86/X86/Ralloc.cc
@@ -63,8 +63,6 @@
 
 void oatInitializeRegAlloc(CompilationUnit* cUnit)
 {
-    UNIMPLEMENTED(WARNING) << "oatInitializeRegAlloc";
-#if 0
     int numRegs = sizeof(coreRegs)/sizeof(*coreRegs);
     int numReserved = sizeof(reservedRegs)/sizeof(*reservedRegs);
     int numTemps = sizeof(coreTemps)/sizeof(*coreTemps);
@@ -85,11 +83,6 @@
     oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
     // Keep special registers from being allocated
     for (int i = 0; i < numReserved; i++) {
-        if (NO_SUSPEND && !cUnit->genDebugger &&
-            (reservedRegs[i] == rSUSPEND)) {
-            //To measure cost of suspend check
-            continue;
-        }
         oatMarkInUse(cUnit, reservedRegs[i]);
     }
     // Mark temp regs - all others not in use can be used for promotion
@@ -116,7 +109,6 @@
            }
         }
     }
-#endif
 }
 
 void freeRegLocTemps(CompilationUnit* cUnit, RegLocation rlKeep,
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 31449e0..65f07cf 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -118,6 +118,7 @@
 #define LOWREG(x) ((x & 0x1f) == x)
 #define DOUBLEREG(x) ((x & FP_DOUBLE) == FP_DOUBLE)
 #define SINGLEREG(x) (FPREG(x) && !DOUBLEREG(x))
+
 /*
  * Note: the low register of a floating point pair is sufficient to
  * create the name of a double, but require both names to be passed to
@@ -133,10 +134,9 @@
 /* non-existant physical register */
 #define rNone   (-1)
 
-/* RegisterLocation templates return values (r0, or r0/r1) */
-#define LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 1, rAX, INVALID_REG,\
-                      INVALID_SREG}
-#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, rAX, rDX, INVALID_SREG}
+/* RegisterLocation templates return values (rAX, or rAX/rDX) */
+#define LOC_C_RETURN      {kLocPhysReg, 0, 0, 0, 0, 0, 1, rAX, INVALID_REG, INVALID_SREG}
+#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, rAX, rDX,         INVALID_SREG}
 
 enum ResourceEncodingPos {
     kGPReg0     = 0,
@@ -177,15 +177,16 @@
 
 enum NativeRegisterPool {
   r0     = 0,
-  rAX   = r0,
+  rAX    = r0,
   r1     = 1,
-  rCX   = r1,
+  rCX    = r1,
   r2     = 2,
   rDX    = r2,
   r3     = 3,
   rBX    = r3,
   r4sp   = 4,
-  rSP    =r4sp,
+  rSP    = r4sp,
+  r4sib_no_index = r4sp,
   r5     = 5,
   rBP    = r5,
   r6     = 6,
@@ -230,6 +231,53 @@
 
 #define isPseudoOpcode(opCode) ((int)(opCode) < 0)
 
+/* X86 condition encodings */
+enum X86ConditionCode {
+    kX86CondO   = 0x0,        // overflow
+    kX86CondNo  = 0x1,        // not overflow
+
+    kX86CondB   = 0x2,        // below
+    kX86CondNae = kX86CondB,  // not-above-equal
+    kX86CondC   = kX86CondB,  // carry
+
+    kX86CondNb  = 0x3,        // not-below
+    kX86CondAe  = kX86CondNb, // above-equal
+    kX86CondNc  = kX86CondNb, // not-carry
+
+    kX86CondZ   = 0x4,        // zero
+    kX86CondEq  = kX86CondZ,  // equal
+
+    kX86CondNz  = 0x5,        // not-zero
+    kX86CondNe  = kX86CondNz, // not-equal
+
+    kX86CondBe  = 0x6,        // below-equal
+    kX86CondNa  = kX86CondBe, // not-above
+
+    kX86CondNbe = 0x7,        // not-below-equal
+    kX86CondA   = kX86CondNbe,// above
+
+    kX86CondS   = 0x8,        // sign
+    kX86CondNs  = 0x9,        // not-sign
+
+    kX86CondP   = 0xA,        // 8-bit parity even
+    kX86CondPE  = kX86CondP,
+
+    kX86CondNp  = 0xB,        // 8-bit parity odd
+    kX86CondPo  = kX86CondNp,
+
+    kX86CondL   = 0xC,        // less-than
+    kX86CondNge = kX86CondL,  // not-greater-equal
+
+    kX86CondNl  = 0xD,        // not-less-than
+    kX86CondGe  = kX86CondNl, // greater-equal
+
+    kX86CondLe  = 0xE,        // less-than-equal
+    kX86CondNg  = kX86CondLe, // not-greater
+
+    kX86CondNle = 0xF,        // not-less-than-equal
+    kX86CondG   = kX86CondNle,// greater
+};
+
 /*
  * The following enum defines the list of supported X86 instructions by the
  * assembler. Their corresponding EncodingMap positions will be defined in
@@ -253,78 +301,182 @@
     kPseudoNormalBlockLabel = -1,
     kX86First,
     kX8632BitData = kX86First, /* data [31..0] */
+    kX86Bkpt,
+    kX86Nop,
     // Define groups of binary operations
-    // RI - Register Immediate - opcode reg, #immediate
-    //                         - lir operands - 0: reg, 1: immediate
-    // MI - Memory Immediate   - opcode [base + disp], #immediate
-    //                         - lir operands - 0: base, 1: disp, 2: immediate
-    // AI - Array Immediate    - opcode [base + index * scale + disp], #immediate
-    //                         - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
+    // MR - Memory Register    - opcode [base + disp], reg
+    //                         - lir operands - 0: base, 1: disp, 2: reg
+    // AR - Array Register     - opcode [base + index * scale + disp], reg
+    //                         - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
+    // TR - Thread Register    - opcode fs:[disp], reg - where fs: is equal to Thread::Current()
+    //                         - lir operands - 0: disp, 1: reg
     // RR - Register Register  - opcode reg1, reg2
     //                         - lir operands - 0: reg1, 1: reg2
     // RM - Register Memory    - opcode reg, [base + disp]
     //                         - lir operands - 0: reg, 1: base, 2: disp
     // RA - Register Array     - opcode reg, [base + index * scale + disp]
     //                         - lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
-    // MR - Memory Register    - opcode [base + disp], reg
-    //                         - lir operands - 0: base, 1: disp, 2: reg
-    // AR - Array Register     - opcode [base + index * scale + disp], reg
-    //                         - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
+    // RT - Register Thread    - opcode reg, fs:[disp] - where fs: is equal to Thread::Current()
+    //                         - lir operands - 0: reg, 1: disp
+    // RI - Register Immediate - opcode reg, #immediate
+    //                         - lir operands - 0: reg, 1: immediate
+    // MI - Memory Immediate   - opcode [base + disp], #immediate
+    //                         - lir operands - 0: base, 1: disp, 2: immediate
+    // AI - Array Immediate    - opcode [base + index * scale + disp], #immediate
+    //                         - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
+    // TI - Thread Register    - opcode fs:[disp], imm - where fs: is equal to Thread::Current()
+    //                         - lir operands - 0: disp, 1: imm
 #define BinaryOpCode(opcode) \
-  opcode ## RI, opcode ## MI, opcode ##AI, \
-  opcode ## RR, opcode ## RM, opcode ##RA, \
-  opcode ## MR, opcode ## AR
-    BinaryOpCode(kOpAdd),
-    BinaryOpCode(kOpOr),
-    BinaryOpCode(kOpAdc),
-    BinaryOpCode(kOpSbb),
-    BinaryOpCode(kOpAnd),
-    BinaryOpCode(kOpSub),
-    BinaryOpCode(kOpXor),
-    BinaryOpCode(kOpCmp),
-    BinaryOpCode(kOpMov),
+  opcode ## 8MR, opcode ## 8AR, opcode ## 8TR, \
+  opcode ## 8RR, opcode ## 8RM, opcode ## 8RA, opcode ## 8RT, \
+  opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, opcode ## 8TI, \
+  opcode ## 16MR, opcode ## 16AR, opcode ## 16TR, \
+  opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \
+  opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \
+  opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \
+  opcode ## 32MR, opcode ## 32AR, opcode ## 32TR,  \
+  opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \
+  opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
+  opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8
+    BinaryOpCode(kX86Add),
+    BinaryOpCode(kX86Or),
+    BinaryOpCode(kX86Adc),
+    BinaryOpCode(kX86Sbb),
+    BinaryOpCode(kX86And),
+    BinaryOpCode(kX86Sub),
+    BinaryOpCode(kX86Xor),
+    BinaryOpCode(kX86Cmp),
 #undef BinaryOpCode
+    kX86Imul16RRI, kX86Imul16RMI, kX86Imul16RAI,
+    kX86Imul32RRI, kX86Imul32RMI, kX86Imul32RAI,
+    kX86Imul32RRI8, kX86Imul32RMI8, kX86Imul32RAI8,
+    kX86Mov8MR, kX86Mov8AR, kX86Mov8TR,
+    kX86Mov8RR, kX86Mov8RM, kX86Mov8RA, kX86Mov8RT,
+    kX86Mov8RI, kX86Mov8MI, kX86Mov8AI, kX86Mov8TI,
+    kX86Mov16MR, kX86Mov16AR, kX86Mov16TR,
+    kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT,
+    kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI,
+    kX86Mov32MR, kX86Mov32AR, kX86Mov32TR,
+    kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT,
+    kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI,
+    kX86Lea32RA,
+    // RC - Register CL - opcode reg, CL
+    //                  - lir operands - 0: reg, 1: CL
+    // MC - Memory CL   - opcode [base + disp], CL
+    //                  - lir operands - 0: base, 1: disp, 2: CL
+    // AC - Array CL    - opcode [base + index * scale + disp], CL
+    //                  - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: CL
+#define BinaryShiftOpCode(opcode) \
+    opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, \
+    opcode ## 8RC, opcode ## 8MC, opcode ## 8AC, \
+    opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, \
+    opcode ## 16RC, opcode ## 16MC, opcode ## 16AC, \
+    opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, \
+    opcode ## 32RC, opcode ## 32MC, opcode ## 32AC
+    BinaryShiftOpCode(kX86Rol),
+    BinaryShiftOpCode(kX86Ror),
+    BinaryShiftOpCode(kX86Rcl),
+    BinaryShiftOpCode(kX86Rcr),
+    BinaryShiftOpCode(kX86Sal),
+    BinaryShiftOpCode(kX86Shl),
+    BinaryShiftOpCode(kX86Shr),
+    BinaryShiftOpCode(kX86Sar),
+#undef BinaryShiftOpcode
+#define UnaryOpcode(opcode, reg, mem, array) \
+    opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
+    opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
+    opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array
+    UnaryOpcode(kX86Test, RI, MI, AI),
+    UnaryOpcode(kX86Not, R, M, A),
+    UnaryOpcode(kX86Neg, R, M, A),
+    UnaryOpcode(kX86Mul,  DaR, DaM, DaA),
+    UnaryOpcode(kX86Imul, DaR, DaM, DaA),
+    UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
+    UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
+#undef UnaryOpcode
+#define Binary0fOpCode(opcode) \
+  opcode ## RR, opcode ## RM, opcode ## RA
+    Binary0fOpCode(kX86Movsd),
+    kX86MovsdMR,
+    kX86MovsdAR,
+    Binary0fOpCode(kX86Movss),
+    kX86MovssMR,
+    kX86MovssAR,
+    Binary0fOpCode(kX86Cvtsi2sd),   // int to double
+    Binary0fOpCode(kX86Cvtsi2ss),   // int to float
+    Binary0fOpCode(kX86Cvttsd2si),  // truncating double to int
+    Binary0fOpCode(kX86Cvttss2si),  // truncating float to int
+    Binary0fOpCode(kX86Cvtsd2si),   // rounding double to int
+    Binary0fOpCode(kX86Cvtss2si),   // rounding float to int
+    Binary0fOpCode(kX86Ucomisd),    // unordered double compare
+    Binary0fOpCode(kX86Ucomiss),    // unordered float compare
+    Binary0fOpCode(kX86Comisd),     // double compare
+    Binary0fOpCode(kX86Comiss),     // float compare
+    Binary0fOpCode(kX86Addsd),      // double add
+    Binary0fOpCode(kX86Addss),      // float add
+    Binary0fOpCode(kX86Mulsd),      // double multiply
+    Binary0fOpCode(kX86Mulss),      // float multiply
+    Binary0fOpCode(kX86Cvtss2sd),   // float to double
+    Binary0fOpCode(kX86Cvtsd2ss),   // double to float
+    Binary0fOpCode(kX86Subsd),      // double subtract
+    Binary0fOpCode(kX86Subss),      // float subtract
+    Binary0fOpCode(kX86Divsd),      // double divide
+    Binary0fOpCode(kX86Divss),      // float divide
+    Binary0fOpCode(kX86Movdxr),     // move into xmm from gpr
+    Binary0fOpCode(kX86Movdrx),     // move into reg from xmm
+    kX86Set8R, kX86Set8M, kX86Set8A,// set byte depending on condition operand
+    Binary0fOpCode(kX86Imul16),     // 16bit multiply
+    Binary0fOpCode(kX86Imul32),     // 32bit multiply
+    Binary0fOpCode(kX86Movzx8),     // zero-extend 8-bit value
+    Binary0fOpCode(kX86Movzx16),    // zero-extend 16-bit value
+    Binary0fOpCode(kX86Movsx8),     // sign-extend 8-bit value
+    Binary0fOpCode(kX86Movsx16),    // sign-extend 16-bit value
+#undef Binary0fOpCode
+    kX86Jcc,    // jCC rel; lir operands - 0: rel, 1: CC, target assigned
+    kX86Jmp,    // jmp rel; lir operands - 0: rel, target assigned
+    kX86CallR,  // call reg; lir operands - 0: reg
+    kX86CallM,  // call [base + disp]; lir operands - 0: base, 1: disp
+    kX86CallA,  // call [base + index * scale + disp]
+                // lir operands - 0: base, 1: index, 2: scale, 3: disp
+    kX86Ret,    // ret; no lir operands
     kX86Last
 };
 
 /* Instruction assembly fieldLoc kind */
 enum X86EncodingKind {
-  kData,                       // Special case for raw data
-  kRegImm, kMemImm, kArrayImm, // RI, MI, AI instruction kinds
-  kRegReg, kRegMem, kRegArray, // RR, RM, RA instruction kinds
-  kMemReg, kArrayReg,          // MR and AR instruction kinds
+  kData,                                   // Special case for raw data.
+  kNop,                                    // Special case for variable length nop.
+  kNullary,                                // Opcode that takes no arguments.
+  kReg, kMem, kArray,                      // R, M and A instruction kinds.
+  kMemReg, kArrayReg, kThreadReg,          // MR, AR and TR instruction kinds.
+  kRegReg, kRegMem, kRegArray, kRegThread, // RR, RM, RA and RT instruction kinds.
+  kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds.
+  kRegRegImm, kRegMemImm, kRegArrayImm,    // RRI, RMI and RAI instruction kinds.
+  kMovRegImm,                              // Shorter form move RI.
+  kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
+  kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
+  kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
+  kRegCond, kMemCond, kArrayCond,          // R, M, A instruction kinds following by a condition.
+  kJmp, kJcc, kCall,           // Branch instruction kinds.
   kUnimplemented               // Encoding used when an instruction isn't yet implemented.
 };
 
-/* A form of instruction with an opcode byte and a secondary opcode within the modrm byte */
-struct OpcodeModRMOpcode {
-  uint8_t opcode;        // 1 byte opcode
-  uint8_t modrm_opcode;  // 3 bit opcode that gets encoded in the register bits of the modrm byte
-};
-
 /* Struct used to define the EncodingMap positions for each X86 opcode */
 struct X86EncodingMap {
   X86OpCode opcode;      // e.g. kOpAddRI
   X86EncodingKind kind;  // Used to discriminate in the union below
   int flags;
-  union {
-    struct {
-      uint8_t rax8_i8_opcode;
-      uint8_t rax32_i32_opcode;
-      OpcodeModRMOpcode rm8_i8_opcode;
-      OpcodeModRMOpcode rm32_i32_opcode;
-      OpcodeModRMOpcode rm32_i8_opcode;
-    } RegMem_Immediate;  // kind: kRegImm, kMemImm, kArrayImm
-    struct {
-      uint8_t r8_rm8_opcode;
-      uint8_t r32_rm32_opcode;
-    } Reg_RegMem;  // kind: kRegReg, kRegMem, kRegArray
-    struct {
-      uint8_t rm8_r8_opcode;
-      uint8_t rm32_r32_opcode;
-    } RegMem_Reg;  // kind: kMemReg, kArrayReg
-    // This is a convenience for static initialization where the kind doesn't require opcode data.
-    int unused;  // kind: kData, kUnimplemented
+  struct {
+    uint8_t prefix1;     // non-zero => a prefix byte
+    uint8_t prefix2;     // non-zero => a second prefix byte
+    uint8_t opcode;      // 1 byte opcode
+    uint8_t extra_opcode1;  // possible extra opcode byte
+    uint8_t extra_opcode2;  // possible second extra opcode byte
+    // 3bit opcode that gets encoded in the register bits of the modrm byte, use determined by the
+    // encoding kind
+    uint8_t modrm_opcode;
+    uint8_t ax_opcode;  // non-zero => shorter encoding for AX as a destination
+    uint8_t immediate_bytes; // number of bytes of immediate
   } skeleton;
   const char *name;
   const char* fmt;
@@ -356,6 +508,8 @@
     kIsBinaryOp,
     kIsTertiaryOp,
     kIsQuadOp,
+    kIsQuinOp,
+    kIsSextupleOp,
     kIsIT,
     kSetsCCodes,
     kUsesCCodes,
@@ -387,10 +541,12 @@
 #define IS_BINARY_OP    (1 << kIsBinaryOp)
 #define IS_TERTIARY_OP  (1 << kIsTertiaryOp)
 #define IS_QUAD_OP      (1 << kIsQuadOp)
+#define IS_QUIN_OP      (1 << kIsQuinOp)
+#define IS_SEXTUPLE_OP  (1 << kIsSextupleOp)
 #define IS_IT           (1 << kIsIT)
 #define SETS_CCODES     (1 << kSetsCCodes)
 #define USES_CCODES     (1 << kUsesCCodes)
-#define NEEDS_FIXUP      (1 << kPCRelFixup)
+#define NEEDS_FIXUP     (1 << kPCRelFixup)
 
 /*  attributes, included for compatibility */
 #define REG_DEF_FPCS_LIST0   (0)
@@ -416,8 +572,15 @@
     kMaxHoistDistance,
 };
 
+/* Offsets of high and low halves of a 64bit value */
+#define LOWORD_OFFSET 0
+#define HIWORD_OFFSET 4
+
+/* Segment override instruction prefix used for quick TLS access to Thread::Current() */
+#define THREAD_PREFIX 0x64
 
 #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
+#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
 
 }  // namespace art
 
diff --git a/src/compiler/codegen/x86/X86RallocUtil.cc b/src/compiler/codegen/x86/X86RallocUtil.cc
index cdd75a1..7c99fd6 100644
--- a/src/compiler/codegen/x86/X86RallocUtil.cc
+++ b/src/compiler/codegen/x86/X86RallocUtil.cc
@@ -27,19 +27,8 @@
 
 namespace art {
 
-/*
- * TUNING: is leaf?  Can't just use "hasInvoke" to determine as some
- * instructions might call out to C/assembly helper functions.  Until
- * machinery is in place, always spill lr.
- */
-
-void oatAdjustSpillMask(CompilationUnit* cUnit)
-{
-    UNIMPLEMENTED(WARNING) << "oatAdjustSpillMask";
-#if 0
-    cUnit->coreSpillMask |= (1 << r_RA);
-    cUnit->numCoreSpills++;
-#endif
+void oatAdjustSpillMask(CompilationUnit* cUnit) {
+  // Adjustment for LR spilling, x86 has no LR so nothing to do here
 }
 
 /*
@@ -149,6 +138,28 @@
 #endif
 }
 
+extern RegLocation oatGetReturnWideAlt(CompilationUnit* cUnit)
+{
+    RegLocation res = LOC_C_RETURN_WIDE;
+    res.lowReg = rAX;
+    res.highReg = rDX;
+    oatClobber(cUnit, rAX);
+    oatClobber(cUnit, rDX);
+    oatMarkInUse(cUnit, rAX);
+    oatMarkInUse(cUnit, rDX);
+    oatMarkPair(cUnit, res.lowReg, res.highReg);
+    return res;
+}
+
+extern RegLocation oatGetReturnAlt(CompilationUnit* cUnit)
+{
+    RegLocation res = LOC_C_RETURN;
+    res.lowReg = rDX;
+    oatClobber(cUnit, rDX);
+    oatMarkInUse(cUnit, rDX);
+    return res;
+}
+
 extern RegisterInfo* oatGetRegInfo(CompilationUnit* cUnit, int reg)
 {
     return FPREG(reg) ? &cUnit->regPool->FPRegs[reg & FP_REG_MASK]
@@ -185,11 +196,4 @@
     ((LIR*)lir)->flags.isNop = true;
 }
 
-extern RegLocation oatGetReturnAlt(CompilationUnit* cUnit)
-{
-    UNIMPLEMENTED(FATAL);
-    RegLocation res = LOC_C_RETURN;
-    return res;
-}
-
 }  // namespace art
diff --git a/src/compiler/codegen/x86/x86/Codegen.cc b/src/compiler/codegen/x86/x86/Codegen.cc
index ba66b54..dd9217d 100644
--- a/src/compiler/codegen/x86/x86/Codegen.cc
+++ b/src/compiler/codegen/x86/x86/Codegen.cc
@@ -30,6 +30,10 @@
 #include "../X86/Factory.cc"
 /* Target independent factory utilities */
 #include "../../CodegenFactory.cc"
+/* X86-specific codegen routines */
+#include "../X86/Gen.cc"
+/* FP codegen routines */
+#include "../FP/X86FP.cc"
 /* Target independent gen routines */
 #include "../../GenCommon.cc"
 /* Shared invoke gen routines */
@@ -37,11 +41,6 @@
 /* X86-specific factory utilities */
 #include "../ArchFactory.cc"
 
-/* X86-specific codegen routines */
-#include "../X86/Gen.cc"
-/* FP codegen routines */
-#include "../FP/X86FP.cc"
-
 /* X86-specific register allocation */
 #include "../X86/Ralloc.cc"
 
diff --git a/src/jni_compiler.cc b/src/jni_compiler.cc
index bb64c73..25d76ca 100644
--- a/src/jni_compiler.cc
+++ b/src/jni_compiler.cc
@@ -97,7 +97,7 @@
     } else {
       if (!mr_conv->IsCurrentParamOnStack()) {
         // regular non-straddling move
-        __ Move(out_reg, in_reg);
+        __ Move(out_reg, in_reg, mr_conv->CurrentParamSize());
       } else {
         UNIMPLEMENTED(FATAL);  // we currently don't expect to see this case
       }
@@ -159,7 +159,7 @@
     __ StoreRawPtr(dest, in_reg);
   } else {
     if (!jni_conv->CurrentParamRegister().Equals(in_reg)) {
-      __ Move(jni_conv->CurrentParamRegister(), in_reg);
+      __ Move(jni_conv->CurrentParamRegister(), in_reg, jni_conv->CurrentParamSize());
     }
   }
 }
@@ -199,7 +199,7 @@
   // 1. Build the frame saving all callee saves
   const size_t frame_size(jni_conv->FrameSize());
   const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
-  __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs);
+  __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
 
   // 2. Set up the StackIndirectReferenceTable
   mr_conv->ResetIterator(FrameOffset(frame_size));
@@ -279,9 +279,11 @@
     // Compute arguments in registers to preserve
     mr_conv->ResetIterator(FrameOffset(frame_size + out_arg_size));
     std::vector<ManagedRegister> live_argument_regs;
+    std::vector<size_t> live_argument_regs_size;
     while (mr_conv->HasNext()) {
       if (mr_conv->IsCurrentParamInRegister()) {
         live_argument_regs.push_back(mr_conv->CurrentParamRegister());
+        live_argument_regs_size.push_back(mr_conv->CurrentParamSize());
       }
       mr_conv->Next();
     }
@@ -289,7 +291,7 @@
     // Copy arguments to preserve to callee save registers
     CHECK_LE(live_argument_regs.size(), callee_save_regs.size());
     for (size_t i = 0; i < live_argument_regs.size(); i++) {
-      __ Move(callee_save_regs.at(i), live_argument_regs.at(i));
+      __ Move(callee_save_regs.at(i), live_argument_regs.at(i), live_argument_regs_size.at(i));
     }
 
     // Get SIRT entry for 1st argument (jclass or this) to be 1st argument to
@@ -331,7 +333,7 @@
 
     // Restore live arguments
     for (size_t i = 0; i < live_argument_regs.size(); i++) {
-      __ Move(live_argument_regs.at(i), callee_save_regs.at(i));
+      __ Move(live_argument_regs.at(i), callee_save_regs.at(i), live_argument_regs_size.at(i));
     }
   }
 
@@ -515,7 +517,8 @@
     __ DecreaseFrameSize(out_arg_size);
     jni_conv->ResetIterator(FrameOffset(0));
   }
-  __ Move(mr_conv->ReturnRegister(), jni_conv->ReturnRegister());
+  DCHECK_EQ(mr_conv->SizeOfReturnValue(), jni_conv->SizeOfReturnValue());
+  __ Move(mr_conv->ReturnRegister(), jni_conv->ReturnRegister(), mr_conv->SizeOfReturnValue());
 
   // 14. Restore segment state and remove SIRT from thread
   {
diff --git a/src/jni_internal_arm.cc b/src/jni_internal_arm.cc
index 05deff7..49c9d7a 100644
--- a/src/jni_internal_arm.cc
+++ b/src/jni_internal_arm.cc
@@ -47,7 +47,8 @@
   UniquePtr<ArmAssembler> assembler(down_cast<ArmAssembler*>(Assembler::Create(kArm)));
 #define __ assembler->
   size_t num_arg_array_bytes = NumArgArrayBytes(shorty, shorty_len);
-  // Size of frame - spill of R4,R9/LR + Method* + possible receiver + arg array
+  // Size of frame = spill of R4,R9/LR + Method* + possible receiver + arg array size
+  // Note, space is left in the frame to flush arguments in registers back to out locations.
   size_t unpadded_frame_size = (4 * kPointerSize) +
                                (is_static ? 0 : kPointerSize) +
                                num_arg_array_bytes;
diff --git a/src/jni_internal_test.cc b/src/jni_internal_test.cc
index 39768d1..5c61afc 100644
--- a/src/jni_internal_test.cc
+++ b/src/jni_internal_test.cc
@@ -891,7 +891,6 @@
   env_->DeleteWeakGlobalRef(o2);
 }
 
-#if defined(__arm__) || defined(ART_USE_LLVM_COMPILER)
 TEST_F(JniInternalTest, StaticMainMethod) {
   SirtRef<ClassLoader> class_loader(LoadDex("Main"));
   CompileDirectMethod(class_loader.get(), "Main", "main", "([Ljava/lang/String;)V");
@@ -996,6 +995,7 @@
   EXPECT_EQ(INT_MIN, result.i);
 }
 
+#if defined(__arm__) || defined(ART_USE_LLVM_COMPILER)
 TEST_F(JniInternalTest, StaticIdentityDoubleMethod) {
   SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
   CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "identity", "(D)D");
diff --git a/src/jni_internal_x86.cc b/src/jni_internal_x86.cc
index 67e8d30..bd6643e 100644
--- a/src/jni_internal_x86.cc
+++ b/src/jni_internal_x86.cc
@@ -26,7 +26,7 @@
 // Creates a function which invokes a managed method with an array of
 // arguments.
 //
-// Immediately after the call, the environment looks like this:
+// Immediately after the call on X86, the environment looks like this:
 //
 // [SP+0 ] = Return address
 // [SP+4 ] = method pointer
@@ -40,34 +40,59 @@
 // to save the native registers and set up the managed registers. On
 // return, the return value must be store into the result JValue.
 CompiledInvokeStub* CreateInvokeStub(bool is_static, const char* shorty, uint32_t shorty_len) {
-  UniquePtr<X86Assembler> assembler(
-      down_cast<X86Assembler*>(Assembler::Create(kX86)));
+  UniquePtr<X86Assembler> assembler(down_cast<X86Assembler*>(Assembler::Create(kX86)));
 #define __ assembler->
   size_t num_arg_array_bytes = NumArgArrayBytes(shorty, shorty_len);
-  // Size of frame - return address + Method* + possible receiver + arg array
-  size_t frame_size = (2 * kPointerSize) +
-                      (is_static ? 0 : kPointerSize) +
-                      num_arg_array_bytes;
+  // Size of frame = return address + Method* + possible receiver + arg array size
+  // Note, space is left in the frame to flush arguments in registers back to out locations.
+  size_t frame_size = 2 * kPointerSize + (is_static ? 0 : kPointerSize) + num_arg_array_bytes;
   size_t pad_size = RoundUp(frame_size, kStackAlignment) - frame_size;
 
-  __ movl(EAX, Address(ESP, 4));   // EAX = method
-  __ movl(ECX, Address(ESP, 8));   // ECX = receiver
-  __ movl(EDX, Address(ESP, 16));  // EDX = arg array
+  Register rMethod = EAX;
+  __ movl(rMethod,   Address(ESP, 4));     // EAX = method
+  Register rReceiver = EDX;
+  if (!is_static) {
+    __ movl(rReceiver, Address(ESP, 8));   // EDX = receiver
+  }
+  Register rArgArray = ECX;
+  __ movl(rArgArray, Address(ESP, 16));    // ECX = arg array
 
+  // TODO: optimize the frame setup to avoid excessive SP math
   // Push padding
   if (pad_size != 0) {
-    __ addl(ESP, Immediate(-pad_size));
+    __ subl(ESP, Immediate(pad_size));
   }
-
   // Push/copy arguments
   for (size_t off = num_arg_array_bytes; off > 0; off -= kPointerSize) {
-    __ pushl(Address(EDX, off - kPointerSize));
+    if (off > ((is_static ? 2 : 1) * kPointerSize)) {
+      // Copy argument
+      __ pushl(Address(rArgArray, off - kPointerSize));
+    } else {
+      // Space for argument passed in register
+      __ pushl(Immediate(0));
+    }
   }
+  // Backing space for receiver
   if (!is_static) {
-    __ pushl(ECX);
+    __ pushl(Immediate(0));
   }
   // Push 0 as NULL Method* thereby terminating managed stack crawls
   __ pushl(Immediate(0));
+  if (!is_static) {
+    if (num_arg_array_bytes >= static_cast<size_t>(kPointerSize)) {
+      // Receiver already in EDX, pass 1st arg in ECX
+      __ movl(ECX, Address(rArgArray, 0));
+    }
+  } else {
+    if (num_arg_array_bytes >= static_cast<size_t>(kPointerSize)) {
+      // Pass 1st arg in EDX
+      __ movl(EDX, Address(rArgArray, 0));
+      if (num_arg_array_bytes >= static_cast<size_t>(2 * kPointerSize)) {
+        // Pass 2nd arg in ECX
+        __ movl(ECX, Address(rArgArray, kPointerSize));
+      }
+    }
+  }
 
   __ call(Address(EAX, Method::GetCodeOffset()));  // Call code off of method
 
@@ -79,10 +104,10 @@
     __ movl(ECX, Address(ESP, 20));
     switch (ch) {
       case 'D':
-        __ fstpl(Address(ECX, 0));
+        __ movsd(Address(ECX, 0), XMM0);
         break;
       case 'F':
-        __ fstps(Address(ECX, 0));
+        __ movss(Address(ECX, 0), XMM0);
         break;
       case 'J':
         __ movl(Address(ECX, 0), EAX);
diff --git a/src/managed_register_x86.h b/src/managed_register_x86.h
index 94d50b2..edbf852 100644
--- a/src/managed_register_x86.h
+++ b/src/managed_register_x86.h
@@ -129,7 +129,7 @@
   bool IsX87Register() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
-    return (0 <= test) && (test < kNumberOfXmmRegIds);
+    return (0 <= test) && (test < kNumberOfX87RegIds);
   }
 
   bool IsRegisterPair() const {