Merge "Change MethodHelper to use a Handle."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index b32fc9b..407269b 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -25,6 +25,7 @@
 	runtime/base/hex_dump_test.cc \
 	runtime/base/histogram_test.cc \
 	runtime/base/mutex_test.cc \
+	runtime/base/scoped_flock_test.cc \
 	runtime/base/timing_logger_test.cc \
 	runtime/base/unix_file/fd_file_test.cc \
 	runtime/base/unix_file/mapped_file_test.cc \
@@ -36,6 +37,7 @@
 	runtime/dex_instruction_visitor_test.cc \
 	runtime/dex_method_iterator_test.cc \
 	runtime/entrypoints/math_entrypoints_test.cc \
+	runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc \
 	runtime/entrypoints_order_test.cc \
 	runtime/exception_test.cc \
 	runtime/gc/accounting/space_bitmap_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index cfce9f7..3cf7368 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -91,6 +91,7 @@
 	optimizing/register_allocator.cc \
 	optimizing/ssa_builder.cc \
 	optimizing/ssa_liveness_analysis.cc \
+	optimizing/ssa_type_propagation.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/arena_allocator.cc \
 	utils/arena_bit_vector.cc \
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 547c0f6..d544397 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -157,6 +157,8 @@
     Instruction::GOTO,
     Instruction::GOTO_16,
     Instruction::GOTO_32,
+    Instruction::PACKED_SWITCH,
+    Instruction::SPARSE_SWITCH,
     Instruction::IF_EQ,
     Instruction::IF_NE,
     Instruction::IF_LT,
@@ -248,8 +250,6 @@
     Instruction::MOVE_OBJECT,
     Instruction::MOVE_OBJECT_FROM16,
     Instruction::MOVE_OBJECT_16,
-    // Instruction::PACKED_SWITCH,
-    // Instruction::SPARSE_SWITCH,
     // Instruction::MOVE_RESULT,
     // Instruction::MOVE_RESULT_WIDE,
     // Instruction::MOVE_RESULT_OBJECT,
@@ -889,7 +889,9 @@
     // TODO(Arm64): enable optimizations once backend is mature enough.
     // TODO(X86_64): enable optimizations once backend is mature enough.
     cu.disable_opt = ~(uint32_t)0;
-    cu.enable_debug |= (1 << kDebugCodegenDump);
+    if (cu.instruction_set == kArm64) {
+      cu.enable_debug |= (1 << kDebugCodegenDump);
+    }
   }
 
   cu.StartTimingSplit("BuildMIRGraph");
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index b80938a..b85f569 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -68,7 +68,7 @@
 
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage r_base = AllocTemp();
+  RegStorage r_base = AllocTempWide();
   // Allocate key and disp temps.
   RegStorage r_key = AllocTemp();
   RegStorage r_disp = AllocTemp();
@@ -95,7 +95,8 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  OpRegRegRegShift(kOpAdd, r_base, r_base, r_disp, ENCODE_NO_SHIFT);
+  // TODO(Arm64): generate "add x1, x1, w3, sxtw" rather than "add x1, x1, x3"?
+  OpRegRegRegShift(kOpAdd, r_base, r_base, As64BitReg(r_disp), ENCODE_NO_SHIFT);
   NewLIR1(kA64Br1x, r_base.GetReg());
 
   // Loop exit label.
@@ -105,7 +106,7 @@
 
 
 void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
-                                 RegLocation rl_src) {
+                                   RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpPackedSwitchTable(table);
@@ -122,7 +123,7 @@
 
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage table_base = AllocTemp();
+  RegStorage table_base = AllocTempWide();
   // Materialize a pointer to the switch table
   NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
   int low_key = s4FromSwitchData(&table[2]);
@@ -140,15 +141,17 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, key_reg, disp_reg, 2, k32);
+  // TODO(Arm64): generate "ldr w3, [x1,w2,sxtw #2]" rather than "ldr w3, [x1,x2,lsl #2]"?
+  LoadBaseIndexed(table_base, key_reg, As64BitReg(disp_reg), 2, k32);
 
   // Get base branch address.
-  RegStorage branch_reg = AllocTemp();
+  RegStorage branch_reg = AllocTempWide();
   LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, disp_reg, ENCODE_NO_SHIFT);
+  // TODO(Arm64): generate "add x4, x4, w3, sxtw" rather than "add x4, x4, x3"?
+  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), ENCODE_NO_SHIFT);
   NewLIR1(kA64Br1x, branch_reg.GetReg());
 
   // branch_over target here
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 6251f4f..21db771 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -223,6 +223,40 @@
                     bool skip_this);
 
   private:
+    /**
+     * @brief Given register xNN (dNN), returns register wNN (sNN).
+     * @param reg #RegStorage containing a Solo64 input register (e.g. @c x1 or @c d2).
+     * @return A Solo32 with the same register number as @p reg (e.g. @c w1 or @c s2).
+     * @see As64BitReg
+     */
+    RegStorage As32BitReg(RegStorage reg) {
+      DCHECK(reg.Is64Bit());  // Input must be a 64-bit register.
+      DCHECK(!reg.IsPair());  // Register pairs are not accepted.
+      RegStorage ret_val = RegStorage(RegStorage::k32BitSolo,  // Re-tag as a 32-bit solo.
+                                      reg.GetRawBits() & RegStorage::kRegTypeMask);
+      DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask)
+                               ->GetReg().GetReg(),
+                ret_val.GetReg());  // Must equal the view found via k32SoloStorageMask.
+      return ret_val;
+    }
+
+    /**
+     * @brief Given register wNN (sNN), returns register xNN (dNN).
+     * @param reg #RegStorage containing a Solo32 input register (e.g. @c w1 or @c s2).
+     * @return A Solo64 with the same register number as @p reg (e.g. @c x1 or @c d2).
+     * @see As32BitReg
+     */
+    RegStorage As64BitReg(RegStorage reg) {
+      DCHECK(reg.Is32Bit());  // Input must be a 32-bit register.
+      DCHECK(!reg.IsPair());  // Register pairs are not accepted.
+      RegStorage ret_val = RegStorage(RegStorage::k64BitSolo,  // Re-tag as a 64-bit solo.
+                                      reg.GetRawBits() & RegStorage::kRegTypeMask);
+      DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask)
+                               ->GetReg().GetReg(),
+                ret_val.GetReg());  // Must equal the view found via k64SoloStorageMask.
+      return ret_val;
+    }
+
     LIR* LoadFPConstantValue(int r_dest, int32_t value);
     LIR* LoadFPConstantValueWide(int r_dest, int64_t value);
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 7a415a2..b7ea362 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -689,7 +689,7 @@
     case 0:  // Set target method index in case of conflict [set kHiddenArg, kHiddenFpArg (x86)]
       CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
       cg->LoadConstant(cg->TargetReg(kHiddenArg), target_method.dex_method_index);
-      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64) {
+      if (cu->instruction_set == kX86) {
         cg->OpRegCopy(cg->TargetReg(kHiddenFpArg), cg->TargetReg(kHiddenArg));
       }
       break;
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index c383296..0a8193a 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -23,9 +23,9 @@
 #define MAX_ASSEMBLER_RETRIES 50
 
 const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = {
-  { kX8632BitData, kData,    IS_UNARY_OP,            { 0, 0, 0x00, 0, 0, 0, 0, 4 }, "data",  "0x!0d" },
-  { kX86Bkpt,      kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0 }, "int 3", "" },
-  { kX86Nop,       kNop,     NO_OPERAND,             { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop",   "" },
+  { kX8632BitData, kData,    IS_UNARY_OP,            { 0, 0, 0x00, 0, 0, 0, 0, 4, false }, "data",  "0x!0d" },
+  { kX86Bkpt,      kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0, false }, "int 3", "" },
+  { kX86Nop,       kNop,     NO_OPERAND,             { 0, 0, 0x90, 0, 0, 0, 0, 0, false }, "nop",   "" },
 
 #define ENCODING_MAP(opname, mem_use, reg_def, uses_ccodes, \
                      rm8_r8, rm32_r32, \
@@ -34,65 +34,65 @@
                      rm8_i8, rm8_i8_modrm, \
                      rm32_i32, rm32_i32_modrm, \
                      rm32_i8, rm32_i8_modrm) \
-{ kX86 ## opname ## 8MR, kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0 }, #opname "8MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 8AR, kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0 }, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 8TR, kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0,            0,      0 }, #opname "8TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 8RR, kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 8RM, kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 8RA, kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 8RI, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1 }, #opname "8RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 8MI, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1 }, #opname "8MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 8AI, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1 }, #opname "8TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 8MR, kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0, true }, #opname "8MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 8AR, kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0, true}, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 8TR, kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0,            0,      0, true }, #opname "8TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 8RR, kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 8RM, kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 8RA, kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 8RI, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1, true }, #opname "8RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 8MI, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1, true }, #opname "8MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 8AI, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1, true }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1, true }, #opname "8TI", "fs:[!0d],!1d" }, \
   \
-{ kX86 ## opname ## 16MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0 }, #opname "16MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 16AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0 }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 16TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_r32, 0, 0, 0,              0,        0 }, #opname "16TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 16RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 16RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 16RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 16RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 16RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 2 }, #opname "16RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 16MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2 }, #opname "16MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 16AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 16TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i32, 0, 0, rm32_i32_modrm, 0,        2 }, #opname "16TI", "fs:[!0d],!1d" }, \
-{ kX86 ## opname ## 16RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16RI8", "!0r,!1d" }, \
-{ kX86 ## opname ## 16MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16MI8", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 16AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16TI8", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 16MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0, false }, #opname "16MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 16AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0, false }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 16TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "16TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 16RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 16RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 16RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 16RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 16RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 2, false }, #opname "16RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 16MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2, false }, #opname "16MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 16AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2, false }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 16TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i32, 0, 0, rm32_i32_modrm, 0,        2, false }, #opname "16TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 16RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 16MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16MI8", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 16AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16TI8", "fs:[!0d],!1d" }, \
   \
-{ kX86 ## opname ## 32MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 32AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 32TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 32RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 32RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 32RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 32RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 32RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 32MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 32AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 32TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32TI", "fs:[!0d],!1d" }, \
-{ kX86 ## opname ## 32RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32RI8", "!0r,!1d" }, \
-{ kX86 ## opname ## 32MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 32AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32TI8", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 32MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "32MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 32AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 32TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "32TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 32RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 32RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 32RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 32RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 32RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4, false }, #opname "32RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 32MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "32MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 32AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 32TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "32TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 32RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 32MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32MI8", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 32AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32TI8", "fs:[!0d],!1d" }, \
   \
-{ kX86 ## opname ## 64MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 64AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 64TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 64RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 64RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 64RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 64RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 64RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "64RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 64MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "64MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 64AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 64TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "64TI", "fs:[!0d],!1d" }, \
-{ kX86 ## opname ## 64RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64RI8", "!0r,!1d" }, \
-{ kX86 ## opname ## 64MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64MI8", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 64AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 64TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64TI8", "fs:[!0d],!1d" }
+{ kX86 ## opname ## 64MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "64MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 64AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 64TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "64TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 64RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 64RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 64RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 64RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 64RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4, false }, #opname "64RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 64MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "64MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 64AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 64TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "64TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 64RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 64MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64MI8", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 64AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 64TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64TI8", "fs:[!0d],!1d" }
 
 ENCODING_MAP(Add, IS_LOAD | IS_STORE, REG_DEF0, 0,
   0x00 /* RegMem8/Reg8 */,     0x01 /* RegMem32/Reg32 */,
@@ -144,114 +144,112 @@
   0x81, 0x7 /* RegMem32/imm32 */, 0x83, 0x7 /* RegMem32/imm8 */),
 #undef ENCODING_MAP
 
-  { kX86Imul16RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RRI", "!0r,!1r,!2d" },
-  { kX86Imul16RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RMI", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul16RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul16RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RRI", "!0r,!1r,!2d" },
+  { kX86Imul16RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RMI", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul16RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
 
-  { kX86Imul32RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RRI", "!0r,!1r,!2d" },
-  { kX86Imul32RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RMI", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul32RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
-  { kX86Imul32RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RRI8", "!0r,!1r,!2d" },
-  { kX86Imul32RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul32RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul32RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RRI", "!0r,!1r,!2d" },
+  { kX86Imul32RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RMI", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul32RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul32RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RRI8", "!0r,!1r,!2d" },
+  { kX86Imul32RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul32RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
 
-  { kX86Imul64RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RRI", "!0r,!1r,!2d" },
-  { kX86Imul64RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RMI", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul64RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
-  { kX86Imul64RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RRI8", "!0r,!1r,!2d" },
-  { kX86Imul64RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RMI8", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul64RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul64RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RRI", "!0r,!1r,!2d" },
+  { kX86Imul64RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RMI", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul64RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul64RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RRI8", "!0r,!1r,!2d" },
+  { kX86Imul64RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RMI8", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul64RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
 
-  { kX86Mov8MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x88, 0, 0, 0, 0, 0 }, "Mov8MR", "[!0r+!1d],!2r" },
-  { kX86Mov8AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x88, 0, 0, 0, 0, 0 }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8TR", "fs:[!0d],!1r" },
-  { kX86Mov8RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RR", "!0r,!1r" },
-  { kX86Mov8RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RM", "!0r,[!1r+!2d]" },
-  { kX86Mov8RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov8RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RT", "!0r,fs:[!1d]" },
-  { kX86Mov8RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB0, 0, 0, 0, 0, 1 }, "Mov8RI", "!0r,!1d" },
-  { kX86Mov8MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8MI", "[!0r+!1d],!2d" },
-  { kX86Mov8AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8TI", "fs:[!0d],!1d" },
+  { kX86Mov8MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8MR", "[!0r+!1d],!2r" },
+  { kX86Mov8AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8TR", "fs:[!0d],!1r" },
+  { kX86Mov8RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RR", "!0r,!1r" },
+  { kX86Mov8RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RM", "!0r,[!1r+!2d]" },
+  { kX86Mov8RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov8RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RT", "!0r,fs:[!1d]" },
+  { kX86Mov8RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB0, 0, 0, 0, 0, 1, true }, "Mov8RI", "!0r,!1d" },
+  { kX86Mov8MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8MI", "[!0r+!1d],!2d" },
+  { kX86Mov8AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8TI", "fs:[!0d],!1d" },
 
-  { kX86Mov16MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0x66,          0,    0x89, 0, 0, 0, 0, 0 }, "Mov16MR", "[!0r+!1d],!2r" },
-  { kX86Mov16AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0x66,          0,    0x89, 0, 0, 0, 0, 0 }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov16TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0x66, 0x89, 0, 0, 0, 0, 0 }, "Mov16TR", "fs:[!0d],!1r" },
-  { kX86Mov16RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0 }, "Mov16RR", "!0r,!1r" },
-  { kX86Mov16RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0 }, "Mov16RM", "!0r,[!1r+!2d]" },
-  { kX86Mov16RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0x66,          0,    0x8B, 0, 0, 0, 0, 0 }, "Mov16RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov16RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0x66, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RT", "!0r,fs:[!1d]" },
-  { kX86Mov16RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0x66,          0,    0xB8, 0, 0, 0, 0, 2 }, "Mov16RI", "!0r,!1d" },
-  { kX86Mov16MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0x66,          0,    0xC7, 0, 0, 0, 0, 2 }, "Mov16MI", "[!0r+!1d],!2d" },
-  { kX86Mov16AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0x66,          0,    0xC7, 0, 0, 0, 0, 2 }, "Mov16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" },
+  { kX86Mov16MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0x66,          0,    0x89, 0, 0, 0, 0, 0, false }, "Mov16MR", "[!0r+!1d],!2r" },
+  { kX86Mov16AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0x66,          0,    0x89, 0, 0, 0, 0, 0, false }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov16TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0x66, 0x89, 0, 0, 0, 0, 0, false }, "Mov16TR", "fs:[!0d],!1r" },
+  { kX86Mov16RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0, false }, "Mov16RR", "!0r,!1r" },
+  { kX86Mov16RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0, false }, "Mov16RM", "!0r,[!1r+!2d]" },
+  { kX86Mov16RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0x66,          0,    0x8B, 0, 0, 0, 0, 0, false }, "Mov16RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov16RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0x66, 0x8B, 0, 0, 0, 0, 0, false }, "Mov16RT", "!0r,fs:[!1d]" },
+  { kX86Mov16RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0x66,          0,    0xB8, 0, 0, 0, 0, 2, false }, "Mov16RI", "!0r,!1d" },
+  { kX86Mov16MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0x66,          0,    0xC7, 0, 0, 0, 0, 2, false }, "Mov16MI", "[!0r+!1d],!2d" },
+  { kX86Mov16AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0x66,          0,    0xC7, 0, 0, 0, 0, 2, false }, "Mov16AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2, false }, "Mov16TI", "fs:[!0d],!1d" },
 
-  { kX86Mov32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" },
-  { kX86Mov32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" },
-  { kX86Mov32RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RR", "!0r,!1r" },
-  { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" },
-  { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" },
-  { kX86Mov32RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" },
-  { kX86Mov32MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", "[!0r+!1d],!2d" },
-  { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" },
+  { kX86Mov32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32MR", "[!0r+!1d],!2r" },
+  { kX86Mov32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32TR", "fs:[!0d],!1r" },
+  { kX86Mov32RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RR", "!0r,!1r" },
+  { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RM", "!0r,[!1r+!2d]" },
+  { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RT", "!0r,fs:[!1d]" },
+  { kX86Mov32RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB8, 0, 0, 0, 0, 4, false }, "Mov32RI", "!0r,!1d" },
+  { kX86Mov32MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32MI", "[!0r+!1d],!2d" },
+  { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32TI", "fs:[!0d],!1d" },
 
-  { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { 0,             0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" },
+  { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RM", "!0r,[!1r+!2d]" },
+  { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
-  { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { 0,             0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov64MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { REX_W,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64MR", "[!0r+!1d],!2r" },
+  { kX86Mov64AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { REX_W,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0, false }, "Mov64TR", "fs:[!0d],!1r" },
+  { kX86Mov64RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RR", "!0r,!1r" },
+  { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RM", "!0r,[!1r+!2d]" },
+  { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RT", "!0r,fs:[!1d]" },
+  { kX86Mov64RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { REX_W,             0, 0xB8, 0, 0, 0, 0, 8, false }, "Mov64RI", "!0r,!1d" },
+  { kX86Mov64MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64MI", "[!0r+!1d],!2d" },
+  { kX86Mov64AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64TI", "fs:[!0d],!1d" },
 
-  { kX86Mov64MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { REX_W,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" },
-  { kX86Mov64AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { REX_W,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0 }, "Mov64TR", "fs:[!0d],!1r" },
-  { kX86Mov64RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RR", "!0r,!1r" },
-  { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" },
-  { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" },
-  { kX86Mov64RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { REX_W,             0, 0xB8, 0, 0, 0, 0, 8 }, "Mov64RI", "!0r,!1d" },
-  { kX86Mov64MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { REX_W,             0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64MI", "[!0r+!1d],!2d" },
-  { kX86Mov64AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { REX_W,             0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 8 }, "Mov64TI", "fs:[!0d],!1d" },
+  { kX86Lea64RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RM", "!0r,[!1r+!2d]" },
+  { kX86Lea64RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
-  { kX86Lea64RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { REX_W,             0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RM", "!0r,[!1r+!2d]" },
+  { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { 0,     0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc32RR", "!2c !0r,!1r" },
+  { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc64RR", "!2c !0r,!1r" },
 
-  { kX86Lea64RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { REX_W,             0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-
-  { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0,     0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" },
-  { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RR", "!2c !0r,!1r" },
-
-  { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {0,     0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" },
-  { kX86Cmov64RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RM", "!3c !0r,[!1r+!2d]" },
+  { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, { 0,     0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" },
+  { kX86Cmov64RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, { REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc64RM", "!3c !0r,[!1r+!2d]" },
 
 #define SHIFT_ENCODING_MAP(opname, modrm_opcode) \
-{ kX86 ## opname ## 8RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 8MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 8AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 8RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1 }, #opname "8RC", "!0r,cl" }, \
-{ kX86 ## opname ## 8MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1 }, #opname "8MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 8AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1 }, #opname "8AC", "[!0r+!1r<<!2d+!3d],cl" }, \
+{ kX86 ## opname ## 8RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1, true }, #opname "8RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 8MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1, true }, #opname "8MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 8AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1, true }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 8RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1, true }, #opname "8RC", "!0r,cl" }, \
+{ kX86 ## opname ## 8MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1, true }, #opname "8MC", "[!0r+!1d],cl" }, \
+{ kX86 ## opname ## 8AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1, true }, #opname "8AC", "[!0r+!1r<<!2d+!3d],cl" }, \
   \
-{ kX86 ## opname ## 16RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 16MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 16AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 16RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "16RC", "!0r,cl" }, \
-{ kX86 ## opname ## 16MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "16MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 16AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "16AC", "[!0r+!1r<<!2d+!3d],cl" }, \
+{ kX86 ## opname ## 16RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 16MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 16AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 16RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1, false }, #opname "16RC", "!0r,cl" }, \
+{ kX86 ## opname ## 16MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1, false }, #opname "16MC", "[!0r+!1d],cl" }, \
+{ kX86 ## opname ## 16AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1, false }, #opname "16AC", "[!0r+!1r<<!2d+!3d],cl" }, \
   \
-{ kX86 ## opname ## 32RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 32MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 32RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "32RC", "!0r,cl" }, \
-{ kX86 ## opname ## 32MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "32MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 32AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" }, \
+{ kX86 ## opname ## 32RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 32MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 32RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "32RC", "!0r,cl" }, \
+{ kX86 ## opname ## 32MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "32MC", "[!0r+!1d],cl" }, \
+{ kX86 ## opname ## 32AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" }, \
   \
-{ kX86 ## opname ## 64RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 64MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 64AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 64RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "64RC", "!0r,cl" }, \
-{ kX86 ## opname ## 64MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "64MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 64AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "64AC", "[!0r+!1r<<!2d+!3d],cl" }
+{ kX86 ## opname ## 64RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 64MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 64AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 64RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "64RC", "!0r,cl" }, \
+{ kX86 ## opname ## 64MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "64MC", "[!0r+!1d],cl" }, \
+{ kX86 ## opname ## 64AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "64AC", "[!0r+!1r<<!2d+!3d],cl" }
 
   SHIFT_ENCODING_MAP(Rol, 0x0),
   SHIFT_ENCODING_MAP(Ror, 0x1),
@@ -262,31 +260,31 @@
   SHIFT_ENCODING_MAP(Sar, 0x7),
 #undef SHIFT_ENCODING_MAP
 
-  { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" },
-  { kX86Shld32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32RRI", "!0r,!1r,!2d" },
-  { kX86Shld32MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32MRI", "[!0r+!1d],!2r,!3d" },
-  { kX86Shrd32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32RRI", "!0r,!1r,!2d" },
-  { kX86Shrd32MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32MRI", "[!0r+!1d],!2r,!3d" },
-  { kX86Shld64RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64RRI", "!0r,!1r,!2d" },
-  { kX86Shld64MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64MRI", "[!0r+!1d],!2r,!3d" },
-  { kX86Shrd64RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64RRI", "!0r,!1r,!2d" },
-  { kX86Shrd64MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0, false }, "Cmc", "" },
+  { kX86Shld32RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32RRI", "!0r,!1r,!2d" },
+  { kX86Shld32MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Shrd32RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32RRI", "!0r,!1r,!2d" },
+  { kX86Shrd32MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Shld64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64RRI", "!0r,!1r,!2d" },
+  { kX86Shld64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Shrd64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64RRI", "!0r,!1r,!2d" },
+  { kX86Shrd64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64MRI", "[!0r+!1d],!2r,!3d" },
 
-  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" },
-  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" },
-  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16RI", "!0r,!1d" },
-  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16MI", "[!0r+!1d],!2d" },
-  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4}, "Test32RI", "!0r,!1d" },
-  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4}, "Test32MI", "[!0r+!1d],!2d" },
-  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4}, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test64RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64RI", "!0r,!1d" },
-  { kX86Test64MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64MI", "[!0r+!1d],!2d" },
-  { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
+  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
+  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
+  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
+  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
+  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
+  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test64RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64RI", "!0r,!1d" },
+  { kX86Test64MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64MI", "[!0r+!1d],!2d" },
+  { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" },
 
-  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,    0, 0x85, 0, 0, 0, 0, 0}, "Test32RR", "!0r,!1r" },
-  { kX86Test64RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0}, "Test64RR", "!0r,!1r" },
+  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,    0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
+  { kX86Test64RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test64RR", "!0r,!1r" },
 
 #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \
                            reg, reg_kind, reg_flags, \
@@ -294,18 +292,18 @@
                            arr, arr_kind, arr_flags, imm, \
                            b_flags, hw_flags, w_flags, \
                            b_format, hw_format, w_format) \
-{ kX86 ## opname ## 8 ## reg,  reg_kind,                      reg_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #reg, b_format "!0r" }, \
-{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | is_store | mem_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #mem, b_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | is_store | arr_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #arr, b_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 16 ## reg, reg_kind,                      reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #reg, hw_format "!0r" }, \
-{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #mem, hw_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 32 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, w_format "!0r" }, \
-{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, w_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 64 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #reg, w_format "!0r" }, \
-{ kX86 ## opname ## 64 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #mem, w_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 64 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #arr, w_format "[!0r+!1r<<!2d+!3d]" }
+{ kX86 ## opname ## 8 ## reg,  reg_kind,                      reg_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #reg, b_format "!0r" }, \
+{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | is_store | mem_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #mem, b_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | is_store | arr_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #arr, b_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 16 ## reg, reg_kind,                      reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #reg, hw_format "!0r" }, \
+{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #mem, hw_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 32 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #reg, w_format "!0r" }, \
+{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #mem, w_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 64 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #reg, w_format "!0r" }, \
+{ kX86 ## opname ## 64 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #mem, w_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 64 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #arr, w_format "[!0r+!1r<<!2d+!3d]" }
 
   UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0,           R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
   UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
@@ -316,34 +314,34 @@
   UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
 #undef UNARY_ENCODING_MAP
 
-  { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0, 0, 0x99, 0, 0, 0, 0, 0 }, "Cdq", "" },
-  { kx86Cqo64Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { REX_W, 0, 0x99, 0, 0, 0, 0, 0 }, "Cqo", "" },
-  { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" },
-  { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0,    0, 0, 0, 0 }, "Push32R",  "!0r" },
-  { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0, 0, 0x58, 0,    0, 0, 0, 0 }, "Pop32R",   "!0r" },
+  { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0,     0, 0x99, 0,    0, 0, 0, 0, false }, "Cdq", "" },
+  { kx86Cqo64Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { REX_W, 0, 0x99, 0,    0, 0, 0, 0, false }, "Cqo", "" },
+  { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0,     0, 0x0F, 0xC8, 0, 0, 0, 0, false }, "Bswap32R", "!0r" },
+  { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0,     0, 0x50, 0,    0, 0, 0, 0, false }, "Push32R",  "!0r" },
+  { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0,     0, 0x58, 0,    0, 0, 0, 0, false }, "Pop32R",   "!0r" },
 
 #define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
 #define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
 #define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
   EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10, REG_DEF0),
-  { kX86MovsdMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdMR", "[!0r+!1d],!2r" },
-  { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovsdMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovsdMR", "[!0r+!1d],!2r" },
+  { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Movss, 0xF3, 0x10, REG_DEF0),
-  { kX86MovssMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssMR", "[!0r+!1d],!2r" },
-  { kX86MovssAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovssMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovssMR", "[!0r+!1d],!2r" },
+  { kX86MovssAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovssAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Cvtsi2sd,  0xF2, 0x2A, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtsi2ss,  0xF3, 0x2A, REG_DEF0),
@@ -393,84 +391,84 @@
   EXT_0F_ENCODING2_MAP(Phaddw,   0x66, 0x38, 0x01, REG_DEF0_USE0),
   EXT_0F_ENCODING2_MAP(Phaddd,   0x66, 0x38, 0x02, REG_DEF0_USE0),
 
-  { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1 }, "PextbRRI", "!0r,!1r,!2d" },
-  { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1 }, "PextwRRI", "!0r,!1r,!2d" },
-  { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1 }, "PextdRRI", "!0r,!1r,!2d" },
+  { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" },
+  { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
+  { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" },
 
-  { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuflwRRI", "!0r,!1r,!2d" },
-  { kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuffRRI", "!0r,!1r,!2d" },
+  { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
+  { kX86PshufdRRI,  kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuffRRI", "!0r,!1r,!2d" },
 
-  { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1 }, "PsrawRI", "!0r,!1d" },
-  { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1 }, "PsradRI", "!0r,!1d" },
-  { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1 }, "PsrlwRI", "!0r,!1d" },
-  { kX86PsrldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 2, 0, 1 }, "PsrldRI", "!0r,!1d" },
-  { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
-  { kX86PsllwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 6, 0, 1 }, "PsllwRI", "!0r,!1d" },
-  { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1 }, "PslldRI", "!0r,!1d" },
-  { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
+  { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1, false }, "PsrawRI", "!0r,!1d" },
+  { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1, false }, "PsradRI", "!0r,!1d" },
+  { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1, false }, "PsrlwRI", "!0r,!1d" },
+  { kX86PsrldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 2, 0, 1, false }, "PsrldRI", "!0r,!1d" },
+  { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1, false }, "PsrlqRI", "!0r,!1d" },
+  { kX86PsllwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 6, 0, 1, false }, "PsllwRI", "!0r,!1d" },
+  { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" },
+  { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" },
 
-  { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0 }, "Fild32M", "[!0r,!1d]" },
-  { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0 }, "Fild64M", "[!0r,!1d]" },
-  { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0 }, "FstpsM", "[!0r,!1d]" },
-  { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
+  { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" },
+  { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" },
+  { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "FstpsM", "[!0r,!1d]" },
+  { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "FstpdM", "[!0r,!1d]" },
 
   EXT_0F_ENCODING_MAP(Mova128,    0x66, 0x6F, REG_DEF0),
-  { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128MR", "[!0r+!1d],!2r" },
-  { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" },
+  { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" },
 
 
   EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
-  { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
-  { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovupsMR", "[!0r+!1d],!2r" },
+  { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Movaps,    0x0, 0x28, REG_DEF0),
-  { kX86MovapsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsMR", "[!0r+!1d],!2r" },
-  { kX86MovapsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovapsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0, false }, "MovapsMR", "[!0r+!1d],!2r" },
+  { kX86MovapsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0, false }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
-  { kX86MovlpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRM", "!0r,[!1r+!2d]" },
-  { kX86MovlpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86MovlpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsMR", "[!0r+!1d],!2r" },
-  { kX86MovlpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovlpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0, false }, "MovlpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovlpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0, false }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovlpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,            { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0, false }, "MovlpsMR", "[!0r+!1d],!2r" },
+  { kX86MovlpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014,           { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0, false }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
-  { kX86MovhpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRM", "!0r,[!1r+!2d]" },
-  { kX86MovhpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86MovhpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsMR", "[!0r+!1d],!2r" },
-  { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovhpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0, false }, "MovhpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovhpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0, false }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovhpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,            { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0, false }, "MovhpsMR", "[!0r+!1d],!2r" },
+  { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014,           { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0, false }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E, REG_DEF0),
   EXT_0F_REX_W_ENCODING_MAP(Movqxr, 0x66, 0x6E, REG_DEF0),
-  { kX86MovqrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxRR", "!0r,!1r" },
-  { kX86MovqrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxMR", "[!0r+!1d],!2r" },
-  { kX86MovqrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovqrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxRR", "!0r,!1r" },
+  { kX86MovqrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxMR", "[!0r+!1d],!2r" },
+  { kX86MovqrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
-  { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
-  { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
-  { kX86MovdrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxRR", "!0r,!1r" },
+  { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxMR", "[!0r+!1d],!2r" },
+  { kX86MovdrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
-  { kX86MovsxdRR, kRegReg,      IS_BINARY_OP | REG_DEF0 | REG_USE1,              { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRR", "!0r,!1r" },
-  { kX86MovsxdRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE1,  { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRM", "!0r,[!1r+!2d]" },
-  { kX86MovsxdRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE12, { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovsxdRR, kRegReg,      IS_BINARY_OP | REG_DEF0 | REG_USE1,              { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRR", "!0r,!1r" },
+  { kX86MovsxdRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE1,  { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRM", "!0r,[!1r+!2d]" },
+  { kX86MovsxdRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE12, { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
-  { kX86Set8R, kRegCond,              IS_BINARY_OP   | REG_DEF0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8R", "!1c !0r" },
-  { kX86Set8M, kMemCond,   IS_STORE | IS_TERTIARY_OP | REG_USE0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" },
-  { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP     | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" },
+  { kX86Set8R, kRegCond,              IS_BINARY_OP   | REG_DEF0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, true }, "Set8R", "!1c !0r" },
+  { kX86Set8M, kMemCond,   IS_STORE | IS_TERTIARY_OP | REG_USE0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8M", "!2c [!0r+!1d]" },
+  { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP     | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" },
 
   // TODO: load/store?
   // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly.
-  { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0 }, "Mfence", "" },
+  { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
 
   EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
   EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
   EXT_0F_ENCODING_MAP(Imul64,  REX_W, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
 
-  { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" },
-  { kX86CmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" },
-  { kX86CmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86LockCmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1d],!2r" },
-  { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86LockCmpxchg8bM, kMem,   IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1d]" },
-  { kX86LockCmpxchg8bA, kArray, IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
-  { kX86XchgMR, kMemReg, IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02, { 0, 0, 0x87, 0, 0, 0, 0, 0 }, "Xchg", "[!0r+!1d],!2r" },
+  { kX86CmpxchgRR, kRegRegStore,  IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES,   { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "!0r,!1r" },
+  { kX86CmpxchgMR, kMemReg,       IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "[!0r+!1d],!2r" },
+  { kX86CmpxchgAR, kArrayReg,     IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86LockCmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1d],!2r" },
+  { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86LockCmpxchg64M, kMem,     IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1d]" },
+  { kX86LockCmpxchg64A, kArray,   IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES,  { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
+  { kX86XchgMR, kMemReg,          IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02,          { 0, 0, 0x87, 0, 0, 0, 0, 0, false }, "Xchg", "[!0r+!1d],!2r" },
 
   EXT_0F_ENCODING_MAP(Movzx8,  0x00, 0xB6, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
@@ -478,28 +476,39 @@
   EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0),
 #undef EXT_0F_ENCODING_MAP
 
-  { kX86Jcc8,  kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x70, 0,    0, 0, 0, 0 }, "Jcc8",  "!1c !0t" },
-  { kX86Jcc32, kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x0F, 0x80, 0, 0, 0, 0 }, "Jcc32", "!1c !0t" },
-  { kX86Jmp8,  kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xEB, 0,    0, 0, 0, 0 }, "Jmp8",  "!0t" },
-  { kX86Jmp32, kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xE9, 0,    0, 0, 0, 0 }, "Jmp32", "!0t" },
-  { kX86JmpR,  kJmp,  IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xFF, 0,    0, 4, 0, 0 }, "JmpR",  "!0r" },
-  { kX86Jecxz8, kJmp, NO_OPERAND   | IS_BRANCH | NEEDS_FIXUP | REG_USEC,    { 0,             0, 0xE3, 0,    0, 0, 0, 0 }, "Jecxz", "!0t" },
-  { kX86JmpT,  kJmp,  IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 4, 0, 0 }, "JmpT",  "fs:[!0d]" },
-  { kX86CallR, kCall, IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xE8, 0,    0, 0, 0, 0 }, "CallR", "!0r" },
-  { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0,        { 0,             0, 0xFF, 0,    0, 2, 0, 0 }, "CallM", "[!0r+!1d]" },
-  { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH | IS_LOAD | REG_USE01,       { 0,             0, 0xFF, 0,    0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" },
-  { kX86CallT, kCall, IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 2, 0, 0 }, "CallT", "fs:[!0d]" },
-  { kX86CallI, kCall, IS_UNARY_OP  | IS_BRANCH,                             { 0,             0, 0xE8, 0,    0, 0, 0, 4 }, "CallI", "!0d" },
-  { kX86Ret,   kNullary, NO_OPERAND | IS_BRANCH,                            { 0,             0, 0xC3, 0,    0, 0, 0, 0 }, "Ret", "" },
+  { kX86Jcc8,  kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x70, 0,    0, 0, 0, 0, false }, "Jcc8",  "!1c !0t" },
+  { kX86Jcc32, kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x0F, 0x80, 0, 0, 0, 0, false }, "Jcc32", "!1c !0t" },
+  { kX86Jmp8,  kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xEB, 0,    0, 0, 0, 0, false }, "Jmp8",  "!0t" },
+  { kX86Jmp32, kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xE9, 0,    0, 0, 0, 0, false }, "Jmp32", "!0t" },
+  { kX86JmpR,  kJmp,  IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xFF, 0,    0, 4, 0, 0, false }, "JmpR",  "!0r" },
+  { kX86Jecxz8, kJmp, NO_OPERAND   | IS_BRANCH | NEEDS_FIXUP | REG_USEC,    { 0,             0, 0xE3, 0,    0, 0, 0, 0, false }, "Jecxz", "!0t" },
+  { kX86JmpT,  kJmp,  IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 4, 0, 0, false }, "JmpT",  "fs:[!0d]" },
+  { kX86CallR, kCall, IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xE8, 0,    0, 0, 0, 0, false }, "CallR", "!0r" },
+  { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0,        { 0,             0, 0xFF, 0,    0, 2, 0, 0, false }, "CallM", "[!0r+!1d]" },
+  { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH | IS_LOAD | REG_USE01,       { 0,             0, 0xFF, 0,    0, 2, 0, 0, false }, "CallA", "[!0r+!1r<<!2d+!3d]" },
+  { kX86CallT, kCall, IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 2, 0, 0, false }, "CallT", "fs:[!0d]" },
+  { kX86CallI, kCall, IS_UNARY_OP  | IS_BRANCH,                             { 0,             0, 0xE8, 0,    0, 0, 0, 4, false }, "CallI", "!0d" },
+  { kX86Ret,   kNullary, NO_OPERAND | IS_BRANCH,                            { 0,             0, 0xC3, 0,    0, 0, 0, 0, false }, "Ret", "" },
 
-  { kX86StartOfMethod, kMacro,  IS_UNARY_OP | SETS_CCODES,             { 0, 0, 0,    0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" },
-  { kX86PcRelLoadRA,   kPcRel,  IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "PcRelLoadRA",   "!0r,[!1r+!2r<<!3d+!4p]" },
-  { kX86PcRelAdr,      kPcRel,  IS_LOAD | IS_BINARY_OP | REG_DEF0,     { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "PcRelAdr",      "!0r,!1d" },
-  { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
+  { kX86StartOfMethod, kMacro,  IS_UNARY_OP | SETS_CCODES,             { 0, 0, 0,    0, 0, 0, 0, 0, false }, "StartOfMethod", "!0r" },
+  { kX86PcRelLoadRA,   kPcRel,  IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "PcRelLoadRA",   "!0r,[!1r+!2r<<!3d+!4p]" },
+  { kX86PcRelAdr,      kPcRel,  IS_LOAD | IS_BINARY_OP | REG_DEF0,     { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "PcRelAdr",      "!0r,!1d" },
+  { kX86RepneScasw,    kNullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0, false }, "RepNE ScasW", "" },
 };
 
-size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement,
-                               int reg_r, int reg_x, bool has_sib) {
+static bool NeedsRex(int32_t raw_reg) {
+  return RegStorage::RegNum(raw_reg) > 7;
+}
+
+static uint8_t LowRegisterBits(int32_t raw_reg) {
+  uint8_t low_reg = RegStorage::RegNum(raw_reg) & kRegNumMask32;  // 3 bits
+  DCHECK_LT(low_reg, 8);
+  return low_reg;
+}
+
+size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
+                               int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form,
+                               int32_t displacement) {
   size_t size = 0;
   if (entry->skeleton.prefix1 > 0) {
     ++size;
@@ -507,9 +516,17 @@
       ++size;
     }
   }
-  if ((NeedsRex(base) || NeedsRex(reg_r) || NeedsRex(reg_x)) &&
-       entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) {
-    ++size;  // REX_R
+  if (Gen64Bit() || kIsDebugBuild) {
+    bool registers_need_rex_prefix =
+        NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base) ||
+        (r8_form && RegStorage::RegNum(raw_reg) > 4) ||
+        (r8_reg_reg_form && RegStorage::RegNum(raw_base) > 4);
+    if (registers_need_rex_prefix &&
+        entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) {
+      DCHECK(Gen64Bit()) << "Attempt to use " << entry->name << " on a non-byte register "
+          << RegStorage::RegNum(raw_reg);
+      ++size;  // rex
+    }
   }
   ++size;  // opcode
   if (entry->skeleton.opcode == 0x0F) {
@@ -519,16 +536,16 @@
     }
   }
   ++size;  // modrm
-  if (has_sib || LowRegisterBits(RegStorage::RegNum(base)) == rs_rX86_SP.GetRegNum()
+  if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum()
       || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
     // SP requires a SIB byte.
     // GS access also needs a SIB byte for absolute adressing in 64-bit mode.
     ++size;
   }
-  if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) {
+  if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) {
     // BP requires an explicit displacement, even when it's 0.
     if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) {
-      DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name;
+      DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name;
     }
     size += IS_SIMM8(displacement) ? 1 : 4;
   }
@@ -539,112 +556,153 @@
 int X86Mir2Lir::GetInsnSize(LIR* lir) {
   DCHECK(!IsPseudoLirOp(lir->opcode));
   const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode];
+  DCHECK_EQ(entry->opcode, lir->opcode) << entry->name;
   switch (entry->kind) {
     case kData:
-      return 4;  // 4 bytes of data
+      return 4;  // 4 bytes of data.
     case kNop:
-      return lir->operands[0];  // length of nop is sole operand
+      return lir->operands[0];  // Length of nop is sole operand.
     case kNullary:
-      return 1;  // 1 byte of opcode
-    case kPrefix2Nullary:
-      return 3;  // 1 byte of opcode + 2 prefixes
+      // Subtract 1 for modrm which isn't used.
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0) - 1;
     case kRegOpcode:  // lir operands - 0: reg
-      // substract 1 for modrm
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - 1;
+      // Subtract 1 for modrm which isn't used.
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      // Note: RegOpcode form passes reg as REX_R but encodes it as REX_B.
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false, 0) - 1;
     case kReg:  // lir operands - 0: reg
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
+      // Note: Reg form passes reg as REX_R but encodes it as REX_B.
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
+                         false, entry->skeleton.r8_form, false, 0);
     case kMem:  // lir operands - 0: base, 1: disp
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false,
+                         lir->operands[1]);
     case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         NO_REG, lir->operands[1], true);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false,
+                         lir->operands[3]);
     case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
-      return ComputeSize(entry, lir->operands[0], lir->operands[1],
-                         lir->operands[2], NO_REG, false);
+      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0],
+                         false, entry->skeleton.r8_form, false, lir->operands[1]);
     case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg 3: immediate
-      return ComputeSize(entry, lir->operands[0], lir->operands[1],
-                         lir->operands[2], NO_REG, false);
+      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0],
+                         false, entry->skeleton.r8_form, false, lir->operands[1]);
     case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         lir->operands[4], lir->operands[1], true);
+      return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
+                         true, entry->skeleton.r8_form, false, lir->operands[3]);
     case kThreadReg:  // lir operands - 0: disp, 1: reg
-      return ComputeSize(entry, 0, lir->operands[0], lir->operands[1], NO_REG, false);
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      // Thread displacement size is always 32bit.
+      return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, false, false, false,
+                         0x12345678);
     case kRegReg:  // lir operands - 0: reg1, 1: reg2
-      return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false);
+      // Note: RegReg form passes reg2 as index but encodes it using base.
+      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG,
+                         false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0);
     case kRegRegStore:  // lir operands - 0: reg2, 1: reg1
-      return ComputeSize(entry, 0, 0, lir->operands[1], lir->operands[0], false);
+      // Note: RegRegStore form passes reg1 as index but encodes it using base.
+      return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG,
+                         false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0);
     case kRegMem:  // lir operands - 0: reg, 1: base, 2: disp
-      return ComputeSize(entry, lir->operands[1], lir->operands[2],
-                         lir->operands[0], NO_REG, false);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1],
+                         false, entry->skeleton.r8_form, false, lir->operands[2]);
     case kRegArray:   // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
-      return ComputeSize(entry, lir->operands[1], lir->operands[4],
-                         lir->operands[0], lir->operands[2], true);
+      return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
+                         true, entry->skeleton.r8_form, false, lir->operands[4]);
     case kRegThread:  // lir operands - 0: reg, 1: disp
-      // displacement size is always 32bit
-      return ComputeSize(entry, 0, 0x12345678, lir->operands[0], NO_REG, false);
+      // Thread displacement size is always 32bit.
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false,
+                         0x12345678);
     case kRegImm: {  // lir operands - 0: reg, 1: immediate
-      size_t size = ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
+      size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
+                         false, entry->skeleton.r8_form, false, 0);
+      // AX opcodes don't require the modrm byte.
       if (entry->skeleton.ax_opcode == 0) {
         return size;
       } else {
-        // AX opcodes don't require the modrm byte.
-        int reg = lir->operands[0];
-        return size - (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() ? 1 : 0);
+        return size - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0);
       }
     }
     case kMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
-      return ComputeSize(entry, lir->operands[0], lir->operands[1],
-                         NO_REG, lir->operands[0], false);
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0],
+                         false, false, false, lir->operands[1]);
     case kArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         NO_REG, lir->operands[1], true);
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0],
+                         true, false, false, lir->operands[3]);
     case kThreadImm:  // lir operands - 0: disp, 1: imm
-      // displacement size is always 32bit
-      return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false);
-    case kRegRegImm:  // lir operands - 0: reg, 1: reg, 2: imm
-    case kRegRegImmRev:
-      return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false);
+      // Thread displacement size is always 32bit.
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678);
+    case kRegRegImm:  // lir operands - 0: reg1, 1: reg2, 2: imm
+      // Note: RegRegImm form passes reg2 as index but encodes it using base.
+      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG,
+                         false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0);
+    case kRegRegImmStore:  // lir operands - 0: reg2, 1: reg1, 2: imm
+      // Note: RegRegImmStore form passes reg1 as index but encodes it using base.
+      return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG,
+                         false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0);
     case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
-      return ComputeSize(entry, lir->operands[1], lir->operands[2],
-                         lir->operands[0], NO_REG, false);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1],
+                         false, entry->skeleton.r8_form, false, lir->operands[2]);
     case kRegArrayImm:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm
-      return ComputeSize(entry, lir->operands[1], lir->operands[4],
-                         lir->operands[0], lir->operands[2], true);
+      return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
+                         true, entry->skeleton.r8_form, false, lir->operands[4]);
     case kMovRegImm:  // lir operands - 0: reg, 1: immediate
-      return (entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])?1:0) +
-             1 + entry->skeleton.immediate_bytes;
+      return ((entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])) ? 1 : 0) + 1 +
+          entry->skeleton.immediate_bytes;
     case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
       // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) -
-             (lir->operands[1] == 1 ? 1 : 0);
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
+                         false, entry->skeleton.r8_form, false, 0) -
+          (lir->operands[1] == 1 ? 1 : 0);
     case kShiftMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
       // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false) -
-             (lir->operands[2] == 1 ? 1 : 0);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0],
+                         false, entry->skeleton.r8_form, false, lir->operands[1]) -
+          (lir->operands[2] == 1 ? 1 : 0);
     case kShiftArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
       // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         NO_REG, lir->operands[1], true) -
-             (lir->operands[4] == 1 ? 1 : 0);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0],
+                         true, entry->skeleton.r8_form, false, lir->operands[3]) -
+          (lir->operands[4] == 1 ? 1 : 0);
     case kShiftRegCl:  // lir operands - 0: reg, 1: cl
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
+      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[1]));
+      // Note: ShiftRegCl form passes reg as reg but encodes it using base.
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
+                         false, entry->skeleton.r8_form, false, 0);
     case kShiftMemCl:  // lir operands - 0: base, 1: disp, 2: cl
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
-    case kShiftArrayCl:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         lir->operands[4], lir->operands[1], true);
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[2]));
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0],
+                         false, false, false, lir->operands[1]);
+    case kShiftArrayCl:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cl
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[4]));
+      return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
+                         true, false, false, lir->operands[3]);
     case kRegCond:  // lir operands - 0: reg, 1: cond
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
+                         false, entry->skeleton.r8_form, false, 0);
     case kMemCond:  // lir operands - 0: base, 1: disp, 2: cond
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false,
+                         lir->operands[1]);
     case kArrayCond:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         NO_REG, lir->operands[1], true);
-    case kRegRegCond:  // lir operands - 0: reg, 1: reg, 2: cond
-      return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false);
-    case kRegMemCond:  // lir operands - 0: reg, 1: reg, 2: disp, 3:cond
-      return ComputeSize(entry, lir->operands[1], lir->operands[2],
-                         lir->operands[0], lir->operands[1], false);
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false,
+                         lir->operands[3]);
+    case kRegRegCond:  // lir operands - 0: reg1, 1: reg2, 2: cond
+      // Note: RegRegCond form passes reg2 as index but encodes it using base.
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, false, false, false, 0);
+    case kRegMemCond:  // lir operands - 0: reg, 1: base, 2: disp, 3:cond
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], false, false, false,
+                         lir->operands[2]);
     case kJcc:
       if (lir->opcode == kX86Jcc8) {
         return 2;  // opcode + rel8
@@ -658,8 +716,8 @@
       } else if (lir->opcode == kX86Jmp32) {
         return 5;  // opcode + rel32
       } else if (lir->opcode == kX86JmpT) {
-        // displacement size is always 32bit
-        return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false);
+        // Thread displacement size is always 32bit.
+        return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678);
       } else {
         DCHECK(lir->opcode == kX86JmpR);
         if (NeedsRex(lir->operands[0])) {
@@ -673,13 +731,14 @@
         case kX86CallI: return 5;  // opcode 0:disp
         case kX86CallR: return 2;  // opcode modrm
         case kX86CallM:  // lir operands - 0: base, 1: disp
-          return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
+          return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false,
+                             lir->operands[1]);
         case kX86CallA:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
-          return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                             NO_REG, lir->operands[1], true);
+          return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false,
+                             lir->operands[3]);
         case kX86CallT:  // lir operands - 0: disp
-          // displacement size is always 32bit
-          return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false);
+          // Thread displacement size is always 32bit.
+          return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678);
         default:
           break;
       }
@@ -687,43 +746,76 @@
     case kPcRel:
       if (entry->opcode == kX86PcRelLoadRA) {
         // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
-        return ComputeSize(entry, lir->operands[1], 0x12345678,
-                           lir->operands[0], lir->operands[2], true);
+        // Force the displacement size to 32bit, it will hold a computed offset later.
+        return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
+                           true, false, false, 0x12345678);
       } else {
-        DCHECK(entry->opcode == kX86PcRelAdr);
+        DCHECK_EQ(entry->opcode, kX86PcRelAdr);
         return 5;  // opcode with reg + 4 byte immediate
       }
     case kMacro:  // lir operands - 0: reg
       DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod));
       return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ +
-          ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], 0, 0,
-                      lir->operands[0], NO_REG, false) -
-          // shorter ax encoding
-          (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum()  ? 1 : 0);
-    default:
+          ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI],
+                      lir->operands[0], NO_REG, NO_REG, false, false, false, 0) -
+              // Shorter ax encoding.
+              (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum()  ? 1 : 0);
+    case kUnimplemented:
       break;
   }
   UNIMPLEMENTED(FATAL) << "Unimplemented size encoding for: " << entry->name;
   return 0;
 }
 
-void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) {
-  EmitPrefix(entry, NO_REG, NO_REG, NO_REG);
+static uint8_t ModrmForDisp(int base, int disp) {
+  // BP requires an explicit disp, so do not omit it in the 0 case
+  if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
+    return 0;
+  } else if (IS_SIMM8(disp)) {
+    return 1;
+  } else {
+    return 2;
+  }
+}
+
+void X86Mir2Lir::CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg) {
+  if (kIsDebugBuild) {
+    // Sanity check r8_form is correctly specified.
+    if (entry->skeleton.r8_form) {
+      CHECK(strchr(entry->name, '8') != nullptr) << entry->name;
+    } else {
+      if (entry->skeleton.immediate_bytes != 1) {  // Ignore ...I8 instructions.
+        if (!StartsWith(entry->name, "Movzx8") && !StartsWith(entry->name, "Movsx8")) {
+          CHECK(strchr(entry->name, '8') == nullptr) << entry->name;
+        }
+      }
+    }
+    if (RegStorage::RegNum(raw_reg) >= 4) {
+      // In 32-bit mode encodings 4-7 select ah, ch, dh and bh, not the low byte of the register.
+      CHECK(Gen64Bit() || !entry->skeleton.r8_form)
+               << "Invalid register " << static_cast<int>(RegStorage::RegNum(raw_reg))
+               << " for instruction " << entry->name << " in "
+               << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+    }
+  }
 }
 
 void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry,
-                            uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) {
+                            int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b,
+                            bool r8_form) {
   // REX.WRXB
   // W - 64-bit operand
   // R - MODRM.reg
   // X - SIB.index
   // B - MODRM.rm/SIB.base
-  bool force = false;
   bool w = (entry->skeleton.prefix1 == REX_W) || (entry->skeleton.prefix2 == REX_W);
-  bool r = NeedsRex(reg_r);
-  bool x = NeedsRex(reg_x);
-  bool b = NeedsRex(reg_b);
-  uint8_t rex = force ? 0x40 : 0;
+  bool r = NeedsRex(raw_reg_r);
+  bool x = NeedsRex(raw_reg_x);
+  bool b = NeedsRex(raw_reg_b);
+  uint8_t rex = 0;
+  if (r8_form && RegStorage::RegNum(raw_reg_r) > 4) {
+    rex |= 0x40;  // REX.0000
+  }
   if (w) {
     rex |= 0x48;  // REX.W000
   }
@@ -738,7 +830,7 @@
   }
   if (entry->skeleton.prefix1 != 0) {
     if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) {
-      // 64 bit adresses by GS, not FS
+      // In 64-bit mode the thread is addressed via GS, not FS.
       code_buffer_.push_back(THREAD_PREFIX_GS);
     } else {
       if (entry->skeleton.prefix1 == REX_W) {
@@ -762,6 +854,7 @@
     DCHECK_EQ(0, entry->skeleton.prefix2);
   }
   if (rex != 0) {
+    DCHECK(Gen64Bit());
     code_buffer_.push_back(rex);
   }
 }
@@ -781,28 +874,14 @@
   }
 }
 
-void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry) {
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
-}
-
 void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry,
-                                     uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) {
-  EmitPrefix(entry, reg_r, reg_x, reg_b);
+                                     int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b,
+                                     bool r8_form) {
+  EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b, r8_form);
   EmitOpcode(entry);
 }
 
-static uint8_t ModrmForDisp(int base, int disp) {
-  // BP requires an explicit disp, so do not omit it in the 0 case
-  if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
-    return 0;
-  } else if (IS_SIMM8(disp)) {
-    return 1;
-  } else {
-    return 2;
-  }
-}
-
-void X86Mir2Lir::EmitDisp(uint8_t base, int disp) {
+void X86Mir2Lir::EmitDisp(uint8_t base, int32_t disp) {
   // BP requires an explicit disp, so do not omit it in the 0 case
   if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
     return;
@@ -829,13 +908,12 @@
   }
 }
 
-void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp) {
-  DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8);
-  DCHECK_LT(RegStorage::RegNum(base), 8);
-  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (RegStorage::RegNum(reg_or_opcode) << 3) |
-     RegStorage::RegNum(base);
+void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp) {
+  DCHECK_LT(reg_or_opcode, 8);
+  DCHECK_LT(base, 8);
+  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
   code_buffer_.push_back(modrm);
-  if (RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()) {
+  if (base == rs_rX86_SP.GetRegNum()) {
     // Special SIB for SP base
     code_buffer_.push_back(0 << 6 | rs_rX86_SP.GetRegNum() << 3 | rs_rX86_SP.GetRegNum());
   }
@@ -843,7 +921,7 @@
 }
 
 void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index,
-                                  int scale, int disp) {
+                                  int scale, int32_t disp) {
   DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8);
   uint8_t modrm = (ModrmForDisp(base, disp) << 6) | RegStorage::RegNum(reg_or_opcode) << 3 |
       rs_rX86_SP.GetRegNum();
@@ -868,11 +946,7 @@
       code_buffer_.push_back((imm >> 8) & 0xFF);
       break;
     case 4:
-      if (imm <0) {
-        CHECK_EQ((-imm) & 0x0FFFFFFFFl, -imm);
-      } else {
-        CHECK_EQ(imm & 0x0FFFFFFFFl, imm);
-      }
+      DCHECK(IS_SIMM32(imm));
       code_buffer_.push_back(imm & 0xFF);
       code_buffer_.push_back((imm >> 8) & 0xFF);
       code_buffer_.push_back((imm >> 16) & 0xFF);
@@ -895,128 +969,126 @@
   }
 }
 
-void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) {
-  EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
-  // There's no 3-byte instruction with +rd
-  DCHECK(entry->skeleton.opcode != 0x0F ||
-         (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A));
-  DCHECK(!RegStorage::IsFloat(reg));
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  code_buffer_.back() += RegStorage::RegNum(reg);
+void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) {
-  EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
-  if (RegStorage::RegNum(reg) >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
-        << static_cast<int>(RegStorage::RegNum(reg))
-        << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, false);
+  // There's no 3-byte instruction with +rd
+  DCHECK(entry->skeleton.opcode != 0x0F ||
+         (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A));
+  DCHECK(!RegStorage::IsFloat(raw_reg));
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  code_buffer_.back() += low_reg;
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
+void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) {
-  EmitPrefix(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
+void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index,
-                             int scale, int disp) {
-  EmitPrefixAndOpcode(entry, NO_REG, index, base);
-  index = LowRegisterBits(index);
-  base = LowRegisterBits(base);
-  EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp);
+void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index,
+                             int scale, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false);
+  uint8_t low_index = LowRegisterBits(raw_index);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-uint8_t X86Mir2Lir::LowRegisterBits(uint8_t reg) {
-  uint8_t res = reg;
-  res = reg & kRegNumMask32;  // 3 bits
-  return res;
-}
-
-bool X86Mir2Lir::NeedsRex(uint8_t reg) {
-  return RegStorage::RegNum(reg) > 7;
-}
-
-void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry,
-                       uint8_t base, int disp, uint8_t reg) {
-  EmitPrefixAndOpcode(entry, reg, NO_REG, base);
-  reg = LowRegisterBits(reg);
-  base = LowRegisterBits(base);
-  if (RegStorage::RegNum(reg) >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL ||
-           entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM)
-        << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg))
-        << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-  EmitModrmDisp(reg, base, disp);
+void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
+                            int32_t raw_reg) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(low_reg, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry,
-                       uint8_t reg, uint8_t base, int disp) {
+void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
+                            int32_t disp) {
   // Opcode will flip operands.
-  EmitMemReg(entry, base, disp, reg);
+  EmitMemReg(entry, raw_base, disp, raw_reg);
 }
 
-void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base,
-                              uint8_t index, int scale, int disp) {
-  EmitPrefixAndOpcode(entry, reg, index, base);
-  reg = LowRegisterBits(reg);
-  index = LowRegisterBits(index);
-  base = LowRegisterBits(base);
-  EmitModrmSibDisp(reg, base, index, scale, disp);
+void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
+                              int32_t raw_index, int scale, int32_t disp) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base, entry->skeleton.r8_form);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t low_index = LowRegisterBits(raw_index);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmSibDisp(low_reg, low_base, low_index, scale, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale,
-                              int disp, uint8_t reg) {
+void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index,
+                              int scale, int32_t disp, int32_t raw_reg) {
   // Opcode will flip operands.
-  EmitRegArray(entry, reg, base, index, scale, disp);
+  EmitRegArray(entry, raw_reg, raw_base, raw_index, scale, disp);
 }
 
-void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale,
-                              int disp, int32_t imm) {
-  EmitPrefixAndOpcode(entry, NO_REG, index, base);
-  index = LowRegisterBits(index);
-  base = LowRegisterBits(base);
-  EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp);
+void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
+                            int32_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   EmitImm(entry, imm);
 }
 
-void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) {
+void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry,
+                              int32_t raw_base, int32_t raw_index, int scale, int32_t disp,
+                              int32_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false);
+  uint8_t low_index = LowRegisterBits(raw_index);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  EmitImm(entry, imm);
+}
+
+void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
-  if (RegStorage::RegNum(reg) >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
-        << static_cast<int>(RegStorage::RegNum(reg))
-        << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  EmitModrmThread(RegStorage::RegNum(reg));
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG, false);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  EmitModrmThread(low_reg);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -1026,79 +1098,67 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2) {
-  EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2);
-  reg1 = LowRegisterBits(reg1);
-  reg2 = LowRegisterBits(reg2);
-  DCHECK_LT(RegStorage::RegNum(reg1), 8);
-  DCHECK_LT(RegStorage::RegNum(reg2), 8);
-  uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
+void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) {
+  CheckValidByteRegister(entry, raw_reg1);
+  CheckValidByteRegister(entry, raw_reg2);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, entry->skeleton.r8_form);
+  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
+  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
+  uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry,
-                          uint8_t reg1, uint8_t reg2, int32_t imm) {
-  EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2);
-  reg1 = LowRegisterBits(reg1);
-  reg2 = LowRegisterBits(reg2);
-  DCHECK_LT(RegStorage::RegNum(reg1), 8);
-  DCHECK_LT(RegStorage::RegNum(reg2), 8);
-  uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
+void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
+                               int32_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false);
+  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
+  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
+  uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   EmitImm(entry, imm);
 }
 
-void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry,
-                                  uint8_t reg1, uint8_t reg2, int32_t imm) {
-  EmitRegRegImm(entry, reg2, reg1, imm);
-}
-
 void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry,
-                               uint8_t reg, uint8_t base, int disp, int32_t imm) {
-  EmitPrefixAndOpcode(entry, reg, NO_REG, base);
-  reg = LowRegisterBits(reg);
-  base = LowRegisterBits(base);
-  DCHECK(!RegStorage::IsFloat(reg));
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  EmitModrmDisp(reg, base, disp);
+                               int32_t raw_reg, int32_t raw_base, int disp, int32_t imm) {
+  DCHECK(!RegStorage::IsFloat(raw_reg));
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(low_reg, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   EmitImm(entry, imm);
 }
 
 void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry,
-                               uint8_t base, int disp, uint8_t reg, int32_t imm) {
-  EmitRegMemImm(entry, reg, base, disp, imm);
+                               int32_t raw_base, int32_t disp, int32_t raw_reg, int32_t imm) {
+  // Opcode will flip operands.
+  EmitRegMemImm(entry, raw_reg, raw_base, disp, imm);
 }
 
-void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
-  EmitPrefix(entry, NO_REG, NO_REG, reg);
-  if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
+void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  if (RegStorage::RegNum(raw_reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
     code_buffer_.push_back(entry->skeleton.ax_opcode);
   } else {
-    reg = LowRegisterBits(reg);
+    uint8_t low_reg = LowRegisterBits(raw_reg);
     EmitOpcode(entry);
-    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
     code_buffer_.push_back(modrm);
   }
   EmitImm(entry, imm);
 }
 
-void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm) {
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  EmitImm(entry, imm);
-}
-
-void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) {
-  EmitPrefixAndOpcode(entry);
+void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm) {
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
   EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1108,11 +1168,11 @@
   DCHECK_EQ(entry->skeleton.ax_opcode, 0);
 }
 
-void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm) {
-  EmitPrefix(entry, NO_REG, NO_REG, reg);
-  reg = LowRegisterBits(reg);
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  code_buffer_.push_back(0xB8 + RegStorage::RegNum(reg));
+void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, false);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  code_buffer_.push_back(0xB8 + low_reg);
   switch (entry->skeleton.immediate_bytes) {
     case 4:
       code_buffer_.push_back(imm & 0xFF);
@@ -1136,9 +1196,9 @@
   }
 }
 
-void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
-  EmitPrefix(entry, NO_REG, NO_REG, reg);
-  reg = LowRegisterBits(reg);
+void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
   if (imm != 1) {
     code_buffer_.push_back(entry->skeleton.opcode);
   } else {
@@ -1148,13 +1208,8 @@
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  if (RegStorage::RegNum(reg) >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
-        << static_cast<int>(RegStorage::RegNum(reg))
-        << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
   if (imm != 1) {
     DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
@@ -1163,40 +1218,40 @@
   }
 }
 
-void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl) {
-  DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg()));
-  EmitPrefix(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
+void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl) {
+  CheckValidByteRegister(entry, raw_reg);
+  DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base,
-                                int displacement, uint8_t cl) {
-  DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg()));
-  EmitPrefix(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
+void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base,
+                                int32_t displacement, int32_t raw_cl) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_LT(RegStorage::RegNum(base), 8);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, displacement);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base,
-                                int displacement, int imm) {
-  EmitPrefix(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
+void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
+                                 int32_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false);
   if (imm != 1) {
     code_buffer_.push_back(entry->skeleton.opcode);
   } else {
@@ -1206,7 +1261,8 @@
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   if (imm != 1) {
     DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
     DCHECK(IS_SIMM8(imm));
@@ -1214,23 +1270,26 @@
   }
 }
 
-void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
-  EmitPrefix(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
+void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefix(entry, raw_reg, NO_REG, NO_REG, entry->skeleton.r8_form);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0x0F, entry->skeleton.opcode);
   code_buffer_.push_back(0x0F);
   DCHECK_EQ(0x90, entry->skeleton.extra_opcode1);
-  code_buffer_.push_back(0x90 | condition);
+  DCHECK_GE(cc, 0);
+  DCHECK_LT(cc, 16);
+  code_buffer_.push_back(0x90 | cc);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
 }
 
-void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement,
-                             uint8_t condition) {
+void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
+                             int32_t cc) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
     if (entry->skeleton.prefix2 != 0) {
@@ -1243,61 +1302,63 @@
   DCHECK_EQ(0x0F, entry->skeleton.opcode);
   code_buffer_.push_back(0x0F);
   DCHECK_EQ(0x90, entry->skeleton.extra_opcode1);
-  code_buffer_.push_back(0x90 | condition);
+  DCHECK_GE(cc, 0);
+  DCHECK_LT(cc, 16);
+  code_buffer_.push_back(0x90 | cc);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
 }
 
-void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2,
-                                uint8_t condition) {
-  // Generate prefix and opcode without the condition
-  EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2);
-  reg1 = LowRegisterBits(reg1);
-  reg2 = LowRegisterBits(reg2);
+void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
+                                int32_t cc) {
+  // Generate prefix and opcode without the condition.
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false);
 
   // Now add the condition. The last byte of opcode is the one that receives it.
-  DCHECK_LE(condition, 0xF);
-  code_buffer_.back() += condition;
+  DCHECK_GE(cc, 0);
+  DCHECK_LT(cc, 16);
+  code_buffer_.back() += cc;
 
-  // Not expecting to have to encode immediate or do anything special for ModR/M since there are two registers.
+  // Not expecting to have to encode immediate or do anything special for ModR/M since there are
+  // two registers.
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
 
-  // Check that registers requested for encoding are sane.
-  DCHECK_LT(RegStorage::RegNum(reg1), 8);
-  DCHECK_LT(RegStorage::RegNum(reg2), 8);
-
   // For register to register encoding, the mod is 3.
   const uint8_t mod = (3 << 6);
 
   // Encode the ModR/M byte now.
-  const uint8_t modrm = mod | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
+  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
+  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
+  const uint8_t modrm = mod | (low_reg1 << 3) | low_reg2;
   code_buffer_.push_back(modrm);
 }
 
-void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base,
-                                int displacement, uint8_t condition) {
-  // Generate prefix and opcode without the condition
-  EmitPrefixAndOpcode(entry, reg1, NO_REG, base);
-  reg1 = LowRegisterBits(reg1);
-  base = LowRegisterBits(base);
+void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base,
+                                int32_t disp, int32_t cc) {
+  // Generate prefix and opcode without the condition.
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base, false);
 
   // Now add the condition. The last byte of opcode is the one that receives it.
-  DCHECK_LE(condition, 0xF);
-  code_buffer_.back() += condition;
+  DCHECK_GE(cc, 0);
+  DCHECK_LT(cc, 16);
+  code_buffer_.back() += cc;
 
+  // Not expecting to have to encode immediate or do anything special for ModR/M since there are
+  // two registers.
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
 
-  // Check that registers requested for encoding are sane.
-  DCHECK_LT(reg1, 8);
-  DCHECK_LT(base, 8);
-
-  EmitModrmDisp(reg1, base, displacement);
+  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(low_reg1, low_base, disp);
 }
 
-void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) {
+void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int32_t rel) {
   if (entry->opcode == kX86Jmp8) {
     DCHECK(IS_SIMM8(rel));
     code_buffer_.push_back(0xEB);
@@ -1314,17 +1375,17 @@
     code_buffer_.push_back(rel & 0xFF);
   } else {
     DCHECK(entry->opcode == kX86JmpR);
-    uint8_t reg = static_cast<uint8_t>(rel);
-    EmitPrefix(entry, NO_REG, NO_REG, reg);
+    DCHECK_EQ(false, entry->skeleton.r8_form);
+    EmitPrefix(entry, NO_REG, NO_REG, rel, false);
     code_buffer_.push_back(entry->skeleton.opcode);
-    reg = LowRegisterBits(reg);
-    DCHECK_LT(RegStorage::RegNum(reg), 8);
-    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+    uint8_t low_reg = LowRegisterBits(rel);
+    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
     code_buffer_.push_back(modrm);
   }
 }
 
-void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc) {
+void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc) {
+  DCHECK_GE(cc, 0);
   DCHECK_LT(cc, 16);
   if (entry->opcode == kX86Jcc8) {
     DCHECK(IS_SIMM8(rel));
@@ -1341,16 +1402,18 @@
   }
 }
 
-void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp) {
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
+void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int disp) {
-  EmitPrefixAndOpcode(entry);
+void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
   DCHECK_EQ(4, entry->skeleton.immediate_bytes);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1359,9 +1422,10 @@
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
 }
 
-void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) {
+void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
   EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1371,8 +1435,8 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg,
-                      int base_or_table, uint8_t index, int scale, int table_or_disp) {
+void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table,
+                           int32_t raw_index, int scale, int32_t table_or_disp) {
   int disp;
   if (entry->opcode == kX86PcRelLoadRA) {
     Mir2Lir::EmbeddedData *tab_rec =
@@ -1381,31 +1445,28 @@
   } else {
     DCHECK(entry->opcode == kX86PcRelAdr);
     Mir2Lir::EmbeddedData *tab_rec =
-        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(base_or_table));
+        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(raw_base_or_table));
     disp = tab_rec->offset;
   }
   if (entry->opcode == kX86PcRelLoadRA) {
-    EmitPrefix(entry, reg, index, base_or_table);
-    reg = LowRegisterBits(reg);
-    base_or_table = LowRegisterBits(base_or_table);
-    index = LowRegisterBits(index);
-    DCHECK_LT(RegStorage::RegNum(reg), 8);
+    DCHECK_EQ(false, entry->skeleton.r8_form);
+    EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table, false);
     code_buffer_.push_back(entry->skeleton.opcode);
     DCHECK_NE(0x0F, entry->skeleton.opcode);
     DCHECK_EQ(0, entry->skeleton.extra_opcode1);
     DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-    uint8_t modrm = (2 << 6) | (RegStorage::RegNum(reg) << 3) | rs_rX86_SP.GetRegNum();
+    uint8_t low_reg = LowRegisterBits(raw_reg);
+    uint8_t modrm = (2 << 6) | (low_reg << 3) | rs_rX86_SP.GetRegNum();
     code_buffer_.push_back(modrm);
     DCHECK_LT(scale, 4);
-    DCHECK_LT(RegStorage::RegNum(index), 8);
-    DCHECK_LT(RegStorage::RegNum(base_or_table), 8);
-    uint8_t base = static_cast<uint8_t>(base_or_table);
-    uint8_t sib = (scale << 6) | (RegStorage::RegNum(index) << 3) | RegStorage::RegNum(base);
+    uint8_t low_base_or_table = LowRegisterBits(raw_base_or_table);
+    uint8_t low_index = LowRegisterBits(raw_index);
+    uint8_t sib = (scale << 6) | (low_index << 3) | low_base_or_table;
     code_buffer_.push_back(sib);
     DCHECK_EQ(0, entry->skeleton.immediate_bytes);
   } else {
-    DCHECK_LT(RegStorage::RegNum(reg), 8);
-    code_buffer_.push_back(entry->skeleton.opcode + RegStorage::RegNum(reg));
+    uint8_t low_reg = LowRegisterBits(raw_reg);
+    code_buffer_.push_back(entry->skeleton.opcode + low_reg);
   }
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1415,21 +1476,21 @@
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
 }
 
-void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) {
-  DCHECK(entry->opcode == kX86StartOfMethod) << entry->name;
-  EmitPrefix(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
+void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) {
+  DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name;
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefix(entry, raw_reg, NO_REG, NO_REG, false);
   code_buffer_.push_back(0xE8);  // call +0
   code_buffer_.push_back(0);
   code_buffer_.push_back(0);
   code_buffer_.push_back(0);
   code_buffer_.push_back(0);
 
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  code_buffer_.push_back(0x58 + RegStorage::RegNum(reg));  // pop reg
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  code_buffer_.push_back(0x58 + low_reg);  // pop reg
 
   EmitRegImm(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI],
-             RegStorage::RegNum(reg), offset + 5 /* size of call +0 */);
+             raw_reg, offset + 5 /* size of call +0 */);
 }
 
 void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) {
@@ -1590,21 +1651,8 @@
       case kData:  // 4 bytes of data
         code_buffer_.push_back(lir->operands[0]);
         break;
-      case kNullary:  // 1 byte of opcode
-        DCHECK_EQ(0, entry->skeleton.prefix1);
-        DCHECK_EQ(0, entry->skeleton.prefix2);
-        EmitOpcode(entry);
-        DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-        DCHECK_EQ(0, entry->skeleton.ax_opcode);
-        DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-        break;
-      case kPrefix2Nullary:  // 1 byte of opcode + 2 prefixes.
-        DCHECK_NE(0, entry->skeleton.prefix1);
-        DCHECK_NE(0, entry->skeleton.prefix2);
-        EmitPrefixAndOpcode(entry);
-        DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-        DCHECK_EQ(0, entry->skeleton.ax_opcode);
-        DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+      case kNullary:  // 1 byte of opcode and possible prefixes.
+        EmitNullary(entry);
         break;
       case kRegOpcode:  // lir operands - 0: reg
         EmitOpRegOpcode(entry, lir->operands[0]);
@@ -1648,17 +1696,17 @@
       case kRegRegStore:  // lir operands - 0: reg2, 1: reg1
         EmitRegReg(entry, lir->operands[1], lir->operands[0]);
         break;
-      case kRegRegImmRev:
-        EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kMemRegImm:
+      case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg, 3: immediate
         EmitMemRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                       lir->operands[3]);
         break;
-      case kRegRegImm:
+      case kRegRegImm:  // lir operands - 0: reg1, 1: reg2, 2: imm
         EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
-      case kRegMemImm:
+      case kRegRegImmStore:   // lir operands - 0: reg2, 1: reg1, 2: imm
+        EmitRegRegImm(entry, lir->operands[1], lir->operands[0], lir->operands[2]);
+        break;
+      case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
         EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                       lir->operands[3]);
         break;
@@ -1731,7 +1779,13 @@
       case kMacro:  // lir operands - 0: reg
         EmitMacro(entry, lir->operands[0], lir->offset);
         break;
-      default:
+      case kNop:  // TODO: these instruction kinds are missing implementations.
+      case kThreadReg:
+      case kRegArrayImm:
+      case kShiftArrayImm:
+      case kShiftArrayCl:
+      case kArrayCond:
+      case kUnimplemented:
         EmitUnimplemented(entry, lir);
         break;
     }
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 38d60d2..61c9f4f 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -25,834 +25,822 @@
 namespace art {
 
 class X86Mir2Lir : public Mir2Lir {
-  protected:
-    class InToRegStorageMapper {
-      public:
-        virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
-        virtual ~InToRegStorageMapper() {}
-    };
+ protected:
+  class InToRegStorageMapper {
+   public:
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+    virtual ~InToRegStorageMapper() {}
+  };
 
-    class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
-      public:
-        InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
-        virtual ~InToRegStorageX86_64Mapper() {}
-        virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
-      private:
-        int cur_core_reg_;
-        int cur_fp_reg_;
-    };
+  class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
+   public:
+    InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
+    virtual ~InToRegStorageX86_64Mapper() {}
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+   private:
+    int cur_core_reg_;
+    int cur_fp_reg_;
+  };
 
-    class InToRegStorageMapping {
-      public:
-        InToRegStorageMapping() : initialized_(false) {}
-        void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
-        int GetMaxMappedIn() { return max_mapped_in_; }
-        bool IsThereStackMapped() { return is_there_stack_mapped_; }
-        RegStorage Get(int in_position);
-        bool IsInitialized() { return initialized_; }
-      private:
-        std::map<int, RegStorage> mapping_;
-        int max_mapped_in_;
-        bool is_there_stack_mapped_;
-        bool initialized_;
-    };
+  class InToRegStorageMapping {
+   public:
+    InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false),
+    initialized_(false) {}
+    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
+    int GetMaxMappedIn() { return max_mapped_in_; }
+    bool IsThereStackMapped() { return is_there_stack_mapped_; }
+    RegStorage Get(int in_position);
+    bool IsInitialized() { return initialized_; }
+   private:
+    std::map<int, RegStorage> mapping_;
+    int max_mapped_in_;
+    bool is_there_stack_mapped_;
+    bool initialized_;
+  };
 
-  public:
-    X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
+ public:
+  X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
 
-    // Required for target - codegen helpers.
-    bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
-                            RegLocation rl_dest, int lit);
-    bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
-    LIR* CheckSuspendUsingLoad() OVERRIDE;
-    RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
-    RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE;
-    LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
-                              OpSize size) OVERRIDE;
-    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                      OpSize size) OVERRIDE;
-    LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                         OpSize size) OVERRIDE;
-    LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                             RegStorage r_dest, OpSize size) OVERRIDE;
-    LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
-    LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-    LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
-                               OpSize size) OVERRIDE;
-    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+  // Required for target - codegen helpers.
+  bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
+                          RegLocation rl_dest, int lit);
+  bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+  LIR* CheckSuspendUsingLoad() OVERRIDE;
+  RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
+  RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE;
+  LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                            OpSize size) OVERRIDE;
+  LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                    OpSize size) OVERRIDE;
+  LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
                        OpSize size) OVERRIDE;
-    LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                          OpSize size) OVERRIDE;
-    LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                              RegStorage r_src, OpSize size) OVERRIDE;
-    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+  LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                           RegStorage r_dest, OpSize size) OVERRIDE;
+  LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
+  LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
+  LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
+                             OpSize size) OVERRIDE;
+  LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                     OpSize size) OVERRIDE;
+  LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
+                        OpSize size) OVERRIDE;
+  LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                            RegStorage r_src, OpSize size) OVERRIDE;
+  void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
-    // Required for target - register utilities.
-    RegStorage TargetReg(SpecialTargetRegister reg);
-    RegStorage GetArgMappingToPhysicalReg(int arg_num);
-    RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
-    RegLocation GetReturnAlt();
-    RegLocation GetReturnWideAlt();
-    RegLocation LocCReturn();
-    RegLocation LocCReturnRef();
-    RegLocation LocCReturnDouble();
-    RegLocation LocCReturnFloat();
-    RegLocation LocCReturnWide();
-    uint64_t GetRegMaskCommon(RegStorage reg);
-    void AdjustSpillMask();
-    void ClobberCallerSave();
-    void FreeCallTemps();
-    void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, RegStorage reg);
-    void MarkPreservedDouble(int v_reg, RegStorage reg);
-    void CompilerInitializeRegAlloc();
+  // Required for target - register utilities.
+  RegStorage TargetReg(SpecialTargetRegister reg);
+  RegStorage GetArgMappingToPhysicalReg(int arg_num);
+  RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
+  RegLocation GetReturnAlt();
+  RegLocation GetReturnWideAlt();
+  RegLocation LocCReturn();
+  RegLocation LocCReturnRef();
+  RegLocation LocCReturnDouble();
+  RegLocation LocCReturnFloat();
+  RegLocation LocCReturnWide();
+  uint64_t GetRegMaskCommon(RegStorage reg);
+  void AdjustSpillMask();
+  void ClobberCallerSave();
+  void FreeCallTemps();
+  void LockCallTemps();
+  void MarkPreservedSingle(int v_reg, RegStorage reg);
+  void MarkPreservedDouble(int v_reg, RegStorage reg);
+  void CompilerInitializeRegAlloc();
 
-    // Required for target - miscellaneous.
-    void AssembleLIR();
-    int AssignInsnOffsets();
-    void AssignOffsets();
-    AssemblerStatus AssembleInstructions(CodeOffset start_addr);
-    void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
-    const char* GetTargetInstFmt(int opcode);
-    const char* GetTargetInstName(int opcode);
-    std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
-    uint64_t GetPCUseDefEncoding();
-    uint64_t GetTargetInstFlags(int opcode);
-    int GetInsnSize(LIR* lir);
-    bool IsUnconditionalBranch(LIR* lir);
+  // Required for target - miscellaneous.
+  void AssembleLIR();
+  int AssignInsnOffsets();
+  void AssignOffsets();
+  AssemblerStatus AssembleInstructions(CodeOffset start_addr);
+  void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
+  void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
+  const char* GetTargetInstFmt(int opcode);
+  const char* GetTargetInstName(int opcode);
+  std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
+  uint64_t GetPCUseDefEncoding();
+  uint64_t GetTargetInstFlags(int opcode);
+  int GetInsnSize(LIR* lir);
+  bool IsUnconditionalBranch(LIR* lir);
 
-    // Check support for volatile load/store of a given size.
-    bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
-    // Get the register class for load/store of a field.
-    RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
+  // Check support for volatile load/store of a given size.
+  bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
+  // Get the register class for load/store of a field.
+  RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
 
-    // Required for target - Dalvik-level generators.
-    void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                           RegLocation rl_src2);
-    void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
-                     RegLocation rl_dest, int scale);
-    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                     RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
-    void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                          RegLocation rl_src2);
-    void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+  // Required for target - Dalvik-level generators.
+  void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                          RegLocation rl_src2);
-    void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+  void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
+                   RegLocation rl_dest, int scale);
+  void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
+                   RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
+  void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                         RegLocation rl_src1, RegLocation rl_shift);
+  void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                   RegLocation rl_src2);
-    void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-    bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
-    bool GenInlinedSqrt(CallInfo* info);
-    bool GenInlinedPeek(CallInfo* info, OpSize size);
-    bool GenInlinedPoke(CallInfo* info, OpSize size);
-    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                   RegLocation rl_src2);
-    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                       RegLocation rl_src2, bool is_div);
-    // TODO: collapse reg_lo, reg_hi
-    RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
-    void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenDivZeroCheckWide(RegStorage reg);
-    void GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int32_t len_offset);
-    void GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset);
-    void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
-    void GenExitSequence();
-    void GenSpecialExitSequence();
-    void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
-    void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
-    void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
-    void GenSelect(BasicBlock* bb, MIR* mir);
-    bool GenMemBarrier(MemBarrierKind barrier_kind);
-    void GenMoveException(RegLocation rl_dest);
-    void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
-                                       int first_bit, int second_bit);
-    void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
-    void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
-    void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
+  void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+  void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+  void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                        RegLocation rl_src2);
+  void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2);
+  void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                RegLocation rl_src2);
+  void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+  bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
+  bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+  bool GenInlinedSqrt(CallInfo* info);
+  bool GenInlinedPeek(CallInfo* info, OpSize size);
+  bool GenInlinedPoke(CallInfo* info, OpSize size);
+  void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
+  void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
+  void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                 RegLocation rl_src2);
+  void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+  void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+  void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                     RegLocation rl_src2, bool is_div);
+  // TODO: collapse reg_lo, reg_hi
+  RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
+  RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
+  void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+  void GenDivZeroCheckWide(RegStorage reg);
+  void GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int32_t len_offset);
+  void GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset);
+  void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
+  void GenExitSequence();
+  void GenSpecialExitSequence();
+  void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
+  void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
+  void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
+  void GenSelect(BasicBlock* bb, MIR* mir);
+  bool GenMemBarrier(MemBarrierKind barrier_kind);
+  void GenMoveException(RegLocation rl_dest);
+  void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
+                                     int first_bit, int second_bit);
+  void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
+  void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
+  void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+  void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+  void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
 
-    /*
-     * @brief Generate a two address long operation with a constant value
-     * @param rl_dest location of result
-     * @param rl_src constant source operand
-     * @param op Opcode to be generated
-     * @return success or not
-     */
-    bool GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
-    /*
-     * @brief Generate a three address long operation with a constant value
-     * @param rl_dest location of result
-     * @param rl_src1 source operand
-     * @param rl_src2 constant source operand
-     * @param op Opcode to be generated
-     * @return success or not
-     */
-    bool GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                        Instruction::Code op);
+  /*
+   * @brief Generate a two address long operation with a constant value
+   * @param rl_dest location of result
+   * @param rl_src constant source operand
+   * @param op Opcode to be generated
+   * @return success or not
+   */
+  bool GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+  /*
+   * @brief Generate a three address long operation with a constant value
+   * @param rl_dest location of result
+   * @param rl_src1 source operand
+   * @param rl_src2 constant source operand
+   * @param op Opcode to be generated
+   * @return success or not
+   */
+  bool GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                      Instruction::Code op);
 
-    /**
-      * @brief Generate a long arithmetic operation.
-      * @param rl_dest The destination.
-      * @param rl_src1 First operand.
-      * @param rl_src2 Second operand.
-      * @param op The DEX opcode for the operation.
-      * @param is_commutative The sources can be swapped if needed.
-      */
-    virtual void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                              Instruction::Code op, bool is_commutative);
+  /**
+   * @brief Generate a long arithmetic operation.
+   * @param rl_dest The destination.
+   * @param rl_src1 First operand.
+   * @param rl_src2 Second operand.
+   * @param op The DEX opcode for the operation.
+   * @param is_commutative The sources can be swapped if needed.
+   */
+  virtual void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                            Instruction::Code op, bool is_commutative);
 
-    /**
-      * @brief Generate a two operand long arithmetic operation.
-      * @param rl_dest The destination.
-      * @param rl_src Second operand.
-      * @param op The DEX opcode for the operation.
-      */
-    void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+  /**
+   * @brief Generate a two operand long arithmetic operation.
+   * @param rl_dest The destination.
+   * @param rl_src Second operand.
+   * @param op The DEX opcode for the operation.
+   */
+  void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
-    /**
-      * @brief Generate a long operation.
-      * @param rl_dest The destination.  Must be in a register
-      * @param rl_src The other operand.  May be in a register or in memory.
-      * @param op The DEX opcode for the operation.
-      */
-    virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+  /**
+   * @brief Generate a long operation.
+   * @param rl_dest The destination.  Must be in a register.
+   * @param rl_src The other operand.  May be in a register or in memory.
+   * @param op The DEX opcode for the operation.
+   */
+  virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
-    /**
-     * @brief Implement instanceof a final class with x86 specific code.
-     * @param use_declaring_class 'true' if we can use the class itself.
-     * @param type_idx Type index to use if use_declaring_class is 'false'.
-     * @param rl_dest Result to be set to 0 or 1.
-     * @param rl_src Object to be tested.
-     */
-    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
-                            RegLocation rl_src);
-    /*
-     *
-     * @brief Implement Set up instanceof a class with x86 specific code.
-     * @param needs_access_check 'true' if we must check the access.
-     * @param type_known_final 'true' if the type is known to be a final class.
-     * @param type_known_abstract 'true' if the type is known to be an abstract class.
-     * @param use_declaring_class 'true' if the type can be loaded off the current Method*.
-     * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache.
-     * @param type_idx Type index to use if use_declaring_class is 'false'.
-     * @param rl_dest Result to be set to 0 or 1.
-     * @param rl_src Object to be tested.
-     */
-    void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
-                                    bool type_known_abstract, bool use_declaring_class,
-                                    bool can_assume_type_is_in_dex_cache,
-                                    uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src);
+  /**
+   * @brief Implement instanceof for a final class with x86 specific code.
+   * @param use_declaring_class 'true' if we can use the class itself.
+   * @param type_idx Type index to use if use_declaring_class is 'false'.
+   * @param rl_dest Result to be set to 0 or 1.
+   * @param rl_src Object to be tested.
+   */
+  void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
+                          RegLocation rl_src);
+  /*
+   *
+   * @brief Set up and implement instanceof for a class with x86 specific code.
+   * @param needs_access_check 'true' if we must check the access.
+   * @param type_known_final 'true' if the type is known to be a final class.
+   * @param type_known_abstract 'true' if the type is known to be an abstract class.
+   * @param use_declaring_class 'true' if the type can be loaded off the current Method*.
+   * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache.
+   * @param type_idx Type index to use if use_declaring_class is 'false'.
+   * @param rl_dest Result to be set to 0 or 1.
+   * @param rl_src Object to be tested.
+   */
+  void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
+                                  bool type_known_abstract, bool use_declaring_class,
+                                  bool can_assume_type_is_in_dex_cache,
+                                  uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src);
 
-    void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                        RegLocation rl_src1, RegLocation rl_shift);
+  void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                      RegLocation rl_src1, RegLocation rl_shift);
 
-    // Single operation generators.
-    LIR* OpUnconditionalBranch(LIR* target);
-    LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
-    LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
-    LIR* OpCondBranch(ConditionCode cc, LIR* target);
-    LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
-    LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
-    LIR* OpIT(ConditionCode cond, const char* guide);
-    void OpEndIT(LIR* it);
-    LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
-    LIR* OpReg(OpKind op, RegStorage r_dest_src);
-    void OpRegCopy(RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
-    LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
-    LIR* OpRegMem(OpKind op, RegStorage r_dest, RegLocation value);
-    LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
-    LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
-    LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
-    LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
-    LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
-    LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
-    LIR* OpTestSuspend(LIR* target);
-    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE;
-    LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE;
-    LIR* OpVldm(RegStorage r_base, int count);
-    LIR* OpVstm(RegStorage r_base, int count);
-    void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
-    void OpRegCopyWide(RegStorage dest, RegStorage src);
-    void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE;
-    void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE;
+  // Single operation generators.
+  LIR* OpUnconditionalBranch(LIR* target);
+  LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
+  LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
+  LIR* OpCondBranch(ConditionCode cc, LIR* target);
+  LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
+  LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
+  LIR* OpIT(ConditionCode cond, const char* guide);
+  void OpEndIT(LIR* it);
+  LIR* OpMem(OpKind op, RegStorage r_base, int disp);
+  LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+  LIR* OpReg(OpKind op, RegStorage r_dest_src);
+  void OpRegCopy(RegStorage r_dest, RegStorage r_src);
+  LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
+  LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
+  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
+  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegLocation value);
+  LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
+  LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
+  LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
+  LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
+  LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
+  LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
+  LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
+  LIR* OpTestSuspend(LIR* target);
+  LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE;
+  LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE;
+  LIR* OpVldm(RegStorage r_base, int count);
+  LIR* OpVstm(RegStorage r_base, int count);
+  void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
+  void OpRegCopyWide(RegStorage dest, RegStorage src);
+  void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE;
+  void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE;
 
-    void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset);
-    void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
-    void SpillCoreRegs();
-    void UnSpillCoreRegs();
-    static const X86EncodingMap EncodingMap[kX86Last];
-    bool InexpensiveConstantInt(int32_t value);
-    bool InexpensiveConstantFloat(int32_t value);
-    bool InexpensiveConstantLong(int64_t value);
-    bool InexpensiveConstantDouble(int64_t value);
+  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset);
+  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
+  void SpillCoreRegs();
+  void UnSpillCoreRegs();
+  static const X86EncodingMap EncodingMap[kX86Last];
+  bool InexpensiveConstantInt(int32_t value);
+  bool InexpensiveConstantFloat(int32_t value);
+  bool InexpensiveConstantLong(int64_t value);
+  bool InexpensiveConstantDouble(int64_t value);
 
-    /*
-     * @brief Should try to optimize for two address instructions?
-     * @return true if we try to avoid generating three operand instructions.
-     */
-    virtual bool GenerateTwoOperandInstructions() const { return true; }
+  /*
+   * @brief Should try to optimize for two address instructions?
+   * @return true if we try to avoid generating three operand instructions.
+   */
+  virtual bool GenerateTwoOperandInstructions() const { return true; }
 
-    /*
-     * @brief x86 specific codegen for int operations.
-     * @param opcode Operation to perform.
-     * @param rl_dest Destination for the result.
-     * @param rl_lhs Left hand operand.
-     * @param rl_rhs Right hand operand.
-     */
-    void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs,
-                       RegLocation rl_rhs);
+  /*
+   * @brief x86 specific codegen for int operations.
+   * @param opcode Operation to perform.
+   * @param rl_dest Destination for the result.
+   * @param rl_lhs Left hand operand.
+   * @param rl_rhs Right hand operand.
+   */
+  void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs,
+                     RegLocation rl_rhs);
 
-    /*
-     * @brief Dump a RegLocation using printf
-     * @param loc Register location to dump
-     */
-    static void DumpRegLocation(RegLocation loc);
+  /*
+   * @brief Dump a RegLocation using printf
+   * @param loc Register location to dump
+   */
+  static void DumpRegLocation(RegLocation loc);
 
-    /*
-     * @brief Load the Method* of a dex method into the register.
-     * @param target_method The MethodReference of the method to be invoked.
-     * @param type How the method will be invoked.
-     * @param register that will contain the code address.
-     * @note register will be passed to TargetReg to get physical register.
-     */
-    void LoadMethodAddress(const MethodReference& target_method, InvokeType type,
-                           SpecialTargetRegister symbolic_reg);
+  /*
+   * @brief Load the Method* of a dex method into the register.
+   * @param target_method The MethodReference of the method to be invoked.
+   * @param type How the method will be invoked.
+   * @param symbolic_reg Symbolic register that will contain the Method*.
+   * @note register will be passed to TargetReg to get physical register.
+   */
+  void LoadMethodAddress(const MethodReference& target_method, InvokeType type,
+                         SpecialTargetRegister symbolic_reg);
 
-    /*
-     * @brief Load the Class* of a Dex Class type into the register.
-     * @param type How the method will be invoked.
-     * @param register that will contain the code address.
-     * @note register will be passed to TargetReg to get physical register.
-     */
-    void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+  /*
+   * @brief Load the Class* of a Dex Class type into the register.
+   * @param type_idx Index of the Dex class type to load.
+   * @param symbolic_reg Symbolic register that will contain the Class*.
+   * @note register will be passed to TargetReg to get physical register.
+   */
+  void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
 
-    void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+  void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
 
-    int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                             NextCallInsn next_call_insn,
-                             const MethodReference& target_method,
-                             uint32_t vtable_idx,
-                             uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                             bool skip_this);
-
-    int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+  int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
                            NextCallInsn next_call_insn,
                            const MethodReference& target_method,
                            uint32_t vtable_idx,
                            uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
                            bool skip_this);
 
-    /*
-     * @brief Generate a relative call to the method that will be patched at link time.
-     * @param target_method The MethodReference of the method to be invoked.
-     * @param type How the method will be invoked.
-     * @returns Call instruction
-     */
-    virtual LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
+  int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+                         NextCallInsn next_call_insn,
+                         const MethodReference& target_method,
+                         uint32_t vtable_idx,
+                         uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                         bool skip_this);
 
-    /*
-     * @brief Handle x86 specific literals
-     */
-    void InstallLiteralPools();
+  /*
+   * @brief Generate a relative call to the method that will be patched at link time.
+   * @param target_method The MethodReference of the method to be invoked.
+   * @param type How the method will be invoked.
+   * @returns Call instruction
+   */
+  virtual LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
 
-    /*
-     * @brief Generate the debug_frame CFI information.
-     * @returns pointer to vector containing CFE information
-     */
-    static std::vector<uint8_t>* ReturnCommonCallFrameInformation();
+  /*
+   * @brief Handle x86 specific literals
+   */
+  void InstallLiteralPools();
 
-    /*
-     * @brief Generate the debug_frame FDE information.
-     * @returns pointer to vector containing CFE information
-     */
-    std::vector<uint8_t>* ReturnCallFrameInformation();
+  /*
+   * @brief Generate the debug_frame CFI information.
+   * @returns pointer to vector containing CFI information
+   */
+  static std::vector<uint8_t>* ReturnCommonCallFrameInformation();
 
-  protected:
-    size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement,
-                       int reg_r, int reg_x, bool has_sib);
-    uint8_t LowRegisterBits(uint8_t reg);
-    bool NeedsRex(uint8_t reg);
-    void EmitPrefix(const X86EncodingMap* entry);
-    void EmitPrefix(const X86EncodingMap* entry, uint8_t reg_r, uint8_t reg_x, uint8_t reg_b);
-    void EmitOpcode(const X86EncodingMap* entry);
-    void EmitPrefixAndOpcode(const X86EncodingMap* entry);
-    void EmitPrefixAndOpcode(const X86EncodingMap* entry,
-                             uint8_t reg_r, uint8_t reg_x, uint8_t reg_b);
-    void EmitDisp(uint8_t base, int disp);
-    void EmitModrmThread(uint8_t reg_or_opcode);
-    void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp);
-    void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale, int disp);
-    void EmitImm(const X86EncodingMap* entry, int64_t imm);
-    void EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg);
-    void EmitOpReg(const X86EncodingMap* entry, uint8_t reg);
-    void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp);
-    void EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp);
-    void EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg);
-    void EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm);
-    void EmitRegMem(const X86EncodingMap* entry, uint8_t reg, uint8_t base, int disp);
-    void EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index,
-                      int scale, int disp);
-    void EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp,
-                      uint8_t reg);
-    void EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp,
-                      int32_t imm);
-    void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp);
-    void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2);
-    void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
-    void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
-    void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp,
-                       int32_t imm);
-    void EmitMemRegImm(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg1, int32_t imm);
-    void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
-    void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
-    void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm);
-    void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
-    void EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int imm);
-    void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl);
-    void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl);
-    void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition);
-    void EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition);
+  /*
+   * @brief Generate the debug_frame FDE information.
+   * @returns pointer to vector containing FDE information
+   */
+  std::vector<uint8_t>* ReturnCallFrameInformation();
 
-    /**
-     * @brief Used for encoding conditional register to register operation.
-     * @param entry The entry in the encoding map for the opcode.
-     * @param reg1 The first physical register.
-     * @param reg2 The second physical register.
-     * @param condition The condition code for operation.
-     */
-    void EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition);
+ protected:
+  size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
+                     int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form,
+                     int32_t displacement);
+  void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg);
+  void EmitPrefix(const X86EncodingMap* entry,
+                  int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b,
+                  bool r8_form);
+  void EmitOpcode(const X86EncodingMap* entry);
+  void EmitPrefixAndOpcode(const X86EncodingMap* entry,
+                           int32_t reg_r, int32_t reg_x, int32_t reg_b, bool r8_form);
+  void EmitDisp(uint8_t base, int32_t disp);
+  void EmitModrmThread(uint8_t reg_or_opcode);
+  void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp);
+  void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale,
+                        int32_t disp);
+  void EmitImm(const X86EncodingMap* entry, int64_t imm);
+  void EmitNullary(const X86EncodingMap* entry);
+  void EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg);
+  void EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg);
+  void EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp);
+  void EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale,
+                   int32_t disp);
+  void EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_reg);
+  void EmitRegMem(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, int32_t disp);
+  void EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
+                    int32_t raw_index, int scale, int32_t disp);
+  void EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale,
+                    int32_t disp, int32_t raw_reg);
+  void EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm);
+  void EmitArrayImm(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale,
+                    int32_t raw_disp, int32_t imm);
+  void EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp);
+  void EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2);
+  void EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t imm);
+  void EmitRegMemImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base, int32_t disp,
+                     int32_t imm);
+  void EmitMemRegImm(const X86EncodingMap* entry, int32_t base, int32_t disp, int32_t raw_reg1,
+                     int32_t imm);
+  void EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm);
+  void EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm);
+  void EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm);
+  void EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm);
+  void EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl);
+  void EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_cl);
+  void EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm);
+  void EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc);
+  void EmitMemCond(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t cc);
+  void EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t cc);
+  void EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base, int32_t disp,
+                      int32_t cc);
 
-    /**
-     * @brief Used for encoding conditional register to memory operation.
-     * @param entry The entry in the encoding map for the opcode.
-     * @param reg1 The first physical register.
-     * @param base The memory base register.
-     * @param displacement The memory displacement.
-     * @param condition The condition code for operation.
-     */
-    void EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition);
+  void EmitJmp(const X86EncodingMap* entry, int32_t rel);
+  void EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc);
+  void EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp);
+  void EmitCallImmediate(const X86EncodingMap* entry, int32_t disp);
+  void EmitCallThread(const X86EncodingMap* entry, int32_t disp);
+  void EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table,
+                 int32_t raw_index, int scale, int32_t table_or_disp);
+  void EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset);
+  void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
+  void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
+                                int64_t val, ConditionCode ccode);
+  void GenConstWide(RegLocation rl_dest, int64_t value);
 
-    void EmitJmp(const X86EncodingMap* entry, int rel);
-    void EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc);
-    void EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp);
-    void EmitCallImmediate(const X86EncodingMap* entry, int disp);
-    void EmitCallThread(const X86EncodingMap* entry, int disp);
-    void EmitPcRel(const X86EncodingMap* entry, uint8_t reg, int base_or_table, uint8_t index,
-                   int scale, int table_or_disp);
-    void EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset);
-    void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
-    void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
-                                  int64_t val, ConditionCode ccode);
-    void GenConstWide(RegLocation rl_dest, int64_t value);
+  static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
 
-    static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
+  /*
+   * @brief Ensure that a temporary register is byte addressable.
+   * @returns a temporary guaranteed to be byte addressable.
+   */
+  virtual RegStorage AllocateByteRegister();
 
-    /*
-     * @brief Ensure that a temporary register is byte addressable.
-     * @returns a temporary guarenteed to be byte addressable.
-     */
-    virtual RegStorage AllocateByteRegister();
+  /*
+   * @brief generate inline code for fast case of String.indexOf.
+   * @param info Call parameters
+   * @param zero_based 'true' if the index into the string is 0.
+   * @returns 'true' if the call was inlined, 'false' if a regular call needs to be
+   * generated.
+   */
+  bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
 
-    /*
-     * @brief generate inline code for fast case of Strng.indexOf.
-     * @param info Call parameters
-     * @param zero_based 'true' if the index into the string is 0.
-     * @returns 'true' if the call was inlined, 'false' if a regular call needs to be
-     * generated.
-     */
-    bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
+  /*
+   * @brief Load 128 bit constant into vector register.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector
+   * @note vA is the TypeSize for the register.
+   * @note vB is the destination XMM register. arg[0..3] are 32 bit constant values.
+   */
+  void GenConst128(BasicBlock* bb, MIR* mir);
 
-    /*
-     * @brief Load 128 bit constant into vector register.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector
-     * @note vA is the TypeSize for the register.
-     * @note vB is the destination XMM register. arg[0..3] are 32 bit constant values.
-     */
-    void GenConst128(BasicBlock* bb, MIR* mir);
+  /*
+   * @brief MIR to move a vectorized register to another.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination
+   * @note vC: source
+   */
+  void GenMoveVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief MIR to move a vectorized register to another.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination
-     * @note vC: source
-     */
-    void GenMoveVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenMultiplyVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed multiply of units in two vector registers: vB = vB .* @note vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenMultiplyVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenAddVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenAddVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenSubtractVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenSubtractVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: immediate
+   */
+  void GenShiftLeftVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: immediate
-     */
-    void GenShiftLeftVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: immediate
+   */
+  void GenSignedShiftRightVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: immediate
-     */
-    void GenSignedShiftRightVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: immediate
+   */
+  void GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from..
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: immediate
-     */
-    void GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenAndVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenAndVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenOrVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenOrVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenXorVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenXorVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Reduce a 128-bit packed element into a single VR by taking lower bits
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @details Instruction does a horizontal addition of the packed elements and then adds it to VR.
+   * @note vA: TypeSize
+   * @note vB: destination and source VR (not vector register)
+   * @note vC: source (vector register)
+   */
+  void GenAddReduceVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Reduce a 128-bit packed element into a single VR by taking lower bits
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @details Instruction does a horizontal addition of the packed elements and then adds it to VR.
-     * @note vA: TypeSize
-     * @note vB: destination and source VR (not vector register)
-     * @note vC: source (vector register)
-     */
-    void GenAddReduceVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Extract a packed element into a single VR.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination VR (not vector register)
+   * @note vC: source (vector register)
+   * @note arg[0]: The index to use for extraction from vector register (which packed element).
+   */
+  void GenReduceVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Extract a packed element into a single VR.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination VR (not vector register)
-     * @note vC: source (vector register)
-     * @note arg[0]: The index to use for extraction from vector register (which packed element).
-     */
-    void GenReduceVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Create a vector value, with all TypeSize values equal to vC
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize.
+   * @note vB: destination vector register.
+   * @note vC: source VR (not vector register).
+   */
+  void GenSetVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Create a vector value, with all TypeSize values equal to vC
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize.
-     * @note vB: destination vector register.
-     * @note vC: source VR (not vector register).
-     */
-    void GenSetVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Generate code for a vector opcode.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is a non-standard opcode.
+   */
+  void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
 
-    /*
-     * @brief Generate code for a vector opcode.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is a non-standard opcode.
-     */
-    void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+  /*
+   * @brief Return the correct x86 opcode for the Dex operation
+   * @param op Dex opcode for the operation
+   * @param loc Register location of the operand
+   * @param is_high_op 'true' if this is an operation on the high word
+   * @param value Immediate value for the operation.  Used for byte variants
+   * @returns the correct x86 opcode to perform the operation
+   */
+  X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value);
 
-    /*
-     * @brief Return the correct x86 opcode for the Dex operation
-     * @param op Dex opcode for the operation
-     * @param loc Register location of the operand
-     * @param is_high_op 'true' if this is an operation on the high word
-     * @param value Immediate value for the operation.  Used for byte variants
-     * @returns the correct x86 opcode to perform the operation
-     */
-    X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value);
+  /*
+   * @brief Return the correct x86 opcode for the Dex operation
+   * @param op Dex opcode for the operation
+   * @param dest location of the destination.  May be register or memory.
+   * @param rhs Location for the rhs of the operation.  May be in register or memory.
+   * @param is_high_op 'true' if this is an operation on the high word
+   * @returns the correct x86 opcode to perform the operation
+   * @note at most one location may refer to memory
+   */
+  X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                      bool is_high_op);
 
-    /*
-     * @brief Return the correct x86 opcode for the Dex operation
-     * @param op Dex opcode for the operation
-     * @param dest location of the destination.  May be register or memory.
-     * @param rhs Location for the rhs of the operation.  May be in register or memory.
-     * @param is_high_op 'true' if this is an operation on the high word
-     * @returns the correct x86 opcode to perform the operation
-     * @note at most one location may refer to memory
-     */
-    X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
-                        bool is_high_op);
+  /*
+   * @brief Is this operation a no-op for this opcode and value
+   * @param op Dex opcode for the operation
+   * @param value Immediate value for the operation.
+   * @returns 'true' if the operation will have no effect
+   */
+  bool IsNoOp(Instruction::Code op, int32_t value);
 
-    /*
-     * @brief Is this operation a no-op for this opcode and value
-     * @param op Dex opcode for the operation
-     * @param value Immediate value for the operation.
-     * @returns 'true' if the operation will have no effect
-     */
-    bool IsNoOp(Instruction::Code op, int32_t value);
+  /**
+   * @brief Calculate magic number and shift for a given divisor
+   * @param divisor divisor number for calculation
+   * @param magic hold calculated magic number
+   * @param shift hold calculated shift
+   */
+  void CalculateMagicAndShift(int divisor, int& magic, int& shift);
 
-    /**
-     * @brief Calculate magic number and shift for a given divisor
-     * @param divisor divisor number for calculation
-     * @param magic hold calculated magic number
-     * @param shift hold calculated shift
-     */
-    void CalculateMagicAndShift(int divisor, int& magic, int& shift);
+  /*
+   * @brief Generate an integer div or rem operation.
+   * @param rl_dest Destination Location.
+   * @param rl_src1 Numerator Location.
+   * @param rl_src2 Divisor Location.
+   * @param is_div 'true' if this is a division, 'false' for a remainder.
+   * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+   */
+  RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                        bool is_div, bool check_zero);
 
-    /*
-     * @brief Generate an integer div or rem operation.
-     * @param rl_dest Destination Location.
-     * @param rl_src1 Numerator Location.
-     * @param rl_src2 Divisor Location.
-     * @param is_div 'true' if this is a division, 'false' for a remainder.
-     * @param check_zero 'true' if an exception should be generated if the divisor is 0.
-     */
-    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                          bool is_div, bool check_zero);
+  /*
+   * @brief Generate an integer div or rem operation by a literal.
+   * @param rl_dest Destination Location.
+   * @param rl_src Numerator Location.
+   * @param lit Divisor.
+   * @param is_div 'true' if this is a division, 'false' for a remainder.
+   */
+  RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
 
-    /*
-     * @brief Generate an integer div or rem operation by a literal.
-     * @param rl_dest Destination Location.
-     * @param rl_src Numerator Location.
-     * @param lit Divisor.
-     * @param is_div 'true' if this is a division, 'false' for a remainder.
-     */
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
+  /*
+   * Generate code to implement long shift operations.
+   * @param opcode The DEX opcode to specify the shift type.
+   * @param rl_dest The destination.
+   * @param rl_src The value to be shifted.
+   * @param shift_amount How much to shift.
+   * @returns the RegLocation of the result.
+   */
+  RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                RegLocation rl_src, int shift_amount);
+  /*
+   * Generate an imul of a register by a constant or a better sequence.
+   * @param dest Destination Register.
+   * @param src Source Register.
+   * @param val Constant multiplier.
+   */
+  void GenImulRegImm(RegStorage dest, RegStorage src, int val);
 
-    /*
-     * Generate code to implement long shift operations.
-     * @param opcode The DEX opcode to specify the shift type.
-     * @param rl_dest The destination.
-     * @param rl_src The value to be shifted.
-     * @param shift_amount How much to shift.
-     * @returns the RegLocation of the result.
-     */
-    RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                  RegLocation rl_src, int shift_amount);
-    /*
-     * Generate an imul of a register by a constant or a better sequence.
-     * @param dest Destination Register.
-     * @param src Source Register.
-     * @param val Constant multiplier.
-     */
-    void GenImulRegImm(RegStorage dest, RegStorage src, int val);
+  /*
+   * Generate an imul of a memory location by a constant or a better sequence.
+   * @param dest Destination Register.
+   * @param sreg Symbolic register.
+   * @param displacement Displacement on stack of Symbolic Register.
+   * @param val Constant multiplier.
+   */
+  void GenImulMemImm(RegStorage dest, int sreg, int displacement, int val);
 
-    /*
-     * Generate an imul of a memory location by a constant or a better sequence.
-     * @param dest Destination Register.
-     * @param sreg Symbolic register.
-     * @param displacement Displacement on stack of Symbolic Register.
-     * @param val Constant multiplier.
-     */
-    void GenImulMemImm(RegStorage dest, int sreg, int displacement, int val);
+  /*
+   * @brief Compare memory to immediate, and branch if condition true.
+   * @param cond The condition code that when true will branch to the target.
+   * @param temp_reg A temporary register that can be used if compare memory is not
+   * supported by the architecture.
+   * @param base_reg The register holding the base address.
+   * @param offset The offset from the base.
+   * @param check_value The immediate to compare to.
+   */
+  LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
+                         int offset, int check_value, LIR* target);
 
-    /*
-     * @brief Compare memory to immediate, and branch if condition true.
-     * @param cond The condition code that when true will branch to the target.
-     * @param temp_reg A temporary register that can be used if compare memory is not
-     * supported by the architecture.
-     * @param base_reg The register holding the base address.
-     * @param offset The offset from the base.
-     * @param check_value The immediate to compare to.
-     */
-    LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                           int offset, int check_value, LIR* target);
+  /*
+   * Can this operation be performed using core registers without temporaries?
+   * @param rl_lhs Left hand operand.
+   * @param rl_rhs Right hand operand.
+   * @returns 'true' if the operation can proceed without needing temporary regs.
+   */
+  bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
 
-    /*
-     * Can this operation be using core registers without temporaries?
-     * @param rl_lhs Left hand operand.
-     * @param rl_rhs Right hand operand.
-     * @returns 'true' if the operation can proceed without needing temporary regs.
-     */
-    bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
+  /**
+   * @brief Generates inline code for conversion of long to FP by using x87.
+   * @param rl_dest The destination of the FP.
+   * @param rl_src The source of the long.
+   * @param is_double 'true' if dealing with double, 'false' for float.
+   */
+  virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
 
-    /**
-     * @brief Generates inline code for conversion of long to FP by using x87/
-     * @param rl_dest The destination of the FP.
-     * @param rl_src The source of the long.
-     * @param is_double 'true' if dealing with double, 'false' for float.
-     */
-    virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
+  /*
+   * @brief Perform MIR analysis before compiling method.
+   * @note Invokes Mir2Lir::Materialize after analysis.
+   */
+  void Materialize();
 
-    /*
-     * @brief Perform MIR analysis before compiling method.
-     * @note Invokes Mir2LiR::Materialize after analysis.
-     */
-    void Materialize();
+  /*
+   * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
+   * without regard to data type.  In practice, this can result in UpdateLoc returning a
+   * location record for a Dalvik float value in a core register, and vice versa.  For targets
+   * which can inexpensively move data between core and float registers, this can often be a win.
+   * However, for x86 this is generally not a win.  These variants of UpdateLoc()
+   * take a register class argument - and will return an in-register location record only if
+   * the value is live in a temp register of the correct class.  Additionally, if the value is in
+   * a temp register of the wrong register class, it will be clobbered.
+   */
+  RegLocation UpdateLocTyped(RegLocation loc, int reg_class);
+  RegLocation UpdateLocWideTyped(RegLocation loc, int reg_class);
 
-    /*
-     * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
-     * without regard to data type.  In practice, this can result in UpdateLoc returning a
-     * location record for a Dalvik float value in a core register, and vis-versa.  For targets
-     * which can inexpensively move data between core and float registers, this can often be a win.
-     * However, for x86 this is generally not a win.  These variants of UpdateLoc()
-     * take a register class argument - and will return an in-register location record only if
-     * the value is live in a temp register of the correct class.  Additionally, if the value is in
-     * a temp register of the wrong register class, it will be clobbered.
-     */
-    RegLocation UpdateLocTyped(RegLocation loc, int reg_class);
-    RegLocation UpdateLocWideTyped(RegLocation loc, int reg_class);
+  /*
+   * @brief Analyze MIR before generating code, to prepare for the code generation.
+   */
+  void AnalyzeMIR();
 
-    /*
-     * @brief Analyze MIR before generating code, to prepare for the code generation.
-     */
-    void AnalyzeMIR();
+  /*
+   * @brief Analyze one basic block.
+   * @param bb Basic block to analyze.
+   */
+  void AnalyzeBB(BasicBlock * bb);
 
-    /*
-     * @brief Analyze one basic block.
-     * @param bb Basic block to analyze.
-     */
-    void AnalyzeBB(BasicBlock * bb);
+  /*
+   * @brief Analyze one extended MIR instruction
+   * @param opcode MIR instruction opcode.
+   * @param bb Basic block containing instruction.
+   * @param mir Extended instruction to analyze.
+   */
+  void AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir);
 
-    /*
-     * @brief Analyze one extended MIR instruction
-     * @param opcode MIR instruction opcode.
-     * @param bb Basic block containing instruction.
-     * @param mir Extended instruction to analyze.
-     */
-    void AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir);
+  /*
+   * @brief Analyze one MIR instruction
+   * @param opcode MIR instruction opcode.
+   * @param bb Basic block containing instruction.
+   * @param mir Instruction to analyze.
+   */
+  virtual void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
 
-    /*
-     * @brief Analyze one MIR instruction
-     * @param opcode MIR instruction opcode.
-     * @param bb Basic block containing instruction.
-     * @param mir Instruction to analyze.
-     */
-    virtual void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
+  /*
+   * @brief Analyze one MIR float/double instruction
+   * @param opcode MIR instruction opcode.
+   * @param bb Basic block containing instruction.
+   * @param mir Instruction to analyze.
+   */
+  void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir);
 
-    /*
-     * @brief Analyze one MIR float/double instruction
-     * @param opcode MIR instruction opcode.
-     * @param bb Basic block containing instruction.
-     * @param mir Instruction to analyze.
-     */
-    void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir);
+  /*
+   * @brief Analyze one use of a double operand.
+   * @param rl_use Double RegLocation for the operand.
+   */
+  void AnalyzeDoubleUse(RegLocation rl_use);
 
-    /*
-     * @brief Analyze one use of a double operand.
-     * @param rl_use Double RegLocation for the operand.
-     */
-    void AnalyzeDoubleUse(RegLocation rl_use);
+  bool Gen64Bit() const  { return gen64bit_; }
 
-    bool Gen64Bit() const  { return gen64bit_; }
+  // Information derived from analysis of MIR
 
-    // Information derived from analysis of MIR
+  // The compiler temporary for the code address of the method.
+  CompilerTemp *base_of_code_;
 
-    // The compiler temporary for the code address of the method.
-    CompilerTemp *base_of_code_;
+  // Have we decided to compute a ptr to code and store in temporary VR?
+  bool store_method_addr_;
 
-    // Have we decided to compute a ptr to code and store in temporary VR?
-    bool store_method_addr_;
+  // Have we used the stored method address?
+  bool store_method_addr_used_;
 
-    // Have we used the stored method address?
-    bool store_method_addr_used_;
+  // Instructions to remove if we didn't use the stored method address.
+  LIR* setup_method_address_[2];
 
-    // Instructions to remove if we didn't use the stored method address.
-    LIR* setup_method_address_[2];
+  // Instructions needing patching with Method* values.
+  GrowableArray<LIR*> method_address_insns_;
 
-    // Instructions needing patching with Method* values.
-    GrowableArray<LIR*> method_address_insns_;
+  // Instructions needing patching with Class Type* values.
+  GrowableArray<LIR*> class_type_address_insns_;
 
-    // Instructions needing patching with Class Type* values.
-    GrowableArray<LIR*> class_type_address_insns_;
+  // Instructions needing patching with PC relative code addresses.
+  GrowableArray<LIR*> call_method_insns_;
 
-    // Instructions needing patching with PC relative code addresses.
-    GrowableArray<LIR*> call_method_insns_;
+  // Prologue decrement of stack pointer.
+  LIR* stack_decrement_;
 
-    // Prologue decrement of stack pointer.
-    LIR* stack_decrement_;
+  // Epilogue increment of stack pointer.
+  LIR* stack_increment_;
 
-    // Epilogue increment of stack pointer.
-    LIR* stack_increment_;
+  // 64-bit mode
+  bool gen64bit_;
 
-    // 64-bit mode
-    bool gen64bit_;
+  // The list of const vector literals.
+  LIR *const_vectors_;
 
-    // The list of const vector literals.
-    LIR *const_vectors_;
+  /*
+   * @brief Search for a matching vector literal
+   * @param mir A kMirOpConst128b MIR instruction to match.
+   * @returns pointer to matching LIR constant, or nullptr if not found.
+   */
+  LIR *ScanVectorLiteral(MIR *mir);
 
-    /*
-     * @brief Search for a matching vector literal
-     * @param mir A kMirOpConst128b MIR instruction to match.
-     * @returns pointer to matching LIR constant, or nullptr if not found.
-     */
-    LIR *ScanVectorLiteral(MIR *mir);
+  /*
+   * @brief Add a constant vector literal
+   * @param mir A kMirOpConst128b MIR instruction to match.
+   */
+  LIR *AddVectorLiteral(MIR *mir);
 
-    /*
-     * @brief Add a constant vector literal
-     * @param mir A kMirOpConst128b MIR instruction to match.
-     */
-    LIR *AddVectorLiteral(MIR *mir);
-
-    InToRegStorageMapping in_to_reg_storage_mapping_;
+  InToRegStorageMapping in_to_reg_storage_mapping_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index d214b8d..8093fd7 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -810,7 +810,7 @@
                    : (IsInReg(this, rl_src_offset, rs_rDI) ? 4
                    : (SRegOffset(rl_src_offset.s_reg_low) + push_offset));
     LoadWordDisp(TargetReg(kSp), srcOffsetSp, rs_rSI);
-    NewLIR4(kX86LockCmpxchg8bA, rs_rDI.GetReg(), rs_rSI.GetReg(), 0, 0);
+    NewLIR4(kX86LockCmpxchg64A, rs_rDI.GetReg(), rs_rSI.GetReg(), 0, 0);
 
     // After a store we need to insert barrier in case of potential load. Since the
     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
@@ -853,8 +853,18 @@
   // Convert ZF to boolean
   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondZ);
-  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
+  RegStorage result_reg = rl_result.reg;
+
+  // SETcc only works with EAX..EDX.
+  if (result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+    result_reg = AllocateByteRegister();
+    DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum());
+  }
+  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
+  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
+  if (IsTemp(result_reg)) {
+    FreeTemp(result_reg);
+  }
   StoreValue(rl_dest, rl_result);
   return true;
 }
@@ -2154,7 +2164,12 @@
   LoadConstant(result_reg, 0);
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
 
-  RegStorage check_class = AllocTypedTemp(false, kRefReg);
+  // We will use this register to compare to memory below.
+  // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
+  // For this reason, force allocation of a 32 bit register to use, so that the
+  // compare to memory will be done using a 32 bit comparison.
+  // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
+  RegStorage check_class = AllocTemp();
 
   // If Method* is already in a register, we can save a copy.
   RegLocation rl_method = mir_graph_->GetMethodLoc();
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 1ac15a2..ce7229b 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -31,33 +31,25 @@
 };
 static constexpr RegStorage core_regs_arr_64[] = {
     rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
-#ifdef TARGET_REX_SUPPORT
     rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
-#endif
 };
 static constexpr RegStorage core_regs_arr_64q[] = {
     rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
-#ifdef TARGET_REX_SUPPORT
     rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
-#endif
 };
 static constexpr RegStorage sp_regs_arr_32[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 };
 static constexpr RegStorage sp_regs_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-#ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
-#endif
 };
 static constexpr RegStorage dp_regs_arr_32[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 };
 static constexpr RegStorage dp_regs_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-#ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
-#endif
 };
 static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
 static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
@@ -65,33 +57,25 @@
 static constexpr RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
 static constexpr RegStorage core_temps_arr_64[] = {
     rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
-#ifdef TARGET_REX_SUPPORT
     rs_r8, rs_r9, rs_r10, rs_r11
-#endif
 };
 static constexpr RegStorage core_temps_arr_64q[] = {
     rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
-#ifdef TARGET_REX_SUPPORT
     rs_r8q, rs_r9q, rs_r10q, rs_r11q
-#endif
 };
 static constexpr RegStorage sp_temps_arr_32[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 };
 static constexpr RegStorage sp_temps_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-#ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
-#endif
 };
 static constexpr RegStorage dp_temps_arr_32[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 };
 static constexpr RegStorage dp_temps_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-#ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
-#endif
 };
 
 static constexpr RegStorage xp_temps_arr_32[] = {
@@ -99,9 +83,7 @@
 };
 static constexpr RegStorage xp_temps_arr_64[] = {
     rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
-#ifdef TARGET_REX_SUPPORT
     rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
-#endif
 };
 
 static constexpr ArrayRef<const RegStorage> empty_pool;
@@ -132,10 +114,8 @@
 X86NativeRegisterPool rX86_ARG1;
 X86NativeRegisterPool rX86_ARG2;
 X86NativeRegisterPool rX86_ARG3;
-#ifdef TARGET_REX_SUPPORT
 X86NativeRegisterPool rX86_ARG4;
 X86NativeRegisterPool rX86_ARG5;
-#endif
 X86NativeRegisterPool rX86_FARG0;
 X86NativeRegisterPool rX86_FARG1;
 X86NativeRegisterPool rX86_FARG2;
@@ -216,7 +196,7 @@
     case kRet1: res_reg = rs_rX86_RET1; break;
     case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
     case kHiddenArg: res_reg = rs_rAX; break;
-    case kHiddenFpArg: res_reg = rs_fr0; break;
+    case kHiddenFpArg: DCHECK(!Gen64Bit()); res_reg = rs_fr0; break;
     case kCount: res_reg = rs_rX86_COUNT; break;
     default: res_reg = RegStorage::InvalidReg();
   }
@@ -488,7 +468,6 @@
   LockTemp(rs_rX86_ARG1);
   LockTemp(rs_rX86_ARG2);
   LockTemp(rs_rX86_ARG3);
-#ifdef TARGET_REX_SUPPORT
   if (Gen64Bit()) {
     LockTemp(rs_rX86_ARG4);
     LockTemp(rs_rX86_ARG5);
@@ -501,7 +480,6 @@
     LockTemp(rs_rX86_FARG6);
     LockTemp(rs_rX86_FARG7);
   }
-#endif
 }
 
 /* To be used when explicitly managing register use */
@@ -510,7 +488,6 @@
   FreeTemp(rs_rX86_ARG1);
   FreeTemp(rs_rX86_ARG2);
   FreeTemp(rs_rX86_ARG3);
-#ifdef TARGET_REX_SUPPORT
   if (Gen64Bit()) {
     FreeTemp(rs_rX86_ARG4);
     FreeTemp(rs_rX86_ARG5);
@@ -523,15 +500,14 @@
     FreeTemp(rs_rX86_FARG6);
     FreeTemp(rs_rX86_FARG7);
   }
-#endif
 }
 
 bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
     switch (opcode) {
       case kX86LockCmpxchgMR:
       case kX86LockCmpxchgAR:
-      case kX86LockCmpxchg8bM:
-      case kX86LockCmpxchg8bA:
+      case kX86LockCmpxchg64M:
+      case kX86LockCmpxchg64A:
       case kX86XchgMR:
       case kX86Mfence:
         // Atomic memory instructions provide full barrier.
@@ -730,13 +706,8 @@
     rs_rX86_ARG1 = rs_rSI;
     rs_rX86_ARG2 = rs_rDX;
     rs_rX86_ARG3 = rs_rCX;
-#ifdef TARGET_REX_SUPPORT
     rs_rX86_ARG4 = rs_r8;
     rs_rX86_ARG5 = rs_r9;
-#else
-    rs_rX86_ARG4 = RegStorage::InvalidReg();
-    rs_rX86_ARG5 = RegStorage::InvalidReg();
-#endif
     rs_rX86_FARG0 = rs_fr0;
     rs_rX86_FARG1 = rs_fr1;
     rs_rX86_FARG2 = rs_fr2;
@@ -749,10 +720,8 @@
     rX86_ARG1 = rSI;
     rX86_ARG2 = rDX;
     rX86_ARG3 = rCX;
-#ifdef TARGET_REX_SUPPORT
     rX86_ARG4 = r8;
     rX86_ARG5 = r9;
-#endif
     rX86_FARG0 = fr0;
     rX86_FARG1 = fr1;
     rX86_FARG2 = fr2;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index e550488..5022529 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -142,10 +142,6 @@
   r7             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
   r7q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 7,
   rDI            = r7,
-#ifndef TARGET_REX_SUPPORT
-  // fake return address register for core spill mask.
-  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
-#else
   r8             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
   r8q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 8,
   r9             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
@@ -164,7 +160,6 @@
   r15q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 15,
   // fake return address register for core spill mask.
   rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
-#endif
 
   // xmm registers, single precision view.
   fr0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
@@ -175,7 +170,6 @@
   fr5  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
   fr6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
   fr7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,
-#ifdef TARGET_REX_SUPPORT
   fr8  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8,
   fr9  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9,
   fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
@@ -184,7 +178,6 @@
   fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
   fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
   fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
-#endif
 
   // xmm registers, double precision aliases.
   dr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
@@ -195,7 +188,6 @@
   dr5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5,
   dr6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
   dr7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,
-#ifdef TARGET_REX_SUPPORT
   dr8  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8,
   dr9  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9,
   dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
@@ -204,7 +196,6 @@
   dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
   dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
   dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
-#endif
 
   // xmm registers, quad precision aliases
   xr0  = RegStorage::k128BitSolo | 0,
@@ -215,7 +206,6 @@
   xr5  = RegStorage::k128BitSolo | 5,
   xr6  = RegStorage::k128BitSolo | 6,
   xr7  = RegStorage::k128BitSolo | 7,
-#ifdef TARGET_REX_SUPPORT
   xr8  = RegStorage::k128BitSolo | 8,
   xr9  = RegStorage::k128BitSolo | 9,
   xr10 = RegStorage::k128BitSolo | 10,
@@ -224,7 +214,6 @@
   xr13 = RegStorage::k128BitSolo | 13,
   xr14 = RegStorage::k128BitSolo | 14,
   xr15 = RegStorage::k128BitSolo | 15,
-#endif
 
   // TODO: as needed, add 256, 512 and 1024-bit xmm views.
 };
@@ -254,7 +243,6 @@
 constexpr RegStorage rs_r7q(RegStorage::kValid | r7q);
 constexpr RegStorage rs_rDI = rs_r7;
 constexpr RegStorage rs_rRET(RegStorage::kValid | rRET);
-#ifdef TARGET_REX_SUPPORT
 constexpr RegStorage rs_r8(RegStorage::kValid | r8);
 constexpr RegStorage rs_r8q(RegStorage::kValid | r8q);
 constexpr RegStorage rs_r9(RegStorage::kValid | r9);
@@ -271,7 +259,6 @@
 constexpr RegStorage rs_r14q(RegStorage::kValid | r14q);
 constexpr RegStorage rs_r15(RegStorage::kValid | r15);
 constexpr RegStorage rs_r15q(RegStorage::kValid | r15q);
-#endif
 
 constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
 constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
@@ -281,7 +268,6 @@
 constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
 constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
 constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
-#ifdef TARGET_REX_SUPPORT
 constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
 constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
 constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
@@ -290,7 +276,6 @@
 constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
 constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
 constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
-#endif
 
 constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
 constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
@@ -300,7 +285,6 @@
 constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
 constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
 constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
-#ifdef TARGET_REX_SUPPORT
 constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
 constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
 constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
@@ -309,7 +293,6 @@
 constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
 constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
 constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
-#endif
 
 constexpr RegStorage rs_xr0(RegStorage::kValid | xr0);
 constexpr RegStorage rs_xr1(RegStorage::kValid | xr1);
@@ -319,7 +302,6 @@
 constexpr RegStorage rs_xr5(RegStorage::kValid | xr5);
 constexpr RegStorage rs_xr6(RegStorage::kValid | xr6);
 constexpr RegStorage rs_xr7(RegStorage::kValid | xr7);
-#ifdef TARGET_REX_SUPPORT
 constexpr RegStorage rs_xr8(RegStorage::kValid | xr8);
 constexpr RegStorage rs_xr9(RegStorage::kValid | xr9);
 constexpr RegStorage rs_xr10(RegStorage::kValid | xr10);
@@ -328,16 +310,13 @@
 constexpr RegStorage rs_xr13(RegStorage::kValid | xr13);
 constexpr RegStorage rs_xr14(RegStorage::kValid | xr14);
 constexpr RegStorage rs_xr15(RegStorage::kValid | xr15);
-#endif
 
 extern X86NativeRegisterPool rX86_ARG0;
 extern X86NativeRegisterPool rX86_ARG1;
 extern X86NativeRegisterPool rX86_ARG2;
 extern X86NativeRegisterPool rX86_ARG3;
-#ifdef TARGET_REX_SUPPORT
 extern X86NativeRegisterPool rX86_ARG4;
 extern X86NativeRegisterPool rX86_ARG5;
-#endif
 extern X86NativeRegisterPool rX86_FARG0;
 extern X86NativeRegisterPool rX86_FARG1;
 extern X86NativeRegisterPool rX86_FARG2;
@@ -620,7 +599,7 @@
   Binary0fOpCode(kX86Imul64),   // 64bit multiply
   kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,  // compare and exchange
   kX86LockCmpxchgMR, kX86LockCmpxchgAR,  // locked compare and exchange
-  kX86LockCmpxchg8bM, kX86LockCmpxchg8bA,  // locked compare and exchange
+  kX86LockCmpxchg64M, kX86LockCmpxchg64A,  // locked compare and exchange
   kX86XchgMR,  // exchange memory with register (automatically locked)
   Binary0fOpCode(kX86Movzx8),   // zero-extend 8-bit value
   Binary0fOpCode(kX86Movzx16),  // zero-extend 16-bit value
@@ -654,7 +633,6 @@
   kData,                                    // Special case for raw data.
   kNop,                                     // Special case for variable length nop.
   kNullary,                                 // Opcode that takes no arguments.
-  kPrefix2Nullary,                          // Opcode that takes no arguments, but 2 prefixes.
   kRegOpcode,                               // Shorter form of R instruction kind (opcode+rd)
   kReg, kMem, kArray,                       // R, M and A instruction kinds.
   kMemReg, kArrayReg, kThreadReg,           // MR, AR and TR instruction kinds.
@@ -663,11 +641,11 @@
   kRegImm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
   kRegRegImm, kRegMemImm, kRegArrayImm,     // RRI, RMI and RAI instruction kinds.
   kMovRegImm,                               // Shorter form move RI.
-  kRegRegImmRev,                            // RRI with first reg in r/m
+  kRegRegImmStore,                          // RRI following the store modrm reg-reg encoding rather than the load.
   kMemRegImm,                               // MRI instruction kinds.
   kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
   kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
-  kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
+  // kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
   kRegCond, kMemCond, kArrayCond,          // R, M, A instruction kinds following by a condition.
   kRegRegCond,                             // RR instruction kind followed by a condition.
   kRegMemCond,                             // RM instruction kind followed by a condition.
@@ -680,19 +658,25 @@
 /* Struct used to define the EncodingMap positions for each X86 opcode */
 struct X86EncodingMap {
   X86OpCode opcode;      // e.g. kOpAddRI
-  X86EncodingKind kind;  // Used to discriminate in the union below
+  // The broad category the instruction conforms to, such as kRegReg. Identifies which LIR operands
+  // hold meaning for the opcode.
+  X86EncodingKind kind;
   uint64_t flags;
   struct {
-  uint8_t prefix1;       // non-zero => a prefix byte
-  uint8_t prefix2;       // non-zero => a second prefix byte
-  uint8_t opcode;        // 1 byte opcode
-  uint8_t extra_opcode1;  // possible extra opcode byte
-  uint8_t extra_opcode2;  // possible second extra opcode byte
-  // 3bit opcode that gets encoded in the register bits of the modrm byte, use determined by the
-  // encoding kind
+  uint8_t prefix1;       // Non-zero => a prefix byte.
+  uint8_t prefix2;       // Non-zero => a second prefix byte.
+  uint8_t opcode;        // 1 byte opcode.
+  uint8_t extra_opcode1;  // Possible extra opcode byte.
+  uint8_t extra_opcode2;  // Possible second extra opcode byte.
+  // 3-bit opcode that gets encoded in the register bits of the modrm byte, use determined by the
+  // encoding kind.
   uint8_t modrm_opcode;
-  uint8_t ax_opcode;  // non-zero => shorter encoding for AX as a destination
-  uint8_t immediate_bytes;  // number of bytes of immediate
+  uint8_t ax_opcode;  // Non-zero => shorter encoding for AX as a destination.
+  uint8_t immediate_bytes;  // Number of bytes of immediate.
+  // Does the instruction address a byte register? In 32-bit mode the registers ah, bh, ch and dh
+  // are not used. In 64-bit mode the REX prefix is used to normalize and allow any byte register
+  // to be addressed.
+  bool r8_form;
   } skeleton;
   const char *name;
   const char* fmt;
@@ -726,6 +710,7 @@
 
 #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
 #define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
+#define IS_SIMM32(v) ((INT64_C(-2147483648) <= (v)) && ((v) <= INT64_C(2147483647)))
 
 extern X86EncodingMap EncodingMap[kX86Last];
 extern X86ConditionCode X86ConditionEncoding(ConditionCode cond);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ed3f43c..e888cc1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -484,7 +484,10 @@
 }
 
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
-  constant->SetLocations(nullptr);
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) {
@@ -492,7 +495,10 @@
 }
 
 void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
-  constant->SetLocations(nullptr);
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitLongConstant(HLongConstant* constant) {
@@ -794,7 +800,12 @@
 }
 
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());
+  }
+  locations->SetOut(Location::Any());
+  instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8bfd8d6..72c697f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -494,7 +494,10 @@
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
-  constant->SetLocations(nullptr);
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) {
@@ -502,7 +505,10 @@
 }
 
 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
-  constant->SetLocations(nullptr);
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant) {
@@ -814,7 +820,12 @@
 }
 
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());
+  }
+  locations->SetOut(Location::Any());
+  instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) {
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index f9ae529..e4f9371 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -18,6 +18,7 @@
 
 #include "base/stringprintf.h"
 #include "builder.h"
+#include "code_generator.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "graph_visualizer.h"
@@ -41,8 +42,11 @@
   ASSERT_NE(graph, nullptr);
 
   graph->BuildDominatorTree();
+  graph->TransformToSSA();
   graph->FindNaturalLoops();
-  SsaLivenessAnalysis liveness(*graph);
+
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   ASSERT_EQ(liveness.GetLinearPostOrder().Size(), number_of_blocks);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 017117a..987c5f2 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "builder.h"
+#include "code_generator.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "nodes.h"
@@ -56,14 +57,16 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
+
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
-  ASSERT_EQ(8u, range->GetEnd());
+  ASSERT_EQ(9u, range->GetEnd());
   HBasicBlock* block = graph->GetBlocks().Get(1);
   ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr);
   ASSERT_EQ(8u, block->GetLastInstruction()->GetLifetimePosition());
@@ -101,14 +104,15 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
-  ASSERT_EQ(22u, range->GetEnd());
+  ASSERT_EQ(23u, range->GetEnd());
   HBasicBlock* block = graph->GetBlocks().Get(3);
   ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr);
   ASSERT_EQ(22u, block->GetLastInstruction()->GetLifetimePosition());
@@ -149,7 +153,8 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   // Test for the 4 constant.
@@ -181,7 +186,7 @@
   range = interval->GetFirstRange();
   ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(3)->GetLifetimePosition());
   ASSERT_EQ(22u, range->GetStart());
-  ASSERT_EQ(24u, range->GetEnd());
+  ASSERT_EQ(25u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
@@ -224,7 +229,8 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   // Test for the 0 constant.
@@ -249,7 +255,7 @@
   range = interval->GetFirstRange();
   // The instruction is live until the return instruction after the loop.
   ASSERT_EQ(6u, range->GetStart());
-  ASSERT_EQ(26u, range->GetEnd());
+  ASSERT_EQ(27u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the phi.
@@ -257,7 +263,7 @@
   range = interval->GetFirstRange();
   // Instruction is consumed by the if.
   ASSERT_EQ(14u, range->GetStart());
-  ASSERT_EQ(16u, range->GetEnd());
+  ASSERT_EQ(17u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 7a33620..2d0bc39 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "builder.h"
+#include "code_generator.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "nodes.h"
@@ -48,7 +49,8 @@
   graph->BuildDominatorTree();
   graph->TransformToSSA();
   graph->FindNaturalLoops();
-  SsaLivenessAnalysis liveness(*graph);
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   std::ostringstream buffer;
@@ -69,17 +71,17 @@
 TEST(LivenessTest, CFG1) {
   const char* expected =
     "Block 0\n"
-    "  live in: ()\n"
-    "  live out: ()\n"
-    "  kill: ()\n"
+    "  live in: (0)\n"
+    "  live out: (0)\n"
+    "  kill: (1)\n"
     "Block 1\n"
-    "  live in: ()\n"
-    "  live out: ()\n"
-    "  kill: ()\n"
+    "  live in: (0)\n"
+    "  live out: (0)\n"
+    "  kill: (0)\n"
     "Block 2\n"
-    "  live in: ()\n"
-    "  live out: ()\n"
-    "  kill: ()\n";
+    "  live in: (0)\n"
+    "  live out: (0)\n"
+    "  kill: (0)\n";
 
   // Constant is not used.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index dfbb488..3dc0928 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -131,7 +131,7 @@
   visualizer.DumpGraph("ssa");
 
   graph->FindNaturalLoops();
-  SsaLivenessAnalysis liveness(*graph);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
   visualizer.DumpGraph("liveness");
 
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index dd175d2..8c6eb2a 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -22,6 +22,7 @@
 namespace art {
 
 static constexpr size_t kMaxLifetimePosition = -1;
+static constexpr size_t kDefaultNumberOfSpillSlots = 4;
 
 RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, const CodeGenerator& codegen)
       : allocator_(allocator),
@@ -30,6 +31,7 @@
         handled_(allocator, 0),
         active_(allocator, 0),
         inactive_(allocator, 0),
+        spill_slots_(allocator, kDefaultNumberOfSpillSlots),
         processing_core_registers_(false),
         number_of_registers_(-1),
         registers_array_(nullptr),
@@ -78,11 +80,39 @@
       intervals.Add(instruction->GetLiveInterval());
     }
   }
-  return ValidateIntervals(intervals, codegen_, allocator_, processing_core_registers_,
-                           log_fatal_on_failure);
+  return ValidateIntervals(intervals, spill_slots_.Size(), codegen_, allocator_,
+                           processing_core_registers_, log_fatal_on_failure);
 }
 
-bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& ranges,
+class AllRangesIterator : public ValueObject {  // Walks all ranges of an interval and its siblings.
+ public:
+  explicit AllRangesIterator(LiveInterval* interval)
+      : current_interval_(interval),
+        current_range_(interval->GetFirstRange()) {}
+
+  bool Done() const { return current_interval_ == nullptr; }  // True once no sibling is left.
+  LiveRange* CurrentRange() const { return current_range_; }
+  LiveInterval* CurrentInterval() const { return current_interval_; }
+
+  void Advance() {
+    current_range_ = current_range_->GetNext();
+    if (current_range_ == nullptr) {  // Sibling exhausted: continue with the next one, if any.
+      current_interval_ = current_interval_->GetNextSibling();
+      if (current_interval_ != nullptr) {
+        current_range_ = current_interval_->GetFirstRange();
+      }
+    }
+  }
+
+ private:
+  LiveInterval* current_interval_;
+  LiveRange* current_range_;
+
+  DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
+};
+
+bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& intervals,
+                                          size_t number_of_spill_slots,
                                           const CodeGenerator& codegen,
                                           ArenaAllocator* allocator,
                                           bool processing_core_registers,
@@ -90,25 +120,40 @@
   size_t number_of_registers = processing_core_registers
       ? codegen.GetNumberOfCoreRegisters()
       : codegen.GetNumberOfFloatingPointRegisters();
-  GrowableArray<ArenaBitVector*> bit_vectors(allocator, number_of_registers);
+  GrowableArray<ArenaBitVector*> liveness_of_values(
+      allocator, number_of_registers + number_of_spill_slots);
 
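-  // Allocate a bit vector per register. A live interval that has a register
-  // allocated will populate the associated bit vector based on its live ranges.
+  // Allocate a bit vector per register and per spill slot. A live interval that has a register
+  // or a spill slot allocated will populate the associated bit vector based on its live ranges.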
-  for (size_t i = 0; i < number_of_registers; i++) {
-    bit_vectors.Add(new (allocator) ArenaBitVector(allocator, 0, true));
+  for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) {
+    liveness_of_values.Add(new (allocator) ArenaBitVector(allocator, 0, true));
   }
 
-  for (size_t i = 0, e = ranges.Size(); i < e; ++i) {
-    LiveInterval* current = ranges.Get(i);
-    do {
-      if (!current->HasRegister()) {
-        continue;
+  for (size_t i = 0, e = intervals.Size(); i < e; ++i) {
+    for (AllRangesIterator it(intervals.Get(i)); !it.Done(); it.Advance()) {
+      LiveInterval* current = it.CurrentInterval();
+      if (current->GetParent()->HasSpillSlot()) {
+        BitVector* liveness_of_spill_slot = liveness_of_values.Get(
+            number_of_registers + current->GetParent()->GetSpillSlot() / kVRegSize);
+        for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
+          if (liveness_of_spill_slot->IsBitSet(j)) {
+            if (log_fatal_on_failure) {
+              std::ostringstream message;
+              message << "Spill slot conflict at " << j;
+              LOG(FATAL) << message.str();
+            } else {
+              return false;
+            }
+          } else {
+            liveness_of_spill_slot->SetBit(j);
+          }
+        }
       }
-      BitVector* vector = bit_vectors.Get(current->GetRegister());
-      LiveRange* range = current->GetFirstRange();
-      do {
-        for (size_t j = range->GetStart(); j < range->GetEnd(); ++j) {
-          if (vector->IsBitSet(j)) {
+
+      if (current->HasRegister()) {
+        BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister());
+        for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
+          if (liveness_of_register->IsBitSet(j)) {
             if (log_fatal_on_failure) {
               std::ostringstream message;
               message << "Register conflict at " << j << " for ";
@@ -122,11 +167,11 @@
               return false;
             }
           } else {
-            vector->SetBit(j);
+            liveness_of_register->SetBit(j);
           }
         }
-      } while ((range = range->GetNext()) != nullptr);
-    } while ((current = current->GetNextSibling()) != nullptr);
+      }
+    }
   }
   return true;
 }
@@ -270,7 +315,7 @@
 bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
   size_t first_register_use = current->FirstRegisterUse();
   if (current->FirstRegisterUse() == kNoLifetime) {
-    // TODO: Allocate spill slot for `current`.
+    AllocateSpillSlotFor(current);
     return false;
   }
 
@@ -317,6 +362,7 @@
   if (first_register_use >= next_use[reg]) {
     // If the first use of that instruction is after the last use of the found
     // register, we split this interval just before its first register use.
+    AllocateSpillSlotFor(current);
     LiveInterval* split = Split(current, first_register_use - 1);
     AddToUnhandled(split);
     return false;
@@ -370,9 +416,42 @@
     return interval;
   } else {
     LiveInterval* new_interval = interval->SplitAt(position);
-    // TODO: Allocate spill slot for `interval`.
     return new_interval;
   }
 }
 
+void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
+  LiveInterval* parent = interval->GetParent();
+
+  // An instruction gets a spill slot for its entire lifetime. If the parent
+  // of this interval already has a spill slot, there is nothing to do.
+  if (parent->HasSpillSlot()) {
+    return;
+  }
+
+  // Find when this instruction dies: the end of the last sibling of `interval`.
+  LiveInterval* last_sibling = interval;
+  while (last_sibling->GetNextSibling() != nullptr) {
+    last_sibling = last_sibling->GetNextSibling();
+  }
+  size_t end = last_sibling->GetEnd();
+
+  // Find an available spill slot: one whose recorded end is not after the start of `parent`.
+  size_t slot = 0;
+  for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
+    if (spill_slots_.Get(slot) <= parent->GetStart()) {
+      break;
+    }
+  }
+
+  if (slot == spill_slots_.Size()) {
+    // We need a new spill slot.
+    spill_slots_.Add(end);
+  } else {
+    spill_slots_.Put(slot, end);  // Reuse the free slot and record its new end.
+  }
+
+  parent->SetSpillSlot(slot * kVRegSize);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index e575b96..3393a04 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -55,6 +55,7 @@
 
   // Helper method for validation. Used by unit testing.
   static bool ValidateIntervals(const GrowableArray<LiveInterval*>& intervals,
+                                size_t number_of_spill_slots,
                                 const CodeGenerator& codegen,
                                 ArenaAllocator* allocator,
                                 bool processing_core_registers,
@@ -75,6 +76,9 @@
   // Returns whether `reg` is blocked by the code generator.
   bool IsBlocked(int reg) const;
 
+  // Allocate a spill slot for the given interval. The slot is stored on the interval's parent.
+  void AllocateSpillSlotFor(LiveInterval* interval);
+
   // Helper methods.
   void AllocateRegistersInternal(const SsaLivenessAnalysis& liveness);
   bool ValidateInternal(const SsaLivenessAnalysis& liveness, bool log_fatal_on_failure) const;
@@ -98,6 +102,9 @@
   // That is, they have a lifetime hole that spans the start of the new interval.
   GrowableArray<LiveInterval*> inactive_;
 
+  // Spill slots allocated for live intervals; entry i is the end position of slot i's last owner.
+  GrowableArray<size_t> spill_slots_;
+
   // True if processing core registers. False if processing floating
   // point registers.
   bool processing_core_registers_;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 019d0f8..ff9b9be 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -40,9 +40,9 @@
   graph->BuildDominatorTree();
   graph->TransformToSSA();
   graph->FindNaturalLoops();
-  SsaLivenessAnalysis liveness(*graph);
-  liveness.Analyze();
   CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
+  liveness.Analyze();
   RegisterAllocator register_allocator(&allocator, *codegen);
   register_allocator.AllocateRegisters(liveness);
   return register_allocator.Validate(liveness, false);
@@ -64,10 +64,12 @@
     static constexpr size_t ranges[][2] = {{0, 42}};
     intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 0));
     intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
-    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
     intervals.Reset();
   }
 
@@ -77,10 +79,12 @@
     intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
     static constexpr size_t ranges2[][2] = {{42, 43}};
     intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
     intervals.Reset();
   }
 
@@ -90,10 +94,12 @@
     intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
     static constexpr size_t ranges2[][2] = {{42, 43}};
     intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
     intervals.Reset();
   }
 
@@ -103,10 +109,12 @@
     intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
     static constexpr size_t ranges2[][2] = {{42, 47}};
     intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
-    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
     intervals.Reset();
   }
 
@@ -117,14 +125,17 @@
     intervals.Get(0)->SplitAt(43);
     static constexpr size_t ranges2[][2] = {{42, 47}};
     intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
     // Sibling of the first interval has no register allocated to it.
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(0)->GetNextSibling()->SetRegister(0);
-    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
   }
 }
 
@@ -286,9 +297,9 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildSSAGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
-  liveness.Analyze();
   CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
+  liveness.Analyze();
   RegisterAllocator register_allocator(&allocator, *codegen);
   register_allocator.AllocateRegisters(liveness);
   ASSERT_TRUE(register_allocator.Validate(liveness, false));
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 54c3c5d..471307e 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -15,22 +15,12 @@
  */
 
 #include "ssa_builder.h"
+
 #include "nodes.h"
+#include "ssa_type_propagation.h"
 
 namespace art {
 
-static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) {
-  // We trust the verifier has already done the necessary checking.
-  switch (existing) {
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimNot:
-      return existing;
-    default:
-      return new_type;
-  }
-}
-
 void SsaBuilder::BuildSsa() {
   // 1) Visit in reverse post order. We need to have all predecessors of a block visited
   // (with the exception of loops) in order to create the right environment for that
@@ -44,18 +34,18 @@
     HBasicBlock* block = loop_headers_.Get(i);
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
-      Primitive::Type type = Primitive::kPrimVoid;
       for (size_t pred = 0; pred < block->GetPredecessors().Size(); pred++) {
         HInstruction* input = ValueOfLocal(block->GetPredecessors().Get(pred), phi->GetRegNumber());
         phi->AddInput(input);
-        type = MergeTypes(type, input->GetType());
       }
-      phi->SetType(type);
     }
   }
-  // TODO: Now that the type of loop phis is set, we need a type propagation phase.
 
-  // 3) Clear locals.
+  // 3) Propagate types of phis.
+  SsaTypePropagation type_propagation(GetGraph());
+  type_propagation.Run();
+
+  // 4) Clear locals.
   // TODO: Move this to a dead code eliminator phase.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
@@ -118,16 +108,10 @@
       if (is_different) {
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
             GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid);
-        Primitive::Type type = Primitive::kPrimVoid;
         for (size_t i = 0; i < block->GetPredecessors().Size(); i++) {
           HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(i), local);
-          // We need to merge the incoming types, as the Dex format does not
-          // guarantee the inputs have the same type. In particular the 0 constant is
-          // used for all types, but the graph builder treats it as an int.
-          type = MergeTypes(type, value->GetType());
           phi->SetRawInputAt(i, value);
         }
-        phi->SetType(type);
         block->AddPhi(phi);
         value = phi;
       }
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index c367611..50ea00f 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -15,6 +15,8 @@
  */
 
 #include "ssa_liveness_analysis.h"
+
+#include "code_generator.h"
 #include "nodes.h"
 
 namespace art {
@@ -80,38 +82,6 @@
   order->Add(block);
 }
 
-class HLinearOrderIterator : public ValueObject {
- public:
-  explicit HLinearOrderIterator(const GrowableArray<HBasicBlock*>& post_order)
-      : post_order_(post_order), index_(post_order.Size()) {}
-
-  bool Done() const { return index_ == 0; }
-  HBasicBlock* Current() const { return post_order_.Get(index_ -1); }
-  void Advance() { --index_; DCHECK_GE(index_, 0U); }
-
- private:
-  const GrowableArray<HBasicBlock*>& post_order_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
-};
-
-class HLinearPostOrderIterator : public ValueObject {
- public:
-  explicit HLinearPostOrderIterator(const GrowableArray<HBasicBlock*>& post_order)
-      : post_order_(post_order), index_(0) {}
-
-  bool Done() const { return index_ == post_order_.Size(); }
-  HBasicBlock* Current() const { return post_order_.Get(index_); }
-  void Advance() { ++index_; }
-
- private:
-  const GrowableArray<HBasicBlock*>& post_order_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
-};
-
 void SsaLivenessAnalysis::LinearizeGraph() {
   // For simplicity of the implementation, we create post linear order. The order for
   // computing live ranges is the reverse of that order.
@@ -131,30 +101,38 @@
   // to differentiate between the start and end of an instruction. Adding 2 to
   // the lifetime position for each instruction ensures the start of an
   // instruction is different than the end of the previous instruction.
-  for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
+  for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     block->SetLifetimeStart(lifetime_position);
 
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
-      if (current->HasUses()) {
+      current->Accept(codegen_->GetLocationBuilder());
+      LocationSummary* locations = current->GetLocations();
+      if (locations != nullptr && locations->Out().IsValid()) {
         instructions_from_ssa_index_.Add(current);
         current->SetSsaIndex(ssa_index++);
         current->SetLiveInterval(
-            new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType()));
+            new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType(), current));
       }
       current->SetLifetimePosition(lifetime_position);
     }
     lifetime_position += 2;
 
+    // Add a null marker to signal that a new block starts here.
+    instructions_from_lifetime_position_.Add(nullptr);
+
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
-      if (current->HasUses()) {
+      current->Accept(codegen_->GetLocationBuilder());
+      LocationSummary* locations = current->GetLocations();
+      if (locations != nullptr && locations->Out().IsValid()) {
         instructions_from_ssa_index_.Add(current);
         current->SetSsaIndex(ssa_index++);
         current->SetLiveInterval(
-            new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType()));
+            new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType(), current));
       }
+      instructions_from_lifetime_position_.Add(current);
       current->SetLifetimePosition(lifetime_position);
       lifetime_position += 2;
     }
@@ -165,7 +143,7 @@
 }
 
 void SsaLivenessAnalysis::ComputeLiveness() {
-  for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
+  for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     block_infos_.Put(
         block->GetBlockId(),
@@ -186,7 +164,7 @@
 void SsaLivenessAnalysis::ComputeLiveRanges() {
   // Do a post order visit, adding inputs of instructions live in the block where
   // that instruction is defined, and killing instructions that are being visited.
-  for (HLinearPostOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
+  for (HLinearPostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
 
     BitVector* kill = GetKillSet(*block);
@@ -201,7 +179,7 @@
       for (HInstructionIterator it(successor->GetPhis()); !it.Done(); it.Advance()) {
         HInstruction* phi = it.Current();
         HInstruction* input = phi->InputAt(phi_input_index);
-        input->GetLiveInterval()->AddPhiUse(phi, block);
+        input->GetLiveInterval()->AddPhiUse(phi, phi_input_index, block);
         // A phi input whose last user is the phi dies at the end of the predecessor block,
         // and not at the phi's lifetime position.
         live_in->SetBit(input->GetSsaIndex());
@@ -228,7 +206,7 @@
         HInstruction* input = current->InputAt(i);
         DCHECK(input->HasSsaIndex());
         live_in->SetBit(input->GetSsaIndex());
-        input->GetLiveInterval()->AddUse(current);
+        input->GetLiveInterval()->AddUse(current, i, false);
       }
 
       if (current->HasEnvironment()) {
@@ -239,7 +217,7 @@
           if (instruction != nullptr) {
             DCHECK(instruction->HasSsaIndex());
             live_in->SetBit(instruction->GetSsaIndex());
-            instruction->GetLiveInterval()->AddUse(current);
+            instruction->GetLiveInterval()->AddUse(current, i, true);
           }
         }
       }
@@ -251,6 +229,10 @@
       if (current->HasSsaIndex()) {
         kill->SetBit(current->GetSsaIndex());
         live_in->ClearBit(current->GetSsaIndex());
+        LiveInterval* interval = current->GetLiveInterval();
+        DCHECK((interval->GetFirstRange() == nullptr)
+               || (interval->GetStart() == current->GetLifetimePosition()));
+        interval->SetFrom(current->GetLifetimePosition());
       }
     }
 
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 733535e..7903ad6 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -21,6 +21,8 @@
 
 namespace art {
 
+class CodeGenerator;
+
 class BlockInfo : public ArenaObject {
  public:
   BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values)
@@ -87,9 +89,17 @@
  */
 class UsePosition : public ArenaObject {
  public:
-  UsePosition(HInstruction* user, size_t position, UsePosition* next)
-      : user_(user), position_(position), next_(next) {
-    DCHECK(user->AsPhi() != nullptr || GetPosition() == user->GetLifetimePosition());
+  UsePosition(HInstruction* user,
+              size_t input_index,
+              bool is_environment,
+              size_t position,
+              UsePosition* next)
+      : user_(user),
+        input_index_(input_index),
+        is_environment_(is_environment),
+        position_(position),
+        next_(next) {
+    DCHECK(user->AsPhi() != nullptr || GetPosition() == user->GetLifetimePosition() + 1);
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
@@ -99,12 +109,18 @@
 
   HInstruction* GetUser() const { return user_; }
 
+  bool GetIsEnvironment() const { return is_environment_; }
+
+  size_t GetInputIndex() const { return input_index_; }
+
   void Dump(std::ostream& stream) const {
     stream << position_;
   }
 
  private:
   HInstruction* const user_;
+  const size_t input_index_;
+  const bool is_environment_;
   const size_t position_;
   UsePosition* const next_;
 
@@ -117,17 +133,33 @@
  */
 class LiveInterval : public ArenaObject {
  public:
-  LiveInterval(ArenaAllocator* allocator, Primitive::Type type)
+  LiveInterval(ArenaAllocator* allocator, Primitive::Type type, HInstruction* defined_by = nullptr)
       : allocator_(allocator),
         first_range_(nullptr),
         last_range_(nullptr),
         first_use_(nullptr),
         type_(type),
         next_sibling_(nullptr),
-        register_(kNoRegister) {}
+        parent_(this),
+        register_(kNoRegister),
+        spill_slot_(kNoSpillSlot),
+        is_fixed_(false),
+        defined_by_(defined_by) {}
 
-  void AddUse(HInstruction* instruction) {
-    size_t position = instruction->GetLifetimePosition();
+  static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) {
+    LiveInterval* interval = new (allocator) LiveInterval(allocator, type);  // No defined_by.
+    interval->SetRegister(reg);
+    interval->is_fixed_ = true;  // Makes IsFixed() report true for this interval.
+    return interval;
+  }
+
+  bool IsFixed() const { return is_fixed_; }
+
+  void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
+    // Set the use within the instruction.
+    // TODO: Use the instruction's location to know whether the instruction can die
+    // at entry, or needs to stay alive within the user.
+    size_t position = instruction->GetLifetimePosition() + 1;
     size_t start_block_position = instruction->GetBlock()->GetLifetimeStart();
     size_t end_block_position = instruction->GetBlock()->GetLifetimeEnd();
     if (first_range_ == nullptr) {
@@ -143,12 +175,14 @@
       // There is a hole in the interval. Create a new range.
       first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_);
     }
-    first_use_ = new (allocator_) UsePosition(instruction, position, first_use_);
+    first_use_ = new (allocator_) UsePosition(
+        instruction, input_index, is_environment, position, first_use_);
   }
 
-  void AddPhiUse(HInstruction* instruction, HBasicBlock* block) {
+  void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
     DCHECK(instruction->AsPhi() != nullptr);
-    first_use_ = new (allocator_) UsePosition(instruction, block->GetLifetimeEnd(), first_use_);
+    first_use_ = new (allocator_) UsePosition(
+        instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
   }
 
   void AddRange(size_t start, size_t end) {
@@ -178,11 +212,23 @@
     }
   }
 
+  bool HasSpillSlot() const { return spill_slot_ != kNoSpillSlot; }
+  void SetSpillSlot(int slot) { spill_slot_ = slot; }
+  int GetSpillSlot() const { return spill_slot_; }
+
   void SetFrom(size_t from) {
-    DCHECK(first_range_ != nullptr);
-    first_range_->start_ = from;
+    if (first_range_ != nullptr) {
+      first_range_->start_ = from;
+    } else {
+      // Instruction without uses.
+      DCHECK(!defined_by_->HasUses());
+      DCHECK(from == defined_by_->GetLifetimePosition());
+      first_range_ = last_range_ = new (allocator_) LiveRange(from, from + 2, nullptr);
+    }
   }
 
+  LiveInterval* GetParent() const { return parent_; }
+
   LiveRange* GetFirstRange() const { return first_range_; }
 
   int GetRegister() const { return register_; }
@@ -190,11 +236,11 @@
   void ClearRegister() { register_ = kNoRegister; }
   bool HasRegister() const { return register_ != kNoRegister; }
 
-  bool IsDeadAt(size_t position) {
+  bool IsDeadAt(size_t position) const {
     return last_range_->GetEnd() <= position;
   }
 
-  bool Covers(size_t position) {
+  bool Covers(size_t position) const {
     LiveRange* current = first_range_;
     while (current != nullptr) {
       if (position >= current->GetStart() && position < current->GetEnd()) {
@@ -208,27 +254,10 @@
   /**
    * Returns the first intersection of this interval with `other`.
    */
-  size_t FirstIntersectionWith(LiveInterval* other) {
-    // We only call this method if there is a lifetime hole in this interval
-    // at the start of `other`.
-    DCHECK(!Covers(other->GetStart()));
-    DCHECK_LE(GetStart(), other->GetStart());
-    // Move to the range in this interval that starts after the other interval.
-    size_t other_start = other->GetStart();
-    LiveRange* my_range = first_range_;
-    while (my_range != nullptr) {
-      if (my_range->GetStart() >= other_start) {
-        break;
-      } else {
-        my_range = my_range->GetNext();
-      }
-    }
-    if (my_range == nullptr) {
-      return kNoLifetime;
-    }
-
+  size_t FirstIntersectionWith(LiveInterval* other) const {
     // Advance both intervals and find the first matching range start in
     // this interval.
+    LiveRange* my_range = first_range_;
     LiveRange* other_range = other->first_range_;
     do {
       if (my_range->IntersectsWith(*other_range)) {
@@ -252,16 +281,33 @@
     return first_range_->GetStart();
   }
 
+  size_t GetEnd() const {
+    return last_range_->GetEnd();
+  }
+
   size_t FirstRegisterUseAfter(size_t position) const {
+    if (position == GetStart() && defined_by_ != nullptr) {
+      Location location = defined_by_->GetLocations()->Out();
+      // This interval is the first interval of the instruction. If the output
+      // of the instruction requires a register, we return the position of that instruction
+      // as the first register use.
+      if (location.IsUnallocated()) {
+        if ((location.GetPolicy() == Location::kRequiresRegister)
+             || (location.GetPolicy() == Location::kSameAsFirstInput
+                && defined_by_->GetLocations()->InAt(0).GetPolicy() == Location::kRequiresRegister)) {
+          return position;
+        }
+      }
+    }
+
     UsePosition* use = first_use_;
     while (use != nullptr) {
       size_t use_position = use->GetPosition();
-      // TODO: Once we plug the Locations builder of the code generator
-      // to the register allocator, this method must be adjusted. We
-      // test if there is an environment, because these are currently the only
-      // instructions that could have more uses than the number of registers.
-      if (use_position >= position && !use->GetUser()->NeedsEnvironment()) {
-        return use_position;
+      if (use_position >= position && !use->GetIsEnvironment()) {
+        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
+        if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
+          return use_position;
+        }
       }
       use = use->GetNext();
     }
@@ -272,10 +318,18 @@
     return FirstRegisterUseAfter(GetStart());
   }
 
+  UsePosition* GetFirstUse() const {
+    return first_use_;
+  }
+
   Primitive::Type GetType() const {
     return type_;
   }
 
+  HInstruction* GetDefinedBy() const {
+    return defined_by_;
+  }
+
   /**
    * Split this interval at `position`. This interval is changed to:
    * [start ... position).
@@ -284,7 +338,7 @@
    * [position ... end)
    */
   LiveInterval* SplitAt(size_t position) {
-    DCHECK(next_sibling_ == nullptr);
+    DCHECK(!is_fixed_);
     DCHECK_GT(position, GetStart());
 
     if (last_range_->GetEnd() <= position) {
@@ -293,7 +347,9 @@
     }
 
     LiveInterval* new_interval = new (allocator_) LiveInterval(allocator_, type_);
+    new_interval->next_sibling_ = next_sibling_;
     next_sibling_ = new_interval;
+    new_interval->parent_ = parent_;
 
     new_interval->first_use_ = first_use_;
     LiveRange* current = first_range_;
@@ -383,21 +439,36 @@
   // Live interval that is the result of a split.
   LiveInterval* next_sibling_;
 
+  // The first interval, from which the split intervals come.
+  LiveInterval* parent_;
+
   // The register allocated to this interval.
   int register_;
 
+  // The spill slot allocated to this interval.
+  int spill_slot_;
+
+  // Whether the interval is for a fixed register.
+  bool is_fixed_;
+
+  // The instruction represented by this interval.
+  HInstruction* const defined_by_;
+
   static constexpr int kNoRegister = -1;
+  static constexpr int kNoSpillSlot = -1;
 
   DISALLOW_COPY_AND_ASSIGN(LiveInterval);
 };
 
 class SsaLivenessAnalysis : public ValueObject {
  public:
-  explicit SsaLivenessAnalysis(const HGraph& graph)
+  SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* codegen)
       : graph_(graph),
+        codegen_(codegen),
         linear_post_order_(graph.GetArena(), graph.GetBlocks().Size()),
         block_infos_(graph.GetArena(), graph.GetBlocks().Size()),
         instructions_from_ssa_index_(graph.GetArena(), 0),
+        instructions_from_lifetime_position_(graph.GetArena(), 0),
         number_of_ssa_values_(0) {
     block_infos_.SetSize(graph.GetBlocks().Size());
   }
@@ -424,6 +495,14 @@
     return instructions_from_ssa_index_.Get(index);
   }
 
+  HInstruction* GetInstructionFromPosition(size_t index) const {
+    return instructions_from_lifetime_position_.Get(index);
+  }
+
+  size_t GetMaxLifetimePosition() const {
+    return instructions_from_lifetime_position_.Size() * 2 - 1;
+  }
+
   size_t GetNumberOfSsaValues() const {
     return number_of_ssa_values_;
   }
@@ -458,14 +537,52 @@
   bool UpdateLiveOut(const HBasicBlock& block);
 
   const HGraph& graph_;
+  CodeGenerator* const codegen_;
   GrowableArray<HBasicBlock*> linear_post_order_;
   GrowableArray<BlockInfo*> block_infos_;
+
+  // Temporary array used when computing live_in, live_out, and kill sets.
   GrowableArray<HInstruction*> instructions_from_ssa_index_;
+
+  // Temporary array used when inserting moves in the graph.
+  GrowableArray<HInstruction*> instructions_from_lifetime_position_;
   size_t number_of_ssa_values_;
 
   DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis);
 };
 
+class HLinearOrderIterator : public ValueObject {
+ public:
+  explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness)
+      : post_order_(liveness.GetLinearPostOrder()), index_(liveness.GetLinearPostOrder().Size()) {}
+  // Walks the stored post order backwards: index_ is one past the current block, 0 means done.
+  bool Done() const { return index_ == 0; }
+  HBasicBlock* Current() const { return post_order_.Get(index_ - 1); }
+  void Advance() { DCHECK(!Done()); --index_; }  // Check first: an unsigned index_ is never < 0.
+
+ private:
+  const GrowableArray<HBasicBlock*>& post_order_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
+};
+
+class HLinearPostOrderIterator : public ValueObject {
+ public:
+  explicit HLinearPostOrderIterator(const SsaLivenessAnalysis& liveness)
+      : post_order_(liveness.GetLinearPostOrder()), index_(0) {}
+  // Walks the liveness analysis' linear post order array front to back.
+  bool Done() const { return index_ == post_order_.Size(); }
+  HBasicBlock* Current() const { return post_order_.Get(index_); }
+  void Advance() { ++index_; }
+
+ private:
+  const GrowableArray<HBasicBlock*>& post_order_;  // Reference to the array held by the analysis.
+  size_t index_;  // Position of the current block in post_order_.
+
+  DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 3b354f1..088a5c4 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -87,6 +87,13 @@
   graph->TransformToSSA();
   ReNumberInstructions(graph);
 
+  // Test that phis had their type set.
+  for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) {
+    for (HInstructionIterator it(graph->GetBlocks().Get(i)->GetPhis()); !it.Done(); it.Advance()) {
+      ASSERT_NE(it.Current()->GetType(), Primitive::kPrimVoid);
+    }
+  }
+
   SsaPrettyPrinter printer(graph);
   printer.VisitInsertionOrder();
 
diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc
new file mode 100644
index 0000000..53fa74e
--- /dev/null
+++ b/compiler/optimizing/ssa_type_propagation.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ssa_type_propagation.h"
+
+#include "nodes.h"
+
+namespace art {
+
+// Sticky merge: a kPrimFloat, kPrimDouble or kPrimNot `existing` wins, else `new_type` does.
+static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) {
+  // We trust the verifier has already done the necessary checking.
+  const bool keep_existing = existing == Primitive::kPrimFloat
+      || existing == Primitive::kPrimDouble
+      || existing == Primitive::kPrimNot;
+  if (keep_existing) {
+    return existing;
+  }
+  return new_type;
+}
+
+// Recomputes the type of `phi` by folding MergeTypes over all of its inputs, stores
+// it on the phi, and returns whether it differs from the type the phi had before.
+static bool UpdateType(HPhi* phi) {
+  const Primitive::Type previous = phi->GetType();
+
+  Primitive::Type merged = Primitive::kPrimVoid;
+  const size_t input_count = phi->InputCount();
+  for (size_t index = 0; index != input_count; ++index) {
+    merged = MergeTypes(merged, phi->InputAt(index)->GetType());
+  }
+  phi->SetType(merged);
+  return merged != previous;
+}
+
+void SsaTypePropagation::Run() {
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    VisitBasicBlock(it.Current());  // First pass: type the phis, queueing those to revisit.
+  }
+  ProcessWorklist();  // Then re-evaluate queued phis until the worklist is empty.
+}
+
+// Types the phis of `block`. Loop header phis are seeded from their first input and
+// always queued; other phis are merged from all inputs, and the phis using them are
+// queued only when that changed the type.
+void SsaTypePropagation::VisitBasicBlock(HBasicBlock* block) {
+  const bool is_loop_header = block->IsLoopHeader();
+  for (HInstructionIterator phis(block->GetPhis()); !phis.Done(); phis.Advance()) {
+    HPhi* phi = phis.Current()->AsPhi();
+    if (is_loop_header) {
+      // Set the initial type for the phi. Use the non back edge input for reaching
+      // a fixed point faster.
+      phi->SetType(phi->InputAt(0)->GetType());
+      AddToWorklist(phi);
+    } else if (UpdateType(phi)) {
+      // The merged type changed, so phis fed by this one need another look.
+      AddDependentInstructionsToWorklist(phi);
+    }
+  }
+}
+
+// Drains the worklist; a phi whose type changes puts the phis using it back in the queue.
+void SsaTypePropagation::ProcessWorklist() {
+  while (!worklist_.IsEmpty()) {
+    HPhi* const phi = worklist_.Pop();
+    if (!UpdateType(phi)) continue;  // Type unchanged: nothing downstream to redo.
+    AddDependentInstructionsToWorklist(phi);
+  }
+}
+
+void SsaTypePropagation::AddToWorklist(HPhi* instruction) {
+  worklist_.Add(instruction);  // Appended unconditionally; no check for duplicates here.
+}
+
+// Queues every user of `instruction` that is itself a phi; other users are left alone.
+void SsaTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
+  for (HUseIterator<HInstruction> uses(instruction->GetUses()); !uses.Done(); uses.Advance()) {
+    HPhi* const user_phi = uses.Current()->GetUser()->AsPhi();
+    if (user_phi == nullptr) continue;  // AsPhi() returned null for this user.
+    AddToWorklist(user_phi);
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/ssa_type_propagation.h b/compiler/optimizing/ssa_type_propagation.h
new file mode 100644
index 0000000..5f471a9
--- /dev/null
+++ b/compiler/optimizing/ssa_type_propagation.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_
+#define ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_
+
+#include "nodes.h"
+
+namespace art {
+
+// Compute and propagate types of phis in the graph.
+class SsaTypePropagation : public ValueObject {
+ public:
+  explicit SsaTypePropagation(HGraph* graph)
+      : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+
+  void Run();  // Entry point of the pass.
+
+ private:
+  void VisitBasicBlock(HBasicBlock* block);
+  void ProcessWorklist();
+  void AddToWorklist(HPhi* phi);
+  void AddDependentInstructionsToWorklist(HPhi* phi);
+
+  HGraph* const graph_;
+  GrowableArray<HPhi*> worklist_;  // Phis whose type still has to be (re)computed.
+
+  static constexpr size_t kDefaultWorklistSize = 8;  // Size hint given to worklist_'s constructor.
+
+  DISALLOW_COPY_AND_ASSIGN(SsaTypePropagation);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_
diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc
index ca4635d..6a39641 100644
--- a/compiler/utils/arena_allocator.cc
+++ b/compiler/utils/arena_allocator.cc
@@ -215,7 +215,7 @@
 }
 
 void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
-  size_t rounded_bytes = (bytes + 3 + kValgrindRedZoneBytes) & ~3;
+  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8);
   if (UNLIKELY(ptr_ + rounded_bytes > end_)) {
     // Obtain a new block.
     ObtainNewArenaForAllocation(rounded_bytes);
diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h
index dbe482d..ac3938f 100644
--- a/compiler/utils/arena_allocator.h
+++ b/compiler/utils/arena_allocator.h
@@ -156,7 +156,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    bytes = RoundUp(bytes, 4);
+    bytes = RoundUp(bytes, 8);
     if (UNLIKELY(ptr_ + bytes > end_)) {
       // Obtain a new block.
       ObtainNewArenaForAllocation(bytes);
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
index b8b0e6e..aeb2f76 100644
--- a/compiler/utils/scoped_arena_allocator.cc
+++ b/compiler/utils/scoped_arena_allocator.cc
@@ -92,7 +92,7 @@
 }
 
 void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
-  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 4);
+  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8);
   uint8_t* ptr = top_ptr_;
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h
index c090062..37799cb 100644
--- a/compiler/utils/scoped_arena_allocator.h
+++ b/compiler/utils/scoped_arena_allocator.h
@@ -67,7 +67,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    size_t rounded_bytes = RoundUp(bytes, 4);
+    size_t rounded_bytes = RoundUp(bytes, 8);
     uint8_t* ptr = top_ptr_;
     if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
       ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 7a832c1..8d532c7 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -26,6 +26,7 @@
 	base/hex_dump.cc \
 	base/logging.cc \
 	base/mutex.cc \
+	base/scoped_flock.cc \
 	base/stringpiece.cc \
 	base/stringprintf.cc \
 	base/timing_logger.cc \
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 69f5957..9a877f6 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -196,6 +196,11 @@
     .cfi_adjust_cfa_offset -176
 .endm
 
+.macro POP_REF_ONLY_CALLEE_SAVE_FRAME
+    add sp, sp, #176
+    .cfi_adjust_cfa_offset -176
+.endm
+
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     ret
@@ -479,7 +484,7 @@
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
-    ldr    x2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE]  // pass caller Method*
+    ldr    w2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE]  // pass caller Method*
     mov    x3, xSELF                      // pass Thread::Current
     mov    x4, sp
     bl     \cxx_name                      // (method_idx, this, caller, Thread*, SP)
@@ -600,12 +605,12 @@
     str x0, [x4]
 
 .Lexit_art_quick_invoke_stub\@:
-    ldp x2, x19, [x29, #32]   // Restore stack pointer and x19.
+    ldp x2, x19, [xFP, #32]   // Restore stack pointer and x19.
     .cfi_restore x19
     mov sp, x2
     .cfi_restore sp
 
-    ldp x29, x30, [x29]    // Restore old frame pointer and link register.
+    ldp xFP, xLR, [xFP]    // Restore old frame pointer and link register.
     .cfi_restore x29
     .cfi_restore x30
 
@@ -1577,9 +1582,74 @@
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END art_quick_to_interpreter_bridge
 
-UNIMPLEMENTED art_quick_instrumentation_entry
-UNIMPLEMENTED art_quick_instrumentation_exit
-UNIMPLEMENTED art_quick_deoptimize
+
+//
+// Instrumentation-related stubs
+//
+    .extern artInstrumentationMethodEntryFromCode
+ENTRY art_quick_instrumentation_entry
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+    mov   x19, x0             // Preserve method reference in a callee-save.
+
+    mov   x2, xSELF           // Pass Thread.
+    mov   x3, sp              // Pass SP.
+    mov   x4, xLR             // Pass LR.
+    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP, LR)
+
+    mov   x9, x0              // x0 = result of call.
+    mov   x0, x19             // Reload method reference.
+
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // Note: will restore xSELF
+    adr   xLR, art_quick_instrumentation_exit
+    br    x9                 // Tail-call method with lr set to art_quick_instrumentation_exit.
+END art_quick_instrumentation_entry
+
+    .extern artInstrumentationMethodExitFromCode
+ENTRY art_quick_instrumentation_exit
+    mov   xLR, #0             // Clobber LR for later checks.
+
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+
+    // We need to save x0 and d0. We could use a callee-save from SETUP_REF_ONLY, but then
+    // we would need to fully restore it. As there are a lot of callee-save registers, it seems
+    // easier to have an extra small stack area.
+
+    str x0, [sp, #-16]!       // Save integer result.
+    .cfi_adjust_cfa_offset 16
+    str d0,  [sp, #8]         // Save floating-point result.
+
+    add   x1, sp, #16         // Pass SP.
+    mov   x2, x0              // Pass integer result (x0 must still hold it here).
+    fmov  x3, d0              // Pass floating-point result.
+    mov   x0, xSELF           // Pass Thread. Done last, as it overwrites the result in x0.
+    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
+
+    mov   x9, x0              // Return address from instrumentation call.
+    mov   xLR, x1             // x1 is holding link register if we're to bounce to deoptimize
+
+    ldr   d0, [sp, #8]        // Restore floating-point result.
+    ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
+    .cfi_adjust_cfa_offset -16
+
+    POP_REF_ONLY_CALLEE_SAVE_FRAME
+
+    br    x9                  // Tail-call out.
+END art_quick_instrumentation_exit
+
+    /*
+     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimize
+ENTRY art_quick_deoptimize
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    mov    x0, xSELF          // Pass thread.
+    mov    x1, sp             // Pass SP.
+    bl     artDeoptimize      // artDeoptimize(Thread*, SP)
+END art_quick_deoptimize
+
+
 UNIMPLEMENTED art_quick_indexof
 
    /*
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 07268ea..989ecf9 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1152,8 +1152,92 @@
 END_FUNCTION art_quick_resolution_trampoline
 
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    int3
-    int3
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    // This also stores the native ArtMethod reference at the bottom of the stack.
+
+    movl %esp, %ebp                 // save SP at callee-save frame
+    movl %esp, %edi
+    CFI_DEF_CFA_REGISTER(edi)
+    subl LITERAL(5120), %esp
+    // prepare for artQuickGenericJniTrampoline call
+    // (Thread*,  SP)
+    //  (esp)    4(esp)   <= C calling convention
+    //  fs:...    ebp     <= where they are
+    // Also: PLT, so need GOT in ebx.
+
+    subl LITERAL(8), %esp         // Padding for 16B alignment.
+    pushl %ebp                    // Pass SP (to ArtMethod).
+    pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
+    SETUP_GOT_NOSAVE              // Clobbers ebx.
+    call PLT_SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
+    // Drop call stack.
+    addl LITERAL(16), %esp
+
+    // At the bottom of the alloca we now have the name pointer to the method=bottom of callee-save
+    // get the adjusted frame pointer
+    popl %ebp
+
+    // Check for error, negative value.
+    test %eax, %eax
+    js .Lentry_error
+
+    // release part of the alloca, get the code pointer
+    addl %eax, %esp
+    popl %eax
+
+    // On x86 there are no registers passed, so nothing to pop here.
+
+    // Native call.
+    call *%eax
+
+    // Pop native stack, but keep the space that was reserved cookie.
+    movl %ebp, %esp
+    subl LITERAL(16), %esp        // Alignment.
+
+    // result sign extension is handled in C code
+    // prepare for artQuickGenericJniEndTrampoline call
+    // (Thread*,  SP,  result, result_f)
+    //  (esp)   4(esp)  8(esp)  16(esp)    <= C calling convention
+    //  fs:...    ebp  eax:edx   xmm0      <= where they are
+
+    subl LITERAL(8), %esp         // Pass float result.
+    movsd %xmm0, (%esp)
+    pushl %edx                    // Pass int result.
+    pushl %eax
+    pushl %ebp                    // Pass SP (to ArtMethod).
+    pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
+    call PLT_SYMBOL(artQuickGenericJniEndTrampoline)
+
+    // Tear down the alloca.
+    movl %edi, %esp
+    CFI_DEF_CFA_REGISTER(esp)
+
+    // Pending exceptions possible.
+    mov %fs:THREAD_EXCEPTION_OFFSET, %ebx
+    testl %ebx, %ebx
+    jnz .Lexception_in_native
+
+    // Tear down the callee-save frame.
+    addl MACRO_LITERAL(4), %esp     // Remove padding
+    CFI_ADJUST_CFA_OFFSET(-4)
+    POP ecx
+    addl MACRO_LITERAL(4), %esp     // Avoid edx, as it may be part of the result.
+    CFI_ADJUST_CFA_OFFSET(-4)
+    POP ebx
+    POP ebp  // Restore callee saves
+    POP esi
+    POP edi
+    // store into fpr, for when it's a fpr return...
+    movd %eax, %xmm0
+    movd %edx, %xmm1
+    punpckldq %xmm1, %xmm0
+    ret
+.Lentry_error:
+    movl %edi, %esp
+    CFI_DEF_CFA_REGISTER(esp)
+.Lexception_in_native:
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_generic_jni_trampoline
 
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 1a60557..c9220c8 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1002,15 +1002,12 @@
 END_FUNCTION art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. Clobbers %rax (which will be clobbered later anyways).
-     *
-     * xmm0 is a hidden argument that holds the target method's dex method index.
-     * TODO: With proper hard-float support, this needs to be kept in sync with the quick compiler.
+     * Called to resolve an imt conflict.
+     * rax is a hidden argument that holds the target method's dex method index.
      */
 DEFINE_FUNCTION art_quick_imt_conflict_trampoline
     movl 8(%rsp), %edi            // load caller Method*
     movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
-    movd %xmm0, %rax               // get target method index stored in xmm0
     movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
     jmp art_quick_invoke_interface_trampoline_local
 END_FUNCTION art_quick_imt_conflict_trampoline
@@ -1296,14 +1293,77 @@
     /*
      * Routine that intercepts method calls and returns.
      */
-UNIMPLEMENTED art_quick_instrumentation_entry
-UNIMPLEMENTED art_quick_instrumentation_exit
+DEFINE_FUNCTION art_quick_instrumentation_entry
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    // The method pointer arrives in %rdi and is needed again after the C call below.
+    movq %rdi, %r12               // Preserve method pointer in a callee-save.
+    // Load the remaining arguments: thread, SP and the return PC stored in the frame.
+    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
+    movq %rsp, %rcx                     // Pass SP.
+    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %r8   // Pass return PC.
+
+    call PLT_SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP, LR)
+
+                                  // %rax = result of call.
+    movq %r12, %rdi               // Reload method pointer.
+    // Overwrite the return PC slot of the frame with the instrumentation exit stub address.
+    leaq art_quick_instrumentation_exit_local(%rip), %r12   // Set up return through instrumentation
+    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.
+
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    // Continue at the address that the entry hook returned in %rax.
+    jmp *%rax                     // Tail call to intended method.
+END_FUNCTION art_quick_instrumentation_entry
+
+DEFINE_FUNCTION art_quick_instrumentation_exit
+    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
+
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+
+    // We need to save rax and xmm0. We could use a callee-save from SETUP_REF_ONLY, but then
+    // we would need to fully restore it. As there are a good number of callee-save registers, it
+    // seems easier to have an extra small stack area. But this should be revisited.
+
+    movq  %rsp, %rsi                          // Pass SP (taken before the result spills below).
+
+    PUSH rax                  // Save integer result.
+    subq LITERAL(8), %rsp     // Save floating-point result.
+    CFI_ADJUST_CFA_OFFSET(8)
+    movq %xmm0, (%rsp)        // Store all 64 bits; movd would drop the upper half of a double.
+
+    movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
+    movq  %rax, %rdx                          // Pass integer result.
+    movq  %xmm0, %rcx                         // Pass floating-point result.
+
+    call PLT_SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)
+
+    movq  %rax, %rdi          // Store return PC
+    movq  %rdx, %rsi          // Store second return PC in hidden arg.
+
+    movq (%rsp), %xmm0        // Restore floating-point result (full 64 bits).
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    POP rax                   // Restore integer result.
+
+    addq LITERAL(FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %rsp   // Drop save frame and fake return pc.
+
+    jmp   *%rdi               // Return.
+END_FUNCTION art_quick_instrumentation_exit
 
     /*
      * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
      * will long jump to the upcall with a special exception of -1.
      */
-UNIMPLEMENTED art_quick_deoptimize
+DEFINE_FUNCTION art_quick_deoptimize
+    pushq %rsi                     // Fake that we were called: hidden arg %rsi becomes the return PC.
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+                                   // Stack should be aligned now.
+    movq %rsp, %rsi                           // Pass SP.
+    movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
+    call PLT_SYMBOL(artDeoptimize) // artDeoptimize(Thread*, SP)
+    int3                           // Unreachable: traps if artDeoptimize ever returns here.
+END_FUNCTION art_quick_deoptimize
+
 
     /*
      * String's compareTo.
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
new file mode 100644
index 0000000..c0bce84
--- /dev/null
+++ b/runtime/base/scoped_flock.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scoped_flock.h"
+
+#include <sys/file.h>
+#include <sys/stat.h>
+
+#include "base/logging.h"
+#include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
+
+namespace art {
+
+bool ScopedFlock::Init(const char* filename, std::string* error_msg) {
+  while (true) {  // Repeat until the locked descriptor and the path name the same file.
+    file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));  // O_CREAT: create if missing.
+    if (file_.get() == NULL) {
+      *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
+      return false;
+    }
+    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));  // Exclusive lock.
+    if (flock_result != 0) {
+      *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno));
+      return false;
+    }
+    struct stat fstat_stat;  // Identity (device, inode) of the descriptor that was just locked.
+    int fstat_result = TEMP_FAILURE_RETRY(fstat(file_->Fd(), &fstat_stat));
+    if (fstat_result != 0) {
+      *error_msg = StringPrintf("Failed to fstat file '%s': %s", filename, strerror(errno));
+      return false;
+    }
+    struct stat stat_stat;  // Identity (device, inode) of whatever is at the path right now.
+    int stat_result = TEMP_FAILURE_RETRY(stat(filename, &stat_stat));
+    if (stat_result != 0) {
+      PLOG(WARNING) << "Failed to stat, will retry: " << filename;
+      // ENOENT can happen if someone racing with us unlinks the file we created so just retry.
+      continue;  // NOTE(review): every stat() failure is retried here, not only ENOENT.
+    }
+    if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) {
+      LOG(WARNING) << "File changed while locking, will retry: " << filename;
+      continue;  // The path now names a different file than the one locked; start over.
+    }
+    return true;  // file_ is open and locked, and is still the file found at filename.
+  }
+}
+
+File* ScopedFlock::GetFile() {
+  CHECK(file_.get() != NULL);  // Init() must have opened a file before this is called.
+  return file_.get();  // Raw pointer only; file_ keeps ownership.
+}
+
+ScopedFlock::ScopedFlock() { }  // No file is opened or locked here; that happens in Init().
+
+ScopedFlock::~ScopedFlock() {
+  if (file_.get() != NULL) {  // Nothing to unlock if no file was ever opened.
+    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN));
+    CHECK_EQ(0, flock_result);  // Releasing the lock is required to succeed.
+  }
+}
+
+}  // namespace art
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
new file mode 100644
index 0000000..26b4eb0
--- /dev/null
+++ b/runtime/base/scoped_flock.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_SCOPED_FLOCK_H_
+#define ART_RUNTIME_BASE_SCOPED_FLOCK_H_
+
+#include <memory>
+#include <string>
+
+#include "base/macros.h"
+#include "os.h"
+
+namespace art {
+
+class ScopedFlock {
+ public:
+  ScopedFlock();
+
+  // Attempts to acquire an exclusive file lock (see flock(2)) on the file
+  // at filename, and blocks until it can do so. The file is opened with
+  // O_CREAT, so it is created if it does not exist yet.
+  //
+  // Returns true if the lock could be acquired, or false (with error_msg
+  // set) if opening, locking or fstat-ing the file failed. If the path no
+  // longer names the locked file afterwards, the whole attempt is retried.
+  bool Init(const char* filename, std::string* error_msg);
+
+  // Returns the (locked) file associated with this instance.
+  File* GetFile();
+  ~ScopedFlock();  // Unlocks the file if one is held.
+ private:
+  std::unique_ptr<File> file_;  // Null until Init() opens the file.
+  DISALLOW_COPY_AND_ASSIGN(ScopedFlock);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_SCOPED_FLOCK_H_
diff --git a/runtime/base/scoped_flock_test.cc b/runtime/base/scoped_flock_test.cc
new file mode 100644
index 0000000..8fa181a
--- /dev/null
+++ b/runtime/base/scoped_flock_test.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scoped_flock.h"
+#include "common_runtime_test.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class ScopedFlockTest : public CommonRuntimeTest {};
+
+TEST_F(ScopedFlockTest, TestLocking) {
+  ScratchFile scratch_file;
+  std::string error_msg;
+
+  // NOTE: Locks applied using flock(2) and fcntl(2) are oblivious
+  // to each other, so attempting to query locks set by flock using
+  // fcntl(,F_GETLK,) will not work; only the result of Init() is
+  // checked. See kernel doc at Documentation/filesystems/locks.txt.
+  ScopedFlock file_lock;
+  ASSERT_TRUE(file_lock.Init(scratch_file.GetFilename().c_str(),
+                             &error_msg));
+  // A path that cannot be opened must make Init() report failure.
+  ASSERT_FALSE(file_lock.Init("/guaranteed/not/to/exist", &error_msg));
+}
+
+}  // namespace art
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index a43fda1..7385382 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -27,6 +27,7 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
+#include "base/scoped_flock.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
@@ -701,60 +702,6 @@
   return dex_file;
 }
 
-class ScopedFlock {
- public:
-  ScopedFlock() {}
-
-  bool Init(const char* filename, std::string* error_msg) {
-    while (true) {
-      file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
-      if (file_.get() == NULL) {
-        *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
-        return false;
-      }
-      int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));
-      if (flock_result != 0) {
-        *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno));
-        return false;
-      }
-      struct stat fstat_stat;
-      int fstat_result = TEMP_FAILURE_RETRY(fstat(file_->Fd(), &fstat_stat));
-      if (fstat_result != 0) {
-        *error_msg = StringPrintf("Failed to fstat file '%s': %s", filename, strerror(errno));
-        return false;
-      }
-      struct stat stat_stat;
-      int stat_result = TEMP_FAILURE_RETRY(stat(filename, &stat_stat));
-      if (stat_result != 0) {
-        PLOG(WARNING) << "Failed to stat, will retry: " << filename;
-        // ENOENT can happen if someone racing with us unlinks the file we created so just retry.
-        continue;
-      }
-      if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) {
-        LOG(WARNING) << "File changed while locking, will retry: " << filename;
-        continue;
-      }
-      return true;
-    }
-  }
-
-  File& GetFile() {
-    return *file_;
-  }
-
-  ~ScopedFlock() {
-    if (file_.get() != NULL) {
-      int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN));
-      CHECK_EQ(0, flock_result);
-    }
-  }
-
- private:
-  std::unique_ptr<File> file_;
-
-  DISALLOW_COPY_AND_ASSIGN(ScopedFlock);
-};
-
 const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(
     const char* dex_location,
     uint32_t dex_location_checksum,
@@ -785,7 +732,7 @@
 
   // Generate the output oat file for the dex file
   VLOG(class_linker) << "Generating oat file " << oat_location << " for " << dex_location;
-  if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, &error_msg)) {
+  if (!GenerateOatFile(dex_location, scoped_flock.GetFile()->Fd(), oat_location, &error_msg)) {
     CHECK(!error_msg.empty());
     error_msgs->push_back(error_msg);
     return nullptr;
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index b582abb..e573d6d 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -18,8 +18,17 @@
 #define ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
 
 #include "base/mutex.h"
+#include "instruction_set.h"
 #include "thread-inl.h"
 
+// Specific frame size code is in architecture-specific files. We include this to compile-time
+// specialize the code.
+#include "arch/arm/quick_method_frame_info_arm.h"
+#include "arch/arm64/quick_method_frame_info_arm64.h"
+#include "arch/mips/quick_method_frame_info_mips.h"
+#include "arch/x86/quick_method_frame_info_x86.h"
+#include "arch/x86_64/quick_method_frame_info_x86_64.h"
+
 namespace art {
 namespace mirror {
 class ArtMethod;
@@ -36,6 +45,34 @@
   self->VerifyStack();
 }
 
+static constexpr size_t GetCalleeSaveFrameSize(InstructionSet isa, Runtime::CalleeSaveType type) {
+  // A constexpr function must be a single return statement, hence the conditional chain per ISA.
+  return (isa == kArm || isa == kThumb2) ? arm::ArmCalleeSaveFrameSize(type) :
+         isa == kArm64 ? arm64::Arm64CalleeSaveFrameSize(type) :
+         isa == kMips ? mips::MipsCalleeSaveFrameSize(type) :
+         isa == kX86 ? x86::X86CalleeSaveFrameSize(type) :
+         isa == kX86_64 ? x86_64::X86_64CalleeSaveFrameSize(type) :
+         isa == kNone ? (LOG(FATAL) << "kNone has no frame size", 0) :
+         (LOG(FATAL) << "Unknown instruction set: " << isa, 0);  // Separator keeps the name legible.
+}
+
+// Note: this specialized statement is sanity-checked in the quick-trampoline gtest.
+static constexpr size_t GetConstExprPointerSize(InstructionSet isa) {
+  // A constexpr function must be a single return statement, hence the conditional chain per ISA.
+  return (isa == kArm || isa == kThumb2) ? kArmPointerSize :
+         isa == kArm64 ? kArm64PointerSize :
+         isa == kMips ? kMipsPointerSize :
+         isa == kX86 ? kX86PointerSize :
+         isa == kX86_64 ? kX86_64PointerSize :
+         isa == kNone ? (LOG(FATAL) << "kNone has no pointer size", 0) :
+         (LOG(FATAL) << "Unknown instruction set: " << isa, 0);  // Separator keeps the name legible.
+}
+
+// Note: this specialized statement is sanity-checked in the quick-trampoline gtest.
+static constexpr size_t GetCalleeSavePCOffset(InstructionSet isa, Runtime::CalleeSaveType type) {
+  return GetCalleeSaveFrameSize(isa, type) - GetConstExprPointerSize(isa);  // Last slot of frame.
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 3178cde..5cb0f36 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -248,10 +248,7 @@
 extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
                                         Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Runtime* runtime = Runtime::Current();
-  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  uint32_t frame_size =
-      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes();
+  constexpr size_t frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsOnly);
   mirror::ArtMethod* referrer =
       reinterpret_cast<StackReference<mirror::ArtMethod>*>(
           reinterpret_cast<uint8_t*>(sp) + frame_size)->AsMirrorPtr();
@@ -262,7 +259,7 @@
     field->Set64<false>(obj, new_value);
     return 0;  // success
   }
-  sp->Assign(callee_save);
+  sp->Assign(Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly));
   self->SetTopOfStack(sp, 0);
   field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
                                                           sizeof(int64_t));
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 6ef075d..d161d0b 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -15,6 +15,7 @@
  */
 
 #include "callee_save_frame.h"
+#include "instruction_set.h"
 #include "instrumentation.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
@@ -40,9 +41,10 @@
   return result;
 }
 
-extern "C" uint64_t artInstrumentationMethodExitFromCode(Thread* self,
-                                                         StackReference<mirror::ArtMethod>* sp,
-                                                         uint64_t gpr_result, uint64_t fpr_result)
+extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self,
+                                                              StackReference<mirror::ArtMethod>* sp,
+                                                              uint64_t gpr_result,
+                                                              uint64_t fpr_result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // TODO: use FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly) not the hand inlined below.
   //       We use the hand inline version to ensure the return_pc is assigned before verifying the
@@ -50,19 +52,16 @@
   // Be aware the store below may well stomp on an incoming argument.
   Locks::mutator_lock_->AssertSharedHeld(self);
   Runtime* runtime = Runtime::Current();
-  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  sp->Assign(callee_save);
-  uint32_t return_pc_offset = callee_save->GetReturnPcOffsetInBytes(
-      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes());
+  sp->Assign(runtime->GetCalleeSaveMethod(Runtime::kRefsOnly));
+  uint32_t return_pc_offset = GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsOnly);
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) +
                                                       return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
   self->SetTopOfStack(sp, 0);
   self->VerifyStack();
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  uint64_t return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(self, return_pc,
-                                                                                   gpr_result,
-                                                                                   fpr_result);
+  TwoWordReturn return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(
+      self, return_pc, gpr_result, fpr_result);
   self->VerifyStack();
   return return_or_deoptimize_pc;
 }
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 05033fc..514d1aa 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -20,6 +20,7 @@
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "gc/accounting/card_table-inl.h"
+#include "instruction_set.h"
 #include "interpreter/interpreter.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
@@ -36,6 +37,9 @@
 class QuickArgumentVisitor {
   // Number of bytes for each out register in the caller method's frame.
   static constexpr size_t kBytesStackArgLocation = 4;
+  // Frame size in bytes of a callee-save frame for RefsAndArgs.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize =
+      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
 #if defined(__arm__)
   // The callee save frame is pointed to by SP.
   // | argN       |  |
@@ -58,7 +62,6 @@
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 8;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 44;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 48;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -86,10 +89,9 @@
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
   static constexpr size_t kNumQuickGprArgs = 7;  // 7 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =16;  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 144;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 296;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 304;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -114,7 +116,6 @@
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 60;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 64;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -139,7 +140,6 @@
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 28;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 32;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -172,16 +172,11 @@
   // | Padding         |
   // | RDI/Method*     |  <- sp
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
-#ifdef TARGET_REX_SUPPORT
   static constexpr size_t kNumQuickGprArgs = 5;  // 5 arguments passed in GPRs.
-#else
-  static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs if r8..r15 not enabled.
-#endif
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 176;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     switch (gpr_index) {
       case 0: return (4 * GetBytesPerGprSpillLocation(kRuntimeISA));
@@ -223,10 +218,7 @@
       stack_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize
                   + StackArgumentStartFromShorty(is_static, shorty, shorty_len)),
       gpr_index_(0), fpr_index_(0), stack_index_(0), cur_type_(Primitive::kPrimVoid),
-      is_split_long_or_double_(false) {
-    DCHECK_EQ(kQuickCalleeSaveFrame_RefAndArgs_FrameSize,
-              Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
-  }
+      is_split_long_or_double_(false) { }
 
   virtual ~QuickArgumentVisitor() {}
 
@@ -1203,10 +1195,8 @@
     size_t scope_and_method = handle_scope_size + sizeof(StackReference<mirror::ArtMethod>);
 
     sp8 -= scope_and_method;
-    // Align by kStackAlignment
-    uintptr_t sp_to_align = reinterpret_cast<uintptr_t>(sp8);
-    sp_to_align = RoundDown(sp_to_align, kStackAlignment);
-    sp8 = reinterpret_cast<uint8_t*>(sp_to_align);
+    // Align by kStackAlignment.
+    sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
 
     uint8_t* sp8_table = sp8 + sizeof(StackReference<mirror::ArtMethod>);
     *table = reinterpret_cast<HandleScope*>(sp8_table);
@@ -1226,9 +1216,8 @@
 
     // Next comes the native call stack.
     sp8 -= GetStackSize();
-    // Now align the call stack below. This aligns by 16, as AArch64 seems to require.
-    uintptr_t mask = ~0x0F;
-    sp8 = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(sp8) & mask);
+    // Align by kStackAlignment.
+    sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
     *start_stack = reinterpret_cast<uintptr_t*>(sp8);
 
     // put fprs and gprs below
@@ -1626,70 +1615,19 @@
   }
 }
 
-// The following definitions create return types for two word-sized entities that will be passed
-// in registers so that memory operations for the interface trampolines can be avoided. The entities
-// are the resolved method and the pointer to the code to be invoked.
+// We use TwoWordReturn to optimize scalar returns. We use the hi value for code, and the lo value
+// for the method pointer.
 //
-// On x86, ARM32 and MIPS, this is given for a *scalar* 64bit value. The definition thus *must* be
-// uint64_t or long long int. We use the upper 32b for code, and the lower 32b for the method.
-//
-// On x86_64 and ARM64, structs are decomposed for allocation, so we can create a structs of two
-// size_t-sized values.
-//
-// We need two operations:
-//
-// 1) A flag value that signals failure. The assembly stubs expect the method part to be "0".
-//    GetFailureValue() will return a value that has method == 0.
-//
-// 2) A value that combines a code pointer and a method pointer.
-//    GetSuccessValue() constructs this.
-
-#if defined(__i386__) || defined(__arm__) || defined(__mips__)
-typedef uint64_t MethodAndCode;
-
-// Encodes method_ptr==nullptr and code_ptr==nullptr
-static constexpr MethodAndCode GetFailureValue() {
-  return 0;
-}
-
-// Use the lower 32b for the method pointer and the upper 32b for the code pointer.
-static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) {
-  uint32_t method_uint = reinterpret_cast<uint32_t>(method);
-  uint64_t code_uint = reinterpret_cast<uint32_t>(code);
-  return ((code_uint << 32) | method_uint);
-}
-
-#elif defined(__x86_64__) || defined(__aarch64__)
-struct MethodAndCode {
-  uintptr_t method;
-  uintptr_t code;
-};
-
-// Encodes method_ptr==nullptr. Leaves random value in code pointer.
-static MethodAndCode GetFailureValue() {
-  MethodAndCode ret;
-  ret.method = 0;
-  return ret;
-}
-
-// Write values into their respective members.
-static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) {
-  MethodAndCode ret;
-  ret.method = reinterpret_cast<uintptr_t>(method);
-  ret.code = reinterpret_cast<uintptr_t>(code);
-  return ret;
-}
-#else
-#error "Unsupported architecture"
-#endif
+// It is valid to use this, as at the usage points here (returns from C functions) we are assuming
+// to hold the mutator lock (see SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) annotations).
 
 template<InvokeType type, bool access_check>
-static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
+static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
                                      mirror::ArtMethod* caller_method,
                                      Thread* self, StackReference<mirror::ArtMethod>* sp);
 
 template<InvokeType type, bool access_check>
-static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
+static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
                                      mirror::ArtMethod* caller_method,
                                      Thread* self, StackReference<mirror::ArtMethod>* sp) {
   mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check,
@@ -1712,7 +1650,7 @@
 
     if (UNLIKELY(method == NULL)) {
       CHECK(self->IsExceptionPending());
-      return GetFailureValue();  // Failure.
+      return GetTwoWordFailureValue();  // Failure.
     }
   }
   DCHECK(!self->IsExceptionPending());
@@ -1722,13 +1660,14 @@
   DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
       << method->GetDexFile()->GetLocation();
 
-  return GetSuccessValue(code, method);
+  return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(code),
+                                reinterpret_cast<uintptr_t>(method));
 }
 
 // Explicit artInvokeCommon template function declarations to please analysis tool.
 #define EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(type, access_check)                                \
   template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                          \
-  MethodAndCode artInvokeCommon<type, access_check>(uint32_t method_idx,                        \
+  TwoWordReturn artInvokeCommon<type, access_check>(uint32_t method_idx,                        \
                                                     mirror::Object* this_object,                \
                                                     mirror::ArtMethod* caller_method,           \
                                                     Thread* self,                               \
@@ -1748,7 +1687,7 @@
 
 
 // See comments in runtime_support_asm.S
-extern "C" MethodAndCode artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1757,7 +1696,7 @@
 }
 
 
-extern "C" MethodAndCode artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1765,7 +1704,7 @@
   return artInvokeCommon<kDirect, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" MethodAndCode artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1773,7 +1712,7 @@
   return artInvokeCommon<kStatic, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" MethodAndCode artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1781,7 +1720,7 @@
   return artInvokeCommon<kSuper, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" MethodAndCode artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1790,7 +1729,7 @@
 }
 
 // Determine target of interface dispatch. This object is known non-null.
-extern "C" MethodAndCode artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method,
+extern "C" TwoWordReturn artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method,
                                                       mirror::Object* this_object,
                                                       mirror::ArtMethod* caller_method,
                                                       Thread* self,
@@ -1803,71 +1742,17 @@
       FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
       ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(interface_method, this_object,
                                                                  caller_method);
-      return GetFailureValue();  // Failure.
+      return GetTwoWordFailureValue();  // Failure.
     }
   } else {
     FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
     DCHECK(interface_method == Runtime::Current()->GetResolutionMethod());
-    // Determine method index from calling dex instruction.
-#if defined(__arm__)
-    // On entry the stack pointed by sp is:
-    // | argN       |  |
-    // | ...        |  |
-    // | arg4       |  |
-    // | arg3 spill |  |  Caller's frame
-    // | arg2 spill |  |
-    // | arg1 spill |  |
-    // | Method*    | ---
-    // | LR         |
-    // | ...        |    callee saves
-    // | R3         |    arg3
-    // | R2         |    arg2
-    // | R1         |    arg1
-    // | R0         |
-    // | Method*    |  <- sp
-    DCHECK_EQ(48U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
-    uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) + kPointerSize);
-    uintptr_t caller_pc = regs[10];
-#elif defined(__i386__)
-    // On entry the stack pointed by sp is:
-    // | argN        |  |
-    // | ...         |  |
-    // | arg4        |  |
-    // | arg3 spill  |  |  Caller's frame
-    // | arg2 spill  |  |
-    // | arg1 spill  |  |
-    // | Method*     | ---
-    // | Return      |
-    // | EBP,ESI,EDI |    callee saves
-    // | EBX         |    arg3
-    // | EDX         |    arg2
-    // | ECX         |    arg1
-    // | EAX/Method* |  <- sp
-    DCHECK_EQ(32U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
-    uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp));
-    uintptr_t caller_pc = regs[7];
-#elif defined(__mips__)
-    // On entry the stack pointed by sp is:
-    // | argN       |  |
-    // | ...        |  |
-    // | arg4       |  |
-    // | arg3 spill |  |  Caller's frame
-    // | arg2 spill |  |
-    // | arg1 spill |  |
-    // | Method*    | ---
-    // | RA         |
-    // | ...        |    callee saves
-    // | A3         |    arg3
-    // | A2         |    arg2
-    // | A1         |    arg1
-    // | A0/Method* |  <- sp
-    DCHECK_EQ(64U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
-    uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp));
-    uintptr_t caller_pc = regs[15];
-#else
-    UNIMPLEMENTED(FATAL);
-    uintptr_t caller_pc = 0;
-#endif
+
+    // Find the caller PC.
+    constexpr size_t pc_offset = GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsAndArgs);
+    uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) + pc_offset);
+
+    // Map the caller PC to a dex PC.
     uint32_t dex_pc = caller_method->ToDexPc(caller_pc);
     const DexFile::CodeItem* code = caller_method->GetCodeItem();
     CHECK_LT(dex_pc, code->insns_size_in_code_units_);
@@ -1900,7 +1785,7 @@
 
     if (UNLIKELY(method == nullptr)) {
       CHECK(self->IsExceptionPending());
-      return GetFailureValue();  // Failure.
+      return GetTwoWordFailureValue();  // Failure.
     }
   }
   const void* code = method->GetEntryPointFromQuickCompiledCode();
@@ -1909,7 +1794,8 @@
   DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
       << method->GetDexFile()->GetLocation();
 
-  return GetSuccessValue(code, method);
+  return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(code),
+                                reinterpret_cast<uintptr_t>(method));
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
new file mode 100644
index 0000000..66ee218
--- /dev/null
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+
+#include "callee_save_frame.h"
+#include "common_runtime_test.h"
+#include "mirror/art_method-inl.h"
+#include "quick/quick_method_frame_info.h"
+
+namespace art {
+
+class QuickTrampolineEntrypointsTest : public CommonRuntimeTest {
+ protected:
+  static mirror::ArtMethod* CreateCalleeSaveMethod(InstructionSet isa,
+                                                   Runtime::CalleeSaveType type)
+      NO_THREAD_SAFETY_ANALYSIS {
+    Runtime* r = Runtime::Current();
+
+    Thread* t = Thread::Current();
+    t->TransitionFromSuspendedToRunnable();  // So we can create callee-save methods.
+
+    r->SetInstructionSet(isa);
+    mirror::ArtMethod* save_method = r->CreateCalleeSaveMethod(type);
+    r->SetCalleeSaveMethod(save_method, type);
+
+    t->TransitionFromRunnableToSuspended(ThreadState::kNative);  // So we can shut down.
+
+    return save_method;
+  }
+
+  static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
+      NO_THREAD_SAFETY_ANALYSIS {
+    mirror::ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
+    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+    EXPECT_EQ(frame_info.FrameSizeInBytes(), save_size) << "Expected and real size differs for "
+        << type << " core spills=" << std::hex << frame_info.CoreSpillMask() << " fp spills="
+        << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
+  }
+
+  static void CheckPCOffset(InstructionSet isa, Runtime::CalleeSaveType type, size_t pc_offset)
+      NO_THREAD_SAFETY_ANALYSIS {
+    mirror::ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
+    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+    EXPECT_EQ(save_method->GetReturnPcOffsetInBytes(), pc_offset) << "Expected and real pc offset"
+        " differs for " << type << " core spills=" << std::hex << frame_info.CoreSpillMask() <<
+        " fp spills=" << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
+  }
+};
+
+// Note: these tests are all runtime tests. They let the Runtime create the corresponding ArtMethod
+// and check against it. Technically we know and expect certain values, but the Runtime code is
+// not constexpr, so we cannot make these compile-time checks (and I want the Runtime code tested).
+
+// This test ensures that GetCalleeSaveFrameSize is correct for all callee-save frame types.
+TEST_F(QuickTrampolineEntrypointsTest, FrameSize) {
+  // We have to use a define here as the callee_save_frame.h functions are constexpr.
+#define CHECK_FRAME_SIZE(isa)                                                                     \
+  CheckFrameSize(isa, Runtime::kRefsAndArgs, GetCalleeSaveFrameSize(isa, Runtime::kRefsAndArgs)); \
+  CheckFrameSize(isa, Runtime::kRefsOnly, GetCalleeSaveFrameSize(isa, Runtime::kRefsOnly));       \
+  CheckFrameSize(isa, Runtime::kSaveAll, GetCalleeSaveFrameSize(isa, Runtime::kSaveAll))
+
+  CHECK_FRAME_SIZE(kArm);
+  CHECK_FRAME_SIZE(kArm64);
+  CHECK_FRAME_SIZE(kMips);
+  CHECK_FRAME_SIZE(kX86);
+  CHECK_FRAME_SIZE(kX86_64);
+}
+
+// This test ensures that GetConstExprPointerSize is correct with respect to
+// GetInstructionSetPointerSize.
+TEST_F(QuickTrampolineEntrypointsTest, PointerSize) {
+  EXPECT_EQ(GetInstructionSetPointerSize(kArm), GetConstExprPointerSize(kArm));
+  EXPECT_EQ(GetInstructionSetPointerSize(kArm64), GetConstExprPointerSize(kArm64));
+  EXPECT_EQ(GetInstructionSetPointerSize(kMips), GetConstExprPointerSize(kMips));
+  EXPECT_EQ(GetInstructionSetPointerSize(kX86), GetConstExprPointerSize(kX86));
+  EXPECT_EQ(GetInstructionSetPointerSize(kX86_64), GetConstExprPointerSize(kX86_64));
+}
+
+// This test ensures that the constexpr specialization of the return PC offset computation in
+// GetCalleeSavePCOffset is correct.
+TEST_F(QuickTrampolineEntrypointsTest, ReturnPC) {
+  // Ensure that the computation in callee_save_frame.h is correct.
+  // Note: we can only check against the kRuntimeISA, because the ArtMethod computation uses
+  // kPointerSize, which is wrong when the target bitwidth is not the same as the host's.
+  CheckPCOffset(kRuntimeISA, Runtime::kRefsAndArgs,
+                GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsAndArgs));
+  CheckPCOffset(kRuntimeISA, Runtime::kRefsOnly,
+                GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsOnly));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveAll,
+                GetCalleeSavePCOffset(kRuntimeISA, Runtime::kSaveAll));
+}
+
+}  // namespace art
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index d7b673e..f5f7a86 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2464,7 +2464,7 @@
 }
 
 size_t Heap::GetPercentFree() {
-  return static_cast<size_t>(100.0f * static_cast<float>(GetFreeMemory()) / GetMaxMemory());
+  return static_cast<size_t>(100.0f * static_cast<float>(GetFreeMemory()) / max_allowed_footprint_);
 }
 
 void Heap::SetIdealFootprint(size_t max_allowed_footprint) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e568b36..9b49373 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -408,7 +408,7 @@
 
   // Implements java.lang.Runtime.freeMemory.
   size_t GetFreeMemory() const {
-    return GetMaxMemory() - num_bytes_allocated_.LoadSequentiallyConsistent();
+    return max_allowed_footprint_ - num_bytes_allocated_.LoadSequentiallyConsistent();
   }
 
   // get the space that corresponds to an object's address. Current implementation searches all
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index a58df8e..7988af7 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -61,15 +61,20 @@
     }
     // Try to see if the referent is already marked by using the is_marked_callback. We can return
     // it to the mutator as long as the GC is not preserving references. If the GC is
-    // preserving references, the mutator could take a white field and move it somewhere else
-    // in the heap causing corruption since this field would get swept.
     IsMarkedCallback* const is_marked_callback = process_references_args_.is_marked_callback_;
-    if (!preserving_references_ && is_marked_callback != nullptr) {
+    if (LIKELY(is_marked_callback != nullptr)) {
       mirror::Object* const obj = is_marked_callback(referent, process_references_args_.arg_);
       // If it's null it means not marked, but it could become marked if the referent is reachable
-      // by finalizer referents. So we can not return in this case and must block.
+      // by finalizer referents. So we can not return in this case and must block. Otherwise, we
+      // can return it to the mutator as long as the GC is not preserving references, in which
+      // case only black nodes can be safely returned. If the GC is preserving references, the
+      // mutator could take a white field from a grey or white node and move it somewhere else
+      // in the heap causing corruption since this field would get swept.
       if (obj != nullptr) {
-        return obj;
+        if (!preserving_references_ ||
+           (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) {
+          return obj;
+        }
       }
     }
     condition_.WaitHoldingLocks(self);
@@ -113,14 +118,14 @@
   timings->StartSplit(concurrent ? "ProcessReferences" : "(Paused)ProcessReferences");
   // Unless required to clear soft references with white references, preserve some white referents.
   if (!clear_soft_references) {
-    TimingLogger::ScopedSplit split(concurrent ? "PreserveSomeSoftReferences" :
-        "(Paused)PreserveSomeSoftReferences", timings);
+    TimingLogger::ScopedSplit split(concurrent ? "ForwardSoftReferences" :
+        "(Paused)ForwardSoftReferences", timings);
     if (concurrent) {
       StartPreservingReferences(self);
     }
-    // References with a marked referent are removed from the list.
-    soft_reference_queue_.PreserveSomeSoftReferences(&PreserveSoftReferenceCallback,
-                                                     &process_references_args_);
+
+    soft_reference_queue_.ForwardSoftReferences(&PreserveSoftReferenceCallback,
+                                                &process_references_args_);
     process_mark_stack_callback(arg);
     if (concurrent) {
       StopPreservingReferences(self);
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index caacef5..3910c29 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -160,22 +160,23 @@
   }
 }
 
-void ReferenceQueue::PreserveSomeSoftReferences(IsMarkedCallback* preserve_callback, void* arg) {
-  ReferenceQueue cleared;
-  while (!IsEmpty()) {
-    mirror::Reference* ref = DequeuePendingReference();
+void ReferenceQueue::ForwardSoftReferences(IsMarkedCallback* preserve_callback,
+                                                void* arg) {
+  if (UNLIKELY(IsEmpty())) {
+    return;
+  }
+  mirror::Reference* const head = list_;
+  mirror::Reference* ref = head;
+  do {
     mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
     if (referent != nullptr) {
       mirror::Object* forward_address = preserve_callback(referent, arg);
-      if (forward_address == nullptr) {
-        // Either the reference isn't marked or we don't wish to preserve it.
-        cleared.EnqueuePendingReference(ref);
-      } else if (forward_address != referent) {
+      if (forward_address != nullptr && forward_address != referent) {
         ref->SetReferent<false>(forward_address);
       }
     }
-  }
-  list_ = cleared.GetList();
+    ref = ref->GetPendingNext();
+  } while (LIKELY(ref != head));
 }
 
 }  // namespace gc
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 4f223e2..1d8cc1a 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -65,7 +65,7 @@
   // Walks the reference list marking any references subject to the reference clearing policy.
   // References with a black referent are removed from the list.  References with white referents
   // biased toward saving are blackened and also removed from the list.
-  void PreserveSomeSoftReferences(IsMarkedCallback* preserve_callback, void* arg)
+  void ForwardSoftReferences(IsMarkedCallback* preserve_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Unlink the reference list clearing references objects with white referents.  Cleared references
   // registered to a reference queue are scheduled for appending by the heap worker thread.
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 3d35c00..61633cd 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -18,6 +18,7 @@
 
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
+#include "base/scoped_flock.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "mirror/art_method.h"
 #include "mirror/class-inl.h"
@@ -148,7 +149,17 @@
   std::string image_filename;
   std::string error_msg;
   bool is_system = false;
-  if (FindImageFilename(image_location, image_isa, &image_filename, &is_system)) {
+  const bool found_image = FindImageFilename(image_location, image_isa, &image_filename,
+                                             &is_system);
+
+  // Note that we must not use the file descriptor associated with
+  // ScopedFlock::GetFile to Init the image file. We want the file
+  // descriptor (and the associated exclusive lock) to be released when
+  // we leave Create.
+  ScopedFlock image_lock;
+  image_lock.Init(image_filename.c_str(), &error_msg);
+
+  if (found_image) {
     ImageSpace* space = ImageSpace::Init(image_filename.c_str(), image_location, !is_system,
                                          &error_msg);
     if (space != nullptr) {
diff --git a/runtime/globals.h b/runtime/globals.h
index 07fadb9..58c2118 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -36,13 +36,6 @@
 static constexpr size_t kWordSize = sizeof(word);
 static constexpr size_t kPointerSize = sizeof(void*);
 
-// Architecture-specific pointer sizes
-static constexpr size_t kArmPointerSize = 4;
-static constexpr size_t kArm64PointerSize = 8;
-static constexpr size_t kMipsPointerSize = 4;
-static constexpr size_t kX86PointerSize = 4;
-static constexpr size_t kX86_64PointerSize = 8;
-
 static constexpr size_t kBitsPerByte = 8;
 static constexpr size_t kBitsPerByteLog2 = 3;
 static constexpr int kBitsPerWord = kWordSize * kBitsPerByte;
@@ -51,20 +44,6 @@
 // Required stack alignment
 static constexpr size_t kStackAlignment = 16;
 
-// ARM instruction alignment. ARM processors require code to be 4-byte aligned,
-// but ARM ELF requires 8..
-static constexpr size_t kArmAlignment = 8;
-
-// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
-static constexpr size_t kArm64Alignment = 16;
-
-// MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
-// TODO: Can this be 4?
-static constexpr size_t kMipsAlignment = 8;
-
-// X86 instruction alignment. This is the recommended alignment for maximum performance.
-static constexpr size_t kX86Alignment = 16;
-
 // System page size. We check this against sysconf(_SC_PAGE_SIZE) at runtime, but use a simple
 // compile-time constant so the compiler can generate better code.
 static constexpr int kPageSize = 4096;
diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc
index c1931a9..5b60396 100644
--- a/runtime/instruction_set.cc
+++ b/runtime/instruction_set.cc
@@ -16,9 +16,6 @@
 
 #include "instruction_set.h"
 
-#include "globals.h"
-#include "base/logging.h"  // Logging is required for FATAL in the helper functions.
-
 namespace art {
 
 const char* GetInstructionSetString(const InstructionSet isa) {
@@ -63,75 +60,6 @@
   return kNone;
 }
 
-size_t GetInstructionSetPointerSize(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return kArmPointerSize;
-    case kArm64:
-      return kArm64PointerSize;
-    case kX86:
-      return kX86PointerSize;
-    case kX86_64:
-      return kX86_64PointerSize;
-    case kMips:
-      return kMipsPointerSize;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have pointer size.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
-size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return 4;
-    case kArm64:
-      return 8;
-    case kX86:
-      return 4;
-    case kX86_64:
-      return 8;
-    case kMips:
-      return 4;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have spills.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
-size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return 4;
-    case kArm64:
-      return 8;
-    case kX86:
-      return 8;
-    case kX86_64:
-      return 8;
-    case kMips:
-      return 4;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have spills.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
 size_t GetInstructionSetAlignment(InstructionSet isa) {
   switch (isa) {
     case kArm:
@@ -155,27 +83,6 @@
   }
 }
 
-bool Is64BitInstructionSet(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-    case kThumb2:
-    case kX86:
-    case kMips:
-      return false;
-
-    case kArm64:
-    case kX86_64:
-      return true;
-
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have bit width.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
 std::string InstructionSetFeatures::GetFeatureString() const {
   std::string result;
   if ((mask_ & kHwDiv) != 0) {
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 679c575..67e7100 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -20,6 +20,7 @@
 #include <iosfwd>
 #include <string>
 
+#include "base/logging.h"  // Logging is required for FATAL in the helper functions.
 #include "base/macros.h"
 
 namespace art {
@@ -35,14 +36,122 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+// Architecture-specific pointer sizes
+static constexpr size_t kArmPointerSize = 4;
+static constexpr size_t kArm64PointerSize = 8;
+static constexpr size_t kMipsPointerSize = 4;
+static constexpr size_t kX86PointerSize = 4;
+static constexpr size_t kX86_64PointerSize = 8;
+
+// ARM instruction alignment. ARM processors require code to be 4-byte aligned,
+// but ARM ELF requires 8.
+static constexpr size_t kArmAlignment = 8;
+
+// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kArm64Alignment = 16;
+
+// MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
+// TODO: Can this be 4?
+static constexpr size_t kMipsAlignment = 8;
+
+// X86 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kX86Alignment = 16;
+
+
 const char* GetInstructionSetString(InstructionSet isa);
 InstructionSet GetInstructionSetFromString(const char* instruction_set);
 
-size_t GetInstructionSetPointerSize(InstructionSet isa);
+static inline size_t GetInstructionSetPointerSize(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return kArmPointerSize;
+    case kArm64:
+      return kArm64PointerSize;
+    case kX86:
+      return kX86PointerSize;
+    case kX86_64:
+      return kX86_64PointerSize;
+    case kMips:
+      return kMipsPointerSize;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have pointer size.";
+      return 0;
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;
+  }
+}
+
 size_t GetInstructionSetAlignment(InstructionSet isa);
-bool Is64BitInstructionSet(InstructionSet isa);
-size_t GetBytesPerGprSpillLocation(InstructionSet isa);
-size_t GetBytesPerFprSpillLocation(InstructionSet isa);
+
+static inline bool Is64BitInstructionSet(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+    case kX86:
+    case kMips:
+      return false;
+
+    case kArm64:
+    case kX86_64:
+      return true;
+
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have bit width.";
+      return 0;
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;
+  }
+}
+
+static inline size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return 4;
+    case kArm64:
+      return 8;
+    case kX86:
+      return 4;
+    case kX86_64:
+      return 8;
+    case kMips:
+      return 4;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have spills.";
+      return 0;
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;
+  }
+}
+
+static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return 4;
+    case kArm64:
+      return 8;
+    case kX86:
+      return 8;
+    case kX86_64:
+      return 8;
+    case kMips:
+      return 4;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have spills.";
+      return 0;
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;
+  }
+}
 
 #if defined(__arm__)
 static constexpr InstructionSet kRuntimeISA = kArm;
@@ -107,6 +216,68 @@
   uint32_t mask_;
 };
 
+// The following definitions create return types for two word-sized entities that will be passed
+// in registers so that memory operations for the interface trampolines can be avoided. The entities
+// are the resolved method and the pointer to the code to be invoked.
+//
+// On x86, ARM32 and MIPS, this is given for a *scalar* 64bit value. The definition thus *must* be
+// uint64_t or long long int.
+//
+// On x86_64 and ARM64, structs are decomposed for allocation, so we can create a struct of two
+// size_t-sized values.
+//
+// We need two operations:
+//
+// 1) A flag value that signals failure. The assembly stubs expect the lower part to be "0".
+//    GetTwoWordFailureValue() will return a value that has lower part == 0.
+//
+// 2) A value that combines two word-sized values.
+//    GetTwoWordSuccessValue() constructs this.
+//
+// IMPORTANT: If you use this to transfer object pointers, it is your responsibility to ensure
+//            that the object does not move or the value is updated. Simple use of this is NOT SAFE
+//            when the garbage collector can move objects concurrently. Ensure that required locks
+//            are held when using!
+
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+typedef uint64_t TwoWordReturn;
+
+// Encodes method_ptr==nullptr and code_ptr==nullptr
+static inline constexpr TwoWordReturn GetTwoWordFailureValue() {
+  return 0;
+}
+
+// Use the lower 32b for the method pointer and the upper 32b for the code pointer.
+static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
+  uint32_t lo32 = static_cast<uint32_t>(lo);
+  uint64_t hi64 = static_cast<uint64_t>(hi);
+  return ((hi64 << 32) | lo32);
+}
+
+#elif defined(__x86_64__) || defined(__aarch64__)
+struct TwoWordReturn {
+  uintptr_t lo;
+  uintptr_t hi;
+};
+
+// Encodes method_ptr==nullptr (lo == 0). The code pointer (hi) is zeroed as well.
+static inline TwoWordReturn GetTwoWordFailureValue() {
+  // Set both words; an unset member would make the returned copy carry an indeterminate value.
+  TwoWordReturn ret = { 0, 0 };
+  return ret;
+}
+
+// Write values into their respective members.
+static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
+  TwoWordReturn ret;
+  ret.lo = lo;
+  ret.hi = hi;
+  return ret;
+}
+#else
+#error "Unsupported architecture"
+#endif
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_INSTRUCTION_SET_H_
diff --git a/runtime/instruction_set_test.cc b/runtime/instruction_set_test.cc
index cd6337c..ece3238 100644
--- a/runtime/instruction_set_test.cc
+++ b/runtime/instruction_set_test.cc
@@ -45,4 +45,8 @@
   EXPECT_EQ(kRuntimeISA, GetInstructionSetFromString(GetInstructionSetString(kRuntimeISA)));
 }
 
+TEST_F(InstructionSetTest, PointerSize) {
+  EXPECT_EQ(kPointerSize, GetInstructionSetPointerSize(kRuntimeISA));
+}
+
 }  // namespace art
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 8e23d1b..261c241 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -846,8 +846,9 @@
   MethodEnterEvent(self, this_object, method, 0);
 }
 
-uint64_t Instrumentation::PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
-                                                       uint64_t gpr_result, uint64_t fpr_result) {
+TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
+                                                            uint64_t gpr_result,
+                                                            uint64_t fpr_result) {
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
@@ -890,14 +891,14 @@
                 << " result is " << std::hex << return_value.GetJ();
     }
     self->SetDeoptimizationReturnValue(return_value);
-    return static_cast<uint64_t>(GetQuickDeoptimizationEntryPoint()) |
-        (static_cast<uint64_t>(*return_pc) << 32);
+    return GetTwoWordSuccessValue(*return_pc,
+                                  reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
   } else {
     if (kVerboseInstrumentation) {
       LOG(INFO) << "Returning from " << PrettyMethod(method)
                 << " to PC " << reinterpret_cast<void*>(*return_pc);
     }
-    return *return_pc;
+    return GetTwoWordSuccessValue(0, *return_pc);
   }
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 2dd2cd7..6625801 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -22,6 +22,7 @@
 #include <list>
 
 #include "atomic.h"
+#include "instruction_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "object_callbacks.h"
@@ -311,8 +312,8 @@
 
   // Called when an instrumented method is exited. Removes the pushed instrumentation frame
   // returning the intended link register. Generates method exit events.
-  uint64_t PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc, uint64_t gpr_result,
-                                        uint64_t fpr_result)
+  TwoWordReturn PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
+                                             uint64_t gpr_result, uint64_t fpr_result)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Pops an instrumentation frame from the current thread and generate an unwind event.
diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java
index 1ce4a04..f0fe934 100644
--- a/test/107-int-math2/src/Main.java
+++ b/test/107-int-math2/src/Main.java
@@ -979,7 +979,7 @@
         if (lres == 0x96deff00aa010000L) {
             System.out.println("longShiftTest PASSED");
         } else {
-            System.out.println("longShiftTest FAILED: " + res);
+            System.out.println("longShiftTest FAILED: " + lres);
             failure = true;
         }