Merge "ART: Dex file verifier can't blindly use GetDescriptor"
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index d453797..d40917b 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -756,7 +756,7 @@
       support_list_size = arraysize(x86_64_support_list);
     }
 
-    for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
+    for (unsigned int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
       BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx);
       if (bb == NULL) continue;
       if (bb->block_type == kDead) continue;
@@ -885,15 +885,13 @@
         (1 << kBBOpt) |
         (1 << kMatch) |
         (1 << kPromoteCompilerTemps));
-  }
-
-  if (cu.instruction_set == kArm64 || cu.instruction_set == kX86_64) {
-    // TODO(Arm64): enable optimizations once backend is mature enough.
+  } else if (cu.instruction_set == kX86_64) {
     // TODO(X86_64): enable optimizations once backend is mature enough.
     cu.disable_opt = ~(uint32_t)0;
-    if (cu.instruction_set == kArm64) {
-      cu.enable_debug |= (1 << kDebugCodegenDump);
-    }
+  } else if (cu.instruction_set == kArm64) {
+    // TODO(Arm64): enable optimizations once backend is mature enough.
+    cu.disable_opt = ~(uint32_t)0;
+    cu.enable_debug |= (1 << kDebugCodegenDump);
   }
 
   cu.StartTimingSplit("BuildMIRGraph");
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index a2676c8..63a5570 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -586,7 +586,7 @@
   if (current_method_ == 0) {
     DCHECK(entry_block_ == NULL);
     DCHECK(exit_block_ == NULL);
-    DCHECK_EQ(num_blocks_, 0);
+    DCHECK_EQ(num_blocks_, 0U);
     // Use id 0 to represent a null block.
     BasicBlock* null_block = NewMemBB(kNullBlock, num_blocks_++);
     DCHECK_EQ(null_block->id, NullBasicBlockId);
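
A note on the signedness changes in this commit (the unsigned loop index in frontend.cc above, the 0U literal here, and the unsigned accessors in mir_graph.h below): once num_blocks_ is unsigned, comparing it against a plain 0 (an int) inside the CHECK/DCHECK comparison templates trips -Wsign-compare, which these builds treat as an error. A minimal sketch of the issue, using hypothetical stand-in names rather than ART's actual macros:

    #include <cassert>

    unsigned int num_blocks = 0;  // stand-in for MIRGraph::num_blocks_

    template <typename LHS, typename RHS>
    void CheckEq(LHS lhs, RHS rhs) {
      assert(lhs == rhs);  // unsigned == int warns under -Wsign-compare
    }

    int main() {
      // CheckEq(num_blocks, 0);  // int literal: sign-compare warning (error under -Werror)
      CheckEq(num_blocks, 0U);    // unsigned literal: clean
      for (unsigned int idx = 0; idx < num_blocks; idx++) {
        // loop index type now matches the count, as in frontend.cc
      }
      return 0;
    }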
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index b6cec66..27b8ca4 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -587,7 +587,7 @@
     return m_units_[m_unit_index]->GetCodeItem()->insns_;
   }
 
-  int GetNumBlocks() const {
+  unsigned int GetNumBlocks() const {
     return num_blocks_;
   }
 
@@ -607,7 +607,7 @@
     return exit_block_;
   }
 
-  BasicBlock* GetBasicBlock(int block_id) const {
+  BasicBlock* GetBasicBlock(unsigned int block_id) const {
     return (block_id == NullBasicBlockId) ? NULL : block_list_.Get(block_id);
   }
 
@@ -1149,7 +1149,7 @@
   ArenaBitVector* try_block_addr_;
   BasicBlock* entry_block_;
   BasicBlock* exit_block_;
-  int num_blocks_;
+  unsigned int num_blocks_;
   const DexFile::CodeItem* current_code_item_;
   GrowableArray<uint16_t> dex_pc_to_block_map_;  // FindBlock lookup cache.
   std::vector<DexCompilationUnit*> m_units_;     // List of methods included in this graph
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index a895e6e..5083bbc 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1628,7 +1628,7 @@
   CreateNativeGcMap();
 }
 
-int ArmMir2Lir::GetInsnSize(LIR* lir) {
+size_t ArmMir2Lir::GetInsnSize(LIR* lir) {
   DCHECK(!IsPseudoLirOp(lir->opcode));
   return EncodingMap[lir->opcode].size;
 }
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 8db7d4e..95bcfbd 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -87,7 +87,7 @@
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
     ResourceMask GetPCUseDefEncoding() const OVERRIDE;
     uint64_t GetTargetInstFlags(int opcode);
-    int GetInsnSize(LIR* lir);
+    size_t GetInsnSize(LIR* lir) OVERRIDE;
     bool IsUnconditionalBranch(LIR* lir);
 
     // Check support for volatile load/store of a given size.
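
The OVERRIDE added next to the int → size_t return-type change is what keeps this refactor honest across all four backends: without it, a subclass that kept the old int signature would silently declare a brand-new virtual instead of overriding the pure virtual in mir_to_lir.h. A minimal sketch, assuming ART's OVERRIDE macro expands to C++11 override:

    #include <cstddef>

    struct LirStub;  // opaque stand-in for ART's LIR

    class Mir2LirStub {  // stand-in for Mir2Lir
     public:
      virtual ~Mir2LirStub() {}
      virtual size_t GetInsnSize(LirStub* lir) = 0;
    };

    class ArmBackendStub : public Mir2LirStub {
     public:
      // int GetInsnSize(LirStub* lir) override;  // compile error: wrong return type
      size_t GetInsnSize(LirStub* lir) override { return 4u; }  // verified match
    };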
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 2c4f262..9362147 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -887,7 +887,7 @@
   CreateNativeGcMap();
 }
 
-int Arm64Mir2Lir::GetInsnSize(LIR* lir) {
+size_t Arm64Mir2Lir::GetInsnSize(LIR* lir) {
   ArmOpcode opcode = UNWIDE(lir->opcode);
   DCHECK(!IsPseudoLirOp(opcode));
   return EncodingMap[opcode].size;
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index bf09b86..9a80c69 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -86,7 +86,7 @@
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
     ResourceMask GetPCUseDefEncoding() const OVERRIDE;
     uint64_t GetTargetInstFlags(int opcode);
-    int GetInsnSize(LIR* lir);
+    size_t GetInsnSize(LIR* lir) OVERRIDE;
     bool IsUnconditionalBranch(LIR* lir);
 
     // Check support for volatile load/store of a given size.
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 8f6d716..f9081ce 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -456,7 +456,8 @@
      * this is an uncommon operation and isn't especially performance
      * critical.
      */
-    RegStorage r_src = AllocTemp();
+    // This is addressing the stack, which may be outside the low 4GB of the address space.
+    RegStorage r_src = cu_->target64 ? AllocTempWide() : AllocTemp();
     RegStorage r_dst = AllocTemp();
     RegStorage r_idx = AllocTemp();
     RegStorage r_val;
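
The AllocTempWide() switch above matters because on a 64-bit target a stack address is a full 64-bit value; materializing it into a 32-bit temp would silently truncate the pointer whenever the stack is mapped above 4GB. A plain C++ sketch of that failure mode (not ART's register allocator):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int local = 42;
      uintptr_t full = reinterpret_cast<uintptr_t>(&local);  // 64 bits on x86-64
      uint32_t narrow = static_cast<uint32_t>(full);         // high bits dropped
      // If the stack lives above 4GB, 'narrow' no longer points at 'local'.
      std::printf("full=0x%llx narrow=0x%x\n",
                  static_cast<unsigned long long>(full), narrow);
      return 0;
    }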
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 2af847c..a90a06e 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -155,7 +155,12 @@
   if (arg0.wide == 0) {
     LoadValueDirectFixed(arg0, TargetReg(kArg0));
   } else {
-    RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+    RegStorage r_tmp;
+    if (cu_->instruction_set == kX86_64) {
+      r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
+    } else {
+      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+    }
     LoadValueDirectWideFixed(arg0, r_tmp);
   }
   ClobberCallerSave();
@@ -181,7 +186,12 @@
   if (arg1.wide == 0) {
     LoadValueDirectFixed(arg1, TargetReg(kArg1));
   } else {
-    RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+    RegStorage r_tmp;
+    if (cu_->instruction_set == kX86_64) {
+      r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+    } else {
+      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+    }
     LoadValueDirectWideFixed(arg1, r_tmp);
   }
   LoadConstant(TargetReg(kArg0), arg0);
@@ -279,6 +289,12 @@
         LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1));
       } else if (cu_->instruction_set == kArm64) {
         LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
+      } else if (cu_->instruction_set == kX86_64) {
+        if (arg0.fp) {
+          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg0));
+        } else {
+          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg0) : TargetReg(kArg1));
+        }
       } else {
         LoadValueDirectFixed(arg1, TargetReg(kArg1));
       }
@@ -423,7 +439,12 @@
   if (arg2.wide == 0) {
     LoadValueDirectFixed(arg2, TargetReg(kArg2));
   } else {
-    RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+    RegStorage r_tmp;
+    if (cu_->instruction_set == kX86_64) {
+      r_tmp = RegStorage::Solo64(TargetReg(kArg2).GetReg());
+    } else {
+      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+    }
     LoadValueDirectWideFixed(arg2, r_tmp);
   }
   LoadConstant(TargetReg(kArg0), arg0);
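
The pattern repeated in the hunks above — RegStorage::Solo64 on x86-64, RegStorage::MakeRegPair everywhere else — is the calling-convention split for wide arguments: a 64-bit value occupies one native register on a 64-bit ISA but a pair of consecutive 32-bit argument registers otherwise. A sketch of the same decision with hypothetical types (not ART's RegStorage):

    // Hypothetical descriptor for where a 64-bit argument lands.
    struct WideArg {
      int lo_reg;   // register holding bits 0..31 (or the whole value if solo)
      int hi_reg;   // register holding bits 32..63; unused when solo
      bool solo;    // true: one 64-bit register; false: 32-bit pair
    };

    WideArg PlaceWideArg(bool is_x86_64, int first_arg_reg) {
      if (is_x86_64) {
        return WideArg{first_arg_reg, -1, true};                // e.g. kArg0 as 64-bit
      }
      return WideArg{first_arg_reg, first_arg_reg + 1, false};  // kArgN/kArgN+1 pair
    }

This is also why the pair branch consumes two argument registers (kArg0/kArg1, kArg1/kArg2, kArg2/kArg3) while the x86-64 branch consumes only one.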
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index b26ab57..c7e9190 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -709,7 +709,7 @@
   return res;
 }
 
-int MipsMir2Lir::GetInsnSize(LIR* lir) {
+size_t MipsMir2Lir::GetInsnSize(LIR* lir) {
   DCHECK(!IsPseudoLirOp(lir->opcode));
   return EncodingMap[lir->opcode].size;
 }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 62a7f24..571adac 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -85,7 +85,7 @@
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
     ResourceMask GetPCUseDefEncoding() const OVERRIDE;
     uint64_t GetTargetInstFlags(int opcode);
-    int GetInsnSize(LIR* lir);
+    size_t GetInsnSize(LIR* lir) OVERRIDE;
     bool IsUnconditionalBranch(LIR* lir);
 
     // Check support for volatile load/store of a given size.
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ca4d0e4..9155677 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1162,7 +1162,7 @@
     virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0;
     virtual ResourceMask GetPCUseDefEncoding() const = 0;
     virtual uint64_t GetTargetInstFlags(int opcode) = 0;
-    virtual int GetInsnSize(LIR* lir) = 0;
+    virtual size_t GetInsnSize(LIR* lir) = 0;
     virtual bool IsUnconditionalBranch(LIR* lir) = 0;
 
     // Check support for volatile load/store of a given size.
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index d37ee67..c7e289d 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -506,9 +506,80 @@
   return low_reg;
 }
 
+static bool HasModrm(const X86EncodingMap* entry) {
+  switch (entry->kind) {
+    case kNullary: return false;
+    case kRegOpcode: return false;
+    default: return true;
+  }
+}
+
+static bool HasSib(const X86EncodingMap* entry) {
+  switch (entry->kind) {
+    case kArray: return true;
+    case kArrayReg: return true;
+    case kRegArray: return true;
+    case kArrayImm: return true;
+    case kRegArrayImm: return true;
+    case kShiftArrayImm: return true;
+    case kShiftArrayCl: return true;
+    case kArrayCond: return true;
+    case kCall:
+      switch (entry->opcode) {
+        case kX86CallA: return true;
+        default: return false;
+      }
+    case kPcRel:
+      switch (entry->opcode) {
+        case kX86PcRelLoadRA: return true;
+        default: return false;
+      }
+    default: return false;
+  }
+}
+
+static bool ModrmIsRegReg(const X86EncodingMap* entry) {
+  switch (entry->kind) {
+    // These kinds have no modrm byte for the instruction, so no register is encoded through it:
+    case kNullary: return true;
+    case kRegOpcode: return true;
+    case kMovRegImm: return true;
+    // Cases with a regular modrm mod field of 3 (register-direct): when only one register
+    // operand is present, the reg field holds an opcode extension, so the operand is encoded
+    // in the base register.
+    case kReg: return true;
+    case kRegReg: return true;
+    case kRegRegStore: return true;
+    case kRegImm: return true;
+    case kRegRegImm: return true;
+    case kRegRegImmStore: return true;
+    case kShiftRegImm: return true;
+    case kShiftRegCl: return true;
+    case kRegCond: return true;
+    case kRegRegCond: return true;
+    case kJmp:
+      switch (entry->opcode) {
+        case kX86JmpR: return true;
+        default: return false;
+      }
+    case kCall:
+      switch (entry->opcode) {
+        case kX86CallR: return true;
+        default: return false;
+      }
+    default: return false;
+  }
+}
+
 size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
-                               int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form,
-                               int32_t displacement) {
+                               int32_t raw_base, int32_t displacement) {
+  bool has_modrm = HasModrm(entry);
+  bool has_sib = HasSib(entry);
+  bool r8_form = entry->skeleton.r8_form;
+  bool modrm_is_reg_reg = ModrmIsRegReg(entry);
+  if (has_sib) {
+    DCHECK(!modrm_is_reg_reg);
+  }
   size_t size = 0;
   if (entry->skeleton.prefix1 > 0) {
     ++size;
@@ -517,15 +588,19 @@
     }
   }
   if (Gen64Bit() || kIsDebugBuild) {
-    bool registers_need_rex_prefix =
-        NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base) ||
-        (r8_form && RegStorage::RegNum(raw_reg) > 4) ||
-        (r8_reg_reg_form && RegStorage::RegNum(raw_base) > 4);
-    if (registers_need_rex_prefix &&
-        entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) {
-      DCHECK(Gen64Bit()) << "Attempt to use " << entry->name << " on a non-byte register "
-          << RegStorage::RegNum(raw_reg);
-      ++size;  // rex
+    bool registers_need_rex_prefix = NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base);
+    if (r8_form) {
+      // Do we need an empty REX prefix to normalize byte registers?
+      registers_need_rex_prefix = registers_need_rex_prefix || (RegStorage::RegNum(raw_reg) >= 4);
+      registers_need_rex_prefix = registers_need_rex_prefix ||
+          (modrm_is_reg_reg && (RegStorage::RegNum(raw_base) >= 4));
+    }
+    if (registers_need_rex_prefix) {
+      DCHECK(Gen64Bit()) << "Attempt to use a 64-bit only addressable register "
+          << RegStorage::RegNum(raw_reg) << " with instruction " << entry->name;
+      if (entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) {
+        ++size;  // rex
+      }
     }
   }
   ++size;  // opcode
@@ -535,89 +610,72 @@
       ++size;
     }
   }
-  ++size;  // modrm
-  if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum()
-      || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
-    // SP requires a SIB byte.
-    // GS access also needs a SIB byte for absolute adressing in 64-bit mode.
-    ++size;
+  if (has_modrm) {
+    ++size;  // modrm
   }
-  if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) {
-    // BP requires an explicit displacement, even when it's 0.
-    if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) {
-      DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name;
+  if (!modrm_is_reg_reg) {
+    if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum()
+        || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
+      // SP requires a SIB byte.
+      // GS access also needs a SIB byte for absolute addressing in 64-bit mode.
+      ++size;
     }
-    size += IS_SIMM8(displacement) ? 1 : 4;
+    if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) {
+      // BP requires an explicit displacement, even when it's 0.
+      if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) {
+        DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name;
+      }
+      size += IS_SIMM8(displacement) ? 1 : 4;
+    }
   }
   size += entry->skeleton.immediate_bytes;
   return size;
 }
 
-int X86Mir2Lir::GetInsnSize(LIR* lir) {
+size_t X86Mir2Lir::GetInsnSize(LIR* lir) {
   DCHECK(!IsPseudoLirOp(lir->opcode));
   const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode];
   DCHECK_EQ(entry->opcode, lir->opcode) << entry->name;
+
   switch (entry->kind) {
     case kData:
       return 4;  // 4 bytes of data.
     case kNop:
       return lir->operands[0];  // Length of nop is sole operand.
     case kNullary:
-      // Substract 1 for modrm which isn't used.
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0) - 1;
+      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0);
     case kRegOpcode:  // lir operands - 0: reg
-      // Substract 1 for modrm  which isn't used.
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      // Note: RegOpcode form passes reg as REX_R but encodes it as REX_B.
-      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false, 0) - 1;
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
     case kReg:  // lir operands - 0: reg
-      // Note: Reg form passes reg as REX_R but encodes it as REX_B.
-      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
-                         false, entry->skeleton.r8_form, false, 0);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
     case kMem:  // lir operands - 0: base, 1: disp
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false,
-                         lir->operands[1]);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
     case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
-      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false,
-                         lir->operands[3]);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
     case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
-      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0],
-                         false, entry->skeleton.r8_form, false, lir->operands[1]);
+      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]);
     case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg 3: immediate
-      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0],
-                         false, entry->skeleton.r8_form, false, lir->operands[1]);
+      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]);
     case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
       return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
-                         true, entry->skeleton.r8_form, false, lir->operands[3]);
+                         lir->operands[3]);
     case kThreadReg:  // lir operands - 0: disp, 1: reg
-      DCHECK_EQ(false, entry->skeleton.r8_form);
       // Thread displacement size is always 32bit.
-      return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, false, false, false,
-                         0x12345678);
+      return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, 0x12345678);
     case kRegReg:  // lir operands - 0: reg1, 1: reg2
-      // Note: RegReg form passes reg2 as index but encodes it using base.
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG,
-                         false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0);
     case kRegRegStore:  // lir operands - 0: reg2, 1: reg1
-      // Note: RegRegStore form passes reg1 as index but encodes it using base.
-      return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG,
-                         false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0);
+      return ComputeSize(entry, lir->operands[1], NO_REG, lir->operands[0], 0);
     case kRegMem:  // lir operands - 0: reg, 1: base, 2: disp
-      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1],
-                         false, entry->skeleton.r8_form, false, lir->operands[2]);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]);
     case kRegArray:   // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
       return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
-                         true, entry->skeleton.r8_form, false, lir->operands[4]);
+                         lir->operands[4]);
     case kRegThread:  // lir operands - 0: reg, 1: disp
       // Thread displacement size is always 32bit.
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false,
-                         0x12345678);
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0x12345678);
     case kRegImm: {  // lir operands - 0: reg, 1: immediate
-      size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
-                         false, entry->skeleton.r8_form, false, 0);
+      size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0);
       // AX opcodes don't require the modrm byte.
       if (entry->skeleton.ax_opcode == 0) {
         return size;
@@ -626,83 +684,62 @@
       }
     }
     case kMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0],
-                         false, false, false, lir->operands[1]);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
     case kArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0],
-                         true, false, false, lir->operands[3]);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
     case kThreadImm:  // lir operands - 0: disp, 1: imm
       // Thread displacement size is always 32bit.
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678);
+      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678);
     case kRegRegImm:  // lir operands - 0: reg1, 1: reg2, 2: imm
       // Note: RegRegImm form passes reg2 as index but encodes it using base.
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG,
-                         false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0);
+      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, 0);
     case kRegRegImmStore:  // lir operands - 0: reg2, 1: reg1, 2: imm
       // Note: RegRegImmStore form passes reg1 as index but encodes it using base.
-      return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG,
-                         false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0);
+      return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, 0);
     case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
-      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1],
-                         false, entry->skeleton.r8_form, false, lir->operands[2]);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]);
     case kRegArrayImm:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm
       return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
-                         true, entry->skeleton.r8_form, false, lir->operands[4]);
+                         lir->operands[4]);
     case kMovRegImm:  // lir operands - 0: reg, 1: immediate
       return ((entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])) ? 1 : 0) + 1 +
           entry->skeleton.immediate_bytes;
     case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
       // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
-                         false, entry->skeleton.r8_form, false, 0) -
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0) -
           (lir->operands[1] == 1 ? 1 : 0);
     case kShiftMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
       // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0],
-                         false, entry->skeleton.r8_form, false, lir->operands[1]) -
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]) -
           (lir->operands[2] == 1 ? 1 : 0);
     case kShiftArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
       // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0],
-                         true, entry->skeleton.r8_form, false, lir->operands[3]) -
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]) -
           (lir->operands[4] == 1 ? 1 : 0);
     case kShiftRegCl:  // lir operands - 0: reg, 1: cl
       DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[1]));
       // Note: ShiftRegCl form passes reg as reg but encodes it using base.
-      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
-                         false, entry->skeleton.r8_form, false, 0);
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0);
     case kShiftMemCl:  // lir operands - 0: base, 1: disp, 2: cl
-      DCHECK_EQ(false, entry->skeleton.r8_form);
       DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[2]));
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0],
-                         false, false, false, lir->operands[1]);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
     case kShiftArrayCl:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cl
-      DCHECK_EQ(false, entry->skeleton.r8_form);
       DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[4]));
       return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
-                         true, false, false, lir->operands[3]);
+                         lir->operands[3]);
     case kRegCond:  // lir operands - 0: reg, 1: cond
-      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
-                         false, entry->skeleton.r8_form, false, 0);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
     case kMemCond:  // lir operands - 0: base, 1: disp, 2: cond
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false,
-                         lir->operands[1]);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
     case kArrayCond:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond
       DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false,
-                         lir->operands[3]);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
     case kRegRegCond:  // lir operands - 0: reg1, 1: reg2, 2: cond
-      // Note: RegRegCond form passes reg2 as index but encodes it using base.
       DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, false, false, false, 0);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0);
     case kRegMemCond:  // lir operands - 0: reg, 1: base, 2: disp, 3:cond
       DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], false, false, false,
-                         lir->operands[2]);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]);
     case kJcc:
       if (lir->opcode == kX86Jcc8) {
         return 2;  // opcode + rel8
@@ -717,7 +754,7 @@
         return 5;  // opcode + rel32
       } else if (lir->opcode == kX86JmpT) {
         // Thread displacement size is always 32bit.
-        return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678);
+        return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678);
       } else {
         DCHECK(lir->opcode == kX86JmpR);
         if (NeedsRex(lir->operands[0])) {
@@ -731,14 +768,12 @@
         case kX86CallI: return 5;  // opcode 0:disp
         case kX86CallR: return 2;  // opcode modrm
         case kX86CallM:  // lir operands - 0: base, 1: disp
-          return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false,
-                             lir->operands[1]);
+          return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
         case kX86CallA:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
-          return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false,
-                             lir->operands[3]);
+          return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
         case kX86CallT:  // lir operands - 0: disp
           // Thread displacement size is always 32bit.
-          return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678);
+          return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678);
         default:
           break;
       }
@@ -748,7 +783,7 @@
         // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
         // Force the displacement size to 32bit, it will hold a computed offset later.
         return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
-                           true, false, false, 0x12345678);
+                           0x12345678);
       } else {
         DCHECK_EQ(entry->opcode, kX86PcRelAdr);
         return 5;  // opcode with reg + 4 byte immediate
@@ -757,7 +792,7 @@
       DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod));
       return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ +
           ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI],
-                      lir->operands[0], NO_REG, NO_REG, false, false, false, 0) -
+                      lir->operands[0], NO_REG, NO_REG, 0) -
               // Shorter ax encoding.
               (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum()  ? 1 : 0);
     case kUnimplemented:
@@ -801,8 +836,7 @@
 }
 
 void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry,
-                            int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b,
-                            bool r8_form) {
+                            int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) {
   // REX.WRXB
   // W - 64-bit operand
   // R - MODRM.reg
@@ -812,9 +846,17 @@
   bool r = NeedsRex(raw_reg_r);
   bool x = NeedsRex(raw_reg_x);
   bool b = NeedsRex(raw_reg_b);
+  bool r8_form = entry->skeleton.r8_form;
+  bool modrm_is_reg_reg = ModrmIsRegReg(entry);
+
   uint8_t rex = 0;
-  if (r8_form && RegStorage::RegNum(raw_reg_r) > 4) {
-    rex |= 0x40;  // REX.0000
+  if (r8_form) {
+    // Do we need an empty REX prefix to normalize byte register addressing?
+    if (RegStorage::RegNum(raw_reg_r) >= 4) {
+      rex |= 0x40;  // REX.0000
+    } else if (modrm_is_reg_reg && RegStorage::RegNum(raw_reg_b) >= 4) {
+      rex |= 0x40;  // REX.0000
+    }
   }
   if (w) {
     rex |= 0x48;  // REX.W000
@@ -875,9 +917,8 @@
 }
 
 void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry,
-                                     int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b,
-                                     bool r8_form) {
-  EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b, r8_form);
+                                     int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) {
+  EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b);
   EmitOpcode(entry);
 }
 
@@ -971,7 +1012,7 @@
 
 void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -979,7 +1020,7 @@
 
 void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg);
   // There's no 3-byte instruction with +rd
   DCHECK(entry->skeleton.opcode != 0x0F ||
          (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A));
@@ -992,7 +1033,7 @@
 
 void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
@@ -1002,7 +1043,7 @@
 
 void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1016,7 +1057,7 @@
 void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index,
                              int scale, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false);
+  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base);
   uint8_t low_index = LowRegisterBits(raw_index);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
@@ -1027,7 +1068,7 @@
 void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
                             int32_t raw_reg) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmDisp(low_reg, low_base, disp);
@@ -1045,7 +1086,7 @@
 void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
                               int32_t raw_index, int scale, int32_t disp) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   uint8_t low_index = LowRegisterBits(raw_index);
   uint8_t low_base = LowRegisterBits(raw_base);
@@ -1064,7 +1105,7 @@
 void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
                             int32_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -1075,7 +1116,7 @@
                               int32_t raw_base, int32_t raw_index, int scale, int32_t disp,
                               int32_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false);
+  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base);
   uint8_t low_index = LowRegisterBits(raw_index);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
@@ -1086,7 +1127,7 @@
 void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG, false);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   EmitModrmThread(low_reg);
   code_buffer_.push_back(disp & 0xFF);
@@ -1101,7 +1142,7 @@
 void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) {
   CheckValidByteRegister(entry, raw_reg1);
   CheckValidByteRegister(entry, raw_reg2);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
   uint8_t low_reg1 = LowRegisterBits(raw_reg1);
   uint8_t low_reg2 = LowRegisterBits(raw_reg2);
   uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
@@ -1114,7 +1155,7 @@
 void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
                                int32_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
   uint8_t low_reg1 = LowRegisterBits(raw_reg1);
   uint8_t low_reg2 = LowRegisterBits(raw_reg2);
   uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
@@ -1128,7 +1169,7 @@
                                int32_t raw_reg, int32_t raw_base, int disp, int32_t imm) {
   DCHECK(!RegStorage::IsFloat(raw_reg));
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmDisp(low_reg, low_base, disp);
@@ -1145,7 +1186,7 @@
 
 void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
   if (RegStorage::RegNum(raw_reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
     code_buffer_.push_back(entry->skeleton.ax_opcode);
   } else {
@@ -1158,7 +1199,8 @@
 }
 
 void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm) {
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
   EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1170,7 +1212,7 @@
 
 void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, false);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   code_buffer_.push_back(0xB8 + low_reg);
   switch (entry->skeleton.immediate_bytes) {
@@ -1198,7 +1240,7 @@
 
 void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
   if (imm != 1) {
     code_buffer_.push_back(entry->skeleton.opcode);
   } else {
@@ -1221,7 +1263,7 @@
 void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl) {
   CheckValidByteRegister(entry, raw_reg);
   DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1237,7 +1279,7 @@
                                 int32_t displacement, int32_t raw_cl) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
-  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1251,7 +1293,7 @@
 void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
                                  int32_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base);
   if (imm != 1) {
     code_buffer_.push_back(entry->skeleton.opcode);
   } else {
@@ -1272,7 +1314,7 @@
 
 void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefix(entry, raw_reg, NO_REG, NO_REG, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0x0F, entry->skeleton.opcode);
   code_buffer_.push_back(0x0F);
@@ -1315,7 +1357,7 @@
                                 int32_t cc) {
   // Generate prefix and opcode without the condition.
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
 
   // Now add the condition. The last byte of opcode is the one that receives it.
   DCHECK_GE(cc, 0);
@@ -1341,7 +1383,7 @@
                                 int32_t disp, int32_t cc) {
   // Generate prefix and opcode without the condition.
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base, false);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base);
 
   // Now add the condition. The last byte of opcode is the one that receives it.
   DCHECK_GE(cc, 0);
@@ -1376,7 +1418,7 @@
   } else {
     DCHECK(entry->opcode == kX86JmpR);
     DCHECK_EQ(false, entry->skeleton.r8_form);
-    EmitPrefix(entry, NO_REG, NO_REG, rel, false);
+    EmitPrefix(entry, NO_REG, NO_REG, rel);
     code_buffer_.push_back(entry->skeleton.opcode);
     uint8_t low_reg = LowRegisterBits(rel);
     uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
@@ -1404,7 +1446,7 @@
 
 void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -1413,7 +1455,7 @@
 
 void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
   DCHECK_EQ(4, entry->skeleton.immediate_bytes);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1425,7 +1467,7 @@
 void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
   EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1450,7 +1492,7 @@
   }
   if (entry->opcode == kX86PcRelLoadRA) {
     DCHECK_EQ(false, entry->skeleton.r8_form);
-    EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table, false);
+    EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table);
     code_buffer_.push_back(entry->skeleton.opcode);
     DCHECK_NE(0x0F, entry->skeleton.opcode);
     DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1479,7 +1521,7 @@
 void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) {
   DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name;
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, raw_reg, NO_REG, NO_REG, false);
+  EmitPrefix(entry, raw_reg, NO_REG, NO_REG);
   code_buffer_.push_back(0xE8);  // call +0
   code_buffer_.push_back(0);
   code_buffer_.push_back(0);
@@ -1496,7 +1538,7 @@
 void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) {
   UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " "
                          << BuildInsnString(entry->fmt, lir, 0);
-  for (int i = 0; i < GetInsnSize(lir); ++i) {
+  for (size_t i = 0; i < GetInsnSize(lir); ++i) {
     code_buffer_.push_back(0xCC);  // push breakpoint instruction - int 3
   }
 }
@@ -1793,8 +1835,8 @@
         EmitUnimplemented(entry, lir);
         break;
     }
-    CHECK_EQ(static_cast<size_t>(GetInsnSize(lir)),
-             code_buffer_.size() - starting_cbuf_size)
+    DCHECK_EQ(lir->flags.size, GetInsnSize(lir));
+    CHECK_EQ(lir->flags.size, code_buffer_.size() - starting_cbuf_size)
         << "Instruction size mismatch for entry: " << X86Mir2Lir::EncodingMap[lir->opcode].name;
   }
   return res;
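
The reworked r8_form logic in ComputeSize and EmitPrefix encodes an x86-64 rule worth spelling out: with no REX prefix at all, byte-register encodings 4–7 select ah/ch/dh/bh, so the byte forms of registers 4–7 (spl/bpl/sil/dil) need at least an empty REX prefix (0x40), and r8b–r15b need a REX with the R/X/B bits anyway. Note the comparison also tightens from > 4 to >= 4, so register number 4 itself is now handled. A standalone sketch of the predicate (my names, not the ART helpers):

    // Does a byte-register operand force a REX prefix?
    // reg_num is the architectural register number (0 = al, 4 = spl, 8 = r8b, ...).
    bool ByteRegNeedsRex(int reg_num) {
      if (reg_num >= 8) {
        return true;  // r8b..r15b: REX.B/REX.R carries the fourth register bit.
      }
      // Without any REX, encodings 4..7 mean ah/ch/dh/bh, so spl/bpl/sil/dil
      // are only addressable once an empty REX (0x40) is present.
      return reg_num >= 4;
    }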
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 6ae553d..3540843 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -121,7 +121,7 @@
   std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
   ResourceMask GetPCUseDefEncoding() const OVERRIDE;
   uint64_t GetTargetInstFlags(int opcode);
-  int GetInsnSize(LIR* lir);
+  size_t GetInsnSize(LIR* lir) OVERRIDE;
   bool IsUnconditionalBranch(LIR* lir);
 
   // Check support for volatile load/store of a given size.
@@ -392,15 +392,13 @@
 
  protected:
   size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
-                     int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form,
-                     int32_t displacement);
+                     int32_t raw_base, int32_t displacement);
   void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg);
   void EmitPrefix(const X86EncodingMap* entry,
-                  int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b,
-                  bool r8_form);
+                  int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b);
   void EmitOpcode(const X86EncodingMap* entry);
   void EmitPrefixAndOpcode(const X86EncodingMap* entry,
-                           int32_t reg_r, int32_t reg_x, int32_t reg_b, bool r8_form);
+                           int32_t reg_r, int32_t reg_x, int32_t reg_b);
   void EmitDisp(uint8_t base, int32_t disp);
   void EmitModrmThread(uint8_t reg_or_opcode);
   void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp);
@@ -464,6 +462,12 @@
   virtual RegStorage AllocateByteRegister();
 
   /*
+   * @brief Check if a register is byte addressable.
+   * @returns true if a register is byte addressable.
+   */
+  bool IsByteRegister(RegStorage reg);
+
+  /*
   * @brief Generate inline code for the fast case of String.indexOf.
    * @param info Call parameters
    * @param zero_based 'true' if the index into the string is 0.
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index ced6400..f6f0617 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -381,7 +381,7 @@
     branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
   }
   // If the result reg can't be byte accessed, use a jump and move instead of a set.
-  if (rl_result.reg.GetReg() >= rs_rX86_SP.GetReg()) {
+  if (!IsByteRegister(rl_result.reg)) {
     LIR* branch2 = NULL;
     if (unordered_gt) {
       branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4a77df2..05b5e43 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -882,10 +882,9 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
 
-  // SETcc only works with EAX..EDX.
-  if (result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+  // For 32-bit, SETcc only works with EAX..EDX.
+  if (!IsByteRegister(result_reg)) {
     result_reg = AllocateByteRegister();
-    DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum());
   }
   NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
   NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
@@ -1386,9 +1385,9 @@
   if (!Gen64Bit()) {
     x86op = GetOpcode(op, rl_dest, rl_src, true);
     lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is64bit */);
   }
-  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                          true /* is_load */, true /* is64bit */);
 }
 
 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
@@ -1423,11 +1422,11 @@
   if (!Gen64Bit()) {
     x86op = GetOpcode(op, rl_dest, rl_src, true);
     lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is64bit */);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                            false /* is_load */, true /* is64bit */);
   }
-  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                          true /* is_load */, true /* is64bit */);
-  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                          false /* is_load */, true /* is64bit */);
   FreeTemp(rl_src.reg);
 }
 
@@ -1760,8 +1759,7 @@
     rl_src = LoadValue(rl_src, reg_class);
   }
   // If the src reg can't be byte accessed, move it to a temp first.
-  if ((size == kSignedByte || size == kUnsignedByte) &&
-      rl_src.reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+  if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
     RegStorage temp = AllocTemp();
     OpRegCopy(temp, rl_src.reg);
     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
@@ -2240,10 +2238,9 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
 
-  // SETcc only works with EAX..EDX.
-  if (result_reg == object.reg || result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+  // For 32-bit, SETcc only works with EAX..EDX.
+  if (result_reg == object.reg || !IsByteRegister(result_reg)) {
     result_reg = AllocateByteRegister();
-    DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum());
   }
 
   // Assume that there is no match.
@@ -2355,7 +2352,7 @@
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
   RegLocation rl_result = GetReturn(kRefReg);
 
-  // SETcc only works with EAX..EDX.
+  // For 32-bit, SETcc only works with EAX..EDX.
   DCHECK_LT(rl_result.reg.GetRegNum(), 4);
 
   // Is the class NULL?
@@ -2655,6 +2652,7 @@
     Mir2Lir::GenIntToLong(rl_dest, rl_src);
     return;
   }
+  rl_src = UpdateLoc(rl_src);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (rl_src.location == kLocPhysReg) {
     NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index d1ba239..483d8cf 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -424,7 +424,15 @@
 }
 
 RegStorage X86Mir2Lir::AllocateByteRegister() {
-  return AllocTypedTemp(false, kCoreReg);
+  RegStorage reg = AllocTypedTemp(false, kCoreReg);
+  if (!Gen64Bit()) {
+    DCHECK_LT(reg.GetRegNum(), rs_rX86_SP.GetRegNum());
+  }
+  return reg;
+}
+
+bool X86Mir2Lir::IsByteRegister(RegStorage reg) {
+  return Gen64Bit() || reg.GetRegNum() < rs_rX86_SP.GetRegNum();
 }
 
 /* Clobber all regs that might be used by an external C call */
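
IsByteRegister centralizes the rule that the earlier inline GetRegNum() >= rs_rX86_SP.GetRegNum() checks spelled out by hand: on IA-32 only the first four GPRs (eax..edx, register numbers 0–3, i.e. below ESP's number 4) have byte forms that SETcc and friends can target, while in 64-bit mode every GPR is byte-addressable given a REX prefix. The same predicate with the constants written out (a sketch, with 4 standing in for rs_rX86_SP's register number):

    // Mirrors X86Mir2Lir::IsByteRegister from target_x86.cc above.
    bool IsByteAddressable(int reg_num, bool gen_64bit) {
      return gen_64bit      // x86-64: all 16 GPRs have byte forms (with REX)
          || reg_num < 4;   // x86-32: only al/cl/dl/bl
    }

This is why the SETcc paths in int_x86.cc and fp_x86.cc fall back to AllocateByteRegister() only when the predicate fails.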
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index c72e8cd..b93e3e8 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -138,6 +138,7 @@
       case kOpLsl: opcode = kX86Sal64RI; break;
       case kOpLsr: opcode = kX86Shr64RI; break;
       case kOpAsr: opcode = kX86Sar64RI; break;
+      case kOpCmp: opcode = byte_imm ? kX86Cmp64RI8 : kX86Cmp64RI; break;
       default:
         LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op;
     }
@@ -505,7 +506,7 @@
       return NewLIR5(kX86Lea32RA, r_dest.GetReg(),  r5sib_no_base /* base */,
                      r_src.GetReg() /* index */, value /* scale */, 0 /* disp */);
     } else if (op == kOpAdd) {  // lea add special case
-      return NewLIR5(Gen64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
+      return NewLIR5(r_dest.Is64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
                      r_src.GetReg() /* base */, rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */,
                      0 /* scale */, value /* disp */);
     }
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index 4324325..e26745a 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -117,6 +117,16 @@
   RecordDFSOrders(GetEntryBlock());
 
   num_reachable_blocks_ = dfs_order_->Size();
+
+  if (num_reachable_blocks_ != num_blocks_) {
+    // Hide all unreachable blocks.
+    AllNodesIterator iter(this);
+    for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
+      if (!bb->visited) {
+        bb->Hide(cu_);
+      }
+    }
+  }
 }
 
 /*
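
The new block in ComputeDFSOrders hides every basic block the depth-first pass never reached, so later phases iterate only over live code instead of tripping over orphaned blocks. A minimal sketch of the marking-then-hiding shape with generic graph types (not MIRGraph):

    #include <vector>

    struct Block {
      std::vector<int> successors;
      bool visited = false;
      bool hidden = false;
    };

    void Dfs(std::vector<Block>& blocks, int id) {
      if (blocks[id].visited) return;
      blocks[id].visited = true;
      for (int succ : blocks[id].successors) {
        Dfs(blocks, succ);
      }
    }

    void HideUnreachable(std::vector<Block>& blocks, int entry) {
      Dfs(blocks, entry);
      for (Block& bb : blocks) {
        if (!bb.visited) {
          bb.hidden = true;  // analogous to bb->Hide(cu_) above
        }
      }
    }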
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 16c1e00..3e326f0 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1918,7 +1918,7 @@
     }
   }
   uint64_t duration_ns = NanoTime() - start_ns;
-  if (duration_ns > MsToNs(compiler_->GetMaximumCompilationTimeBeforeWarning())) {
+  if (duration_ns > MsToNs(compiler_->GetMaximumCompilationTimeBeforeWarning()) && !kIsDebugBuild) {
     LOG(WARNING) << "Compilation of " << PrettyMethod(method_idx, dex_file)
                  << " took " << PrettyDuration(duration_ns);
   }
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index fad6798..9903421 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -28,6 +28,7 @@
 #include "compiled_method.h"
 #include "compiler.h"
 #include "dex_file.h"
+#include "driver/compiler_options.h"
 #include "instruction_set.h"
 #include "invoke_type.h"
 #include "method_reference.h"
@@ -105,8 +106,7 @@
                           InstructionSetFeatures instruction_set_features,
                           bool image, DescriptorSet* image_classes,
                           size_t thread_count, bool dump_stats, bool dump_passes,
-                          CumulativeLogger* timer,
-                          std::string profile_file = "");
+                          CumulativeLogger* timer, std::string profile_file = "");
 
   ~CompilerDriver();
 
@@ -394,6 +394,10 @@
     return dump_passes_;
   }
 
+  bool DidIncludeDebugSymbols() const {
+    return compiler_options_->GetIncludeDebugSymbols();
+  }
+
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 05a9ac7..5d1c5da 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -42,6 +42,7 @@
   static const size_t kDefaultTinyMethodThreshold = 20;
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
+  static const bool kDefaultIncludeDebugSymbols = kIsDebugBuild;
 
   CompilerOptions() :
     compiler_filter_(kDefaultCompilerFilter),
@@ -51,7 +52,8 @@
     tiny_method_threshold_(kDefaultTinyMethodThreshold),
     num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
     generate_gdb_information_(false),
-    top_k_profile_threshold_(kDefaultTopKProfileThreshold)
+    top_k_profile_threshold_(kDefaultTopKProfileThreshold),
+    include_debug_symbols_(kDefaultIncludeDebugSymbols)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(false)
 #endif
@@ -64,7 +66,8 @@
                   size_t tiny_method_threshold,
                   size_t num_dex_methods_threshold,
                   bool generate_gdb_information,
-                  double top_k_profile_threshold
+                  double top_k_profile_threshold,
+                  bool include_debug_symbols
 #ifdef ART_SEA_IR_MODE
                   , bool sea_ir_mode
 #endif
@@ -76,7 +79,8 @@
     tiny_method_threshold_(tiny_method_threshold),
     num_dex_methods_threshold_(num_dex_methods_threshold),
     generate_gdb_information_(generate_gdb_information),
-    top_k_profile_threshold_(top_k_profile_threshold)
+    top_k_profile_threshold_(top_k_profile_threshold),
+    include_debug_symbols_(include_debug_symbols)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(sea_ir_mode)
 #endif
@@ -139,6 +143,10 @@
     return top_k_profile_threshold_;
   }
 
+  bool GetIncludeDebugSymbols() const {
+    return include_debug_symbols_;
+  }
+
 #ifdef ART_SEA_IR_MODE
   bool GetSeaIrMode();
 #endif
@@ -157,6 +165,7 @@
   bool generate_gdb_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
+  bool include_debug_symbols_;
 #ifdef ART_SEA_IR_MODE
   bool sea_ir_mode_;
 #endif
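
Note on the compiler_options.h hunk: the new option follows the file's existing pattern exactly: a build-type-dependent default constant, a constructor parameter threaded through both constructors, a const getter, and a private field. A collapsed sketch of that pattern, ignoring the other fields:

    #ifdef NDEBUG
    static constexpr bool kIsDebugBuild = false;
    #else
    static constexpr bool kIsDebugBuild = true;
    #endif

    class OptionsPatternSketch {
     public:
      // The default tracks the build type: debug builds keep symbols unless
      // told otherwise, release builds strip them unless told otherwise.
      static constexpr bool kDefaultIncludeDebugSymbols = kIsDebugBuild;

      explicit OptionsPatternSketch(bool include_debug_symbols = kDefaultIncludeDebugSymbols)
          : include_debug_symbols_(include_debug_symbols) {}

      bool GetIncludeDebugSymbols() const { return include_debug_symbols_; }

     private:
      bool include_debug_symbols_;
    };
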
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index cb66e48..78757ec 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -807,12 +807,17 @@
                            const std::string& android_root_unused,
                            bool is_host_unused) {
   const bool debug = false;
+  const bool add_symbols = oat_writer->DidAddSymbols();
   const OatHeader& oat_header = oat_writer->GetOatHeader();
   Elf32_Word oat_data_size = oat_header.GetExecutableOffset();
   uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size;
 
   ElfBuilder builder(oat_writer, elf_file_, compiler_driver_->GetInstructionSet(), 0,
-                     oat_data_size, oat_data_size, oat_exec_size, false, debug);
+                     oat_data_size, oat_data_size, oat_exec_size, add_symbols, debug);
+
+  if (add_symbols) {
+    AddDebugSymbols(builder, oat_writer, debug);
+  }
 
   bool generateDebugInformation = compiler_driver_->GetCallFrameInformation() != nullptr;
   if (generateDebugInformation) {
@@ -833,6 +838,15 @@
   return builder.Write();
 }
 
+void ElfWriterQuick::AddDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer, bool debug) {
+  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo();
+  ElfSymtabBuilder* symtab = &builder.symtab_builder_;
+  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
+    symtab->AddSymbol(it->method_name_, &builder.text_builder_, it->low_pc_, true,
+                      it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+  }
+}
+
 static void UpdateWord(std::vector<uint8_t>*buf, int offset, int data) {
   (*buf)[offset+0] = data;
   (*buf)[offset+1] = data >> 8;
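
Note on the elf_writer_quick.cc hunk: AddDebugSymbols maps each DebugInfo record the OatWriter collected into one global STT_FUNC entry in the static symtab, with the symbol's size computed from its PC range. A standalone sketch of that mapping, with a simplified record type standing in for OatWriter::DebugInfo and a plain struct standing in for what ElfSymtabBuilder::AddSymbol records:

    #include <elf.h>     // STB_GLOBAL, STT_FUNC, ELF32_ST_INFO
    #include <cstdint>
    #include <string>
    #include <vector>

    struct DebugInfoSketch {           // stand-in for OatWriter::DebugInfo
      std::string method_name_;
      uint32_t low_pc_;
      uint32_t high_pc_;
    };

    struct SymbolSketch {              // the essentials AddSymbol records
      std::string name;
      uint32_t value;                  // offset into .text
      uint32_t size;                   // high_pc - low_pc
      unsigned char info;              // packed binding and type
    };

    std::vector<SymbolSketch> BuildFunctionSymbols(
        const std::vector<DebugInfoSketch>& infos) {
      std::vector<SymbolSketch> syms;
      for (const DebugInfoSketch& info : infos) {
        syms.push_back({info.method_name_, info.low_pc_,
                        info.high_pc_ - info.low_pc_,
                        static_cast<unsigned char>(ELF32_ST_INFO(STB_GLOBAL, STT_FUNC))});
      }
      return syms;
    }
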
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index f687d2e..dbdccfc 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -48,6 +48,10 @@
   ~ElfWriterQuick() {}
 
   class ElfBuilder;
+  void AddDebugSymbols(ElfBuilder& builder,
+                       OatWriter* oat_writer,
+                       bool debug);
+
   class ElfSectionBuilder {
    public:
     ElfSectionBuilder(const std::string& sec_name, Elf32_Word type, Elf32_Word flags,
@@ -235,7 +239,6 @@
     ~ElfBuilder() {}
 
     bool Write();
-    ElfSymtabBuilder* GetDefaultDynsymBuilder() { return &dynsym_builder_; }
 
     // Adds the given raw section to the builder. This will copy it. The caller
     // is responsible for deallocating their copy.
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 5d532ab..65bc318 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -350,31 +350,14 @@
         uint32_t thumb_offset = compiled_method->CodeDelta();
         quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
 
-        std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation();
-        if (cfi_info != nullptr) {
-          // Copy in the FDE, if present
-          const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
-          if (fde != nullptr) {
-            // Copy the information into cfi_info and then fix the address in the new copy.
-            int cur_offset = cfi_info->size();
-            cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
-
-            // Set the 'initial_location' field to address the start of the method.
-            uint32_t new_value = quick_code_offset - writer_->oat_header_->GetExecutableOffset();
-            uint32_t offset_to_update = cur_offset + 2*sizeof(uint32_t);
-            (*cfi_info)[offset_to_update+0] = new_value;
-            (*cfi_info)[offset_to_update+1] = new_value >> 8;
-            (*cfi_info)[offset_to_update+2] = new_value >> 16;
-            (*cfi_info)[offset_to_update+3] = new_value >> 24;
-            std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, false);
-            writer_->method_info_.push_back(DebugInfo(name, new_value, new_value + code_size));
-          }
-        }
+        bool force_debug_capture = false;
+        bool deduped = false;
 
         // Deduplicate code arrays.
         auto code_iter = dedupe_map_.find(compiled_method);
         if (code_iter != dedupe_map_.end()) {
           quick_code_offset = code_iter->second;
+          deduped = true;
         } else {
           dedupe_map_.Put(compiled_method, quick_code_offset);
         }
@@ -409,6 +392,41 @@
           writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
           offset_ += code_size;
         }
+
+        uint32_t quick_code_start = quick_code_offset - writer_->oat_header_->GetExecutableOffset();
+        std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation();
+        if (cfi_info != nullptr) {
+          // Copy in the FDE, if present
+          const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
+          if (fde != nullptr) {
+            // Copy the information into cfi_info and then fix the address in the new copy.
+            int cur_offset = cfi_info->size();
+            cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
+
+            // Set the 'initial_location' field to address the start of the method.
+            uint32_t offset_to_update = cur_offset + 2*sizeof(uint32_t);
+            (*cfi_info)[offset_to_update+0] = quick_code_start;
+            (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
+            (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
+            (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
+            force_debug_capture = true;
+          }
+        }
+
+
+        if (writer_->compiler_driver_->DidIncludeDebugSymbols() || force_debug_capture) {
+          // Record debug information for this function if we are doing that or
+          // we have CFI and so need it.
+          std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
+          if (deduped) {
+            // TODO We should place the DEDUPED tag on the first instance of a
+            // deduplicated symbol so that it will show up in a debuggerd crash
+            // report.
+            name += " [ DEDUPED ]";
+          }
+          writer_->method_info_.push_back(DebugInfo(name, quick_code_start,
+                                                    quick_code_start + code_size));
+        }
       }
 
       if (kIsDebugBuild) {
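
Note on the oat_writer.cc hunk: the FDE's initial_location is patched byte-by-byte because the CFI buffer is a raw little-endian byte stream, and a DebugInfo record is now captured either because symbols were requested or because an FDE points at the method and needs a matching name. Deduplicated methods keep a symbol too, tagged so a crash report reveals the code is shared. A standalone sketch of both pieces:

    #include <cstddef>
    #include <cstdint>
    #include <string>
    #include <vector>

    // Byte-wise little-endian store: the same idiom as the FDE patch above
    // and as UpdateWord() in elf_writer_quick.cc.
    void WriteLE32(std::vector<uint8_t>& buf, size_t offset, uint32_t value) {
      buf[offset + 0] = static_cast<uint8_t>(value);
      buf[offset + 1] = static_cast<uint8_t>(value >> 8);
      buf[offset + 2] = static_cast<uint8_t>(value >> 16);
      buf[offset + 3] = static_cast<uint8_t>(value >> 24);
    }

    // Mirrors the capture decision and the DEDUPED tag.
    bool ShouldCaptureDebugInfo(bool include_debug_symbols, bool has_fde) {
      return include_debug_symbols || has_fde;
    }

    std::string SymbolName(std::string pretty_method, bool deduped) {
      if (deduped) {
        pretty_method += " [ DEDUPED ]";
      }
      return pretty_method;
    }
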
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 8c20aa8..dbecb95 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -108,6 +108,10 @@
     return method_info_;
   }
 
+  bool DidAddSymbols() const {
+    return compiler_driver_->DidIncludeDebugSymbols();
+  }
+
  private:
   // The DataAccess classes are helper classes that provide access to members related to
   // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away
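
Note on the oat_writer.h hunk: DidAddSymbols completes a three-hop delegation chain. dex2oat parses the flag into CompilerOptions, CompilerDriver::DidIncludeDebugSymbols reads it back out, and the OatWriter re-exposes it so ElfWriterQuick::Write can decide whether to size the symtab and call AddDebugSymbols. A collapsed sketch of the chain, with simplified stand-in classes:

    struct OptionsSketch {
      bool include_debug_symbols = true;
      bool GetIncludeDebugSymbols() const { return include_debug_symbols; }
    };

    struct DriverSketch {
      const OptionsSketch* options;
      bool DidIncludeDebugSymbols() const { return options->GetIncludeDebugSymbols(); }
    };

    struct OatWriterSketch {
      const DriverSketch* driver;
      bool DidAddSymbols() const { return driver->DidIncludeDebugSymbols(); }
    };

    // The ELF writer only ever consults the last hop:
    //   const bool add_symbols = oat_writer->DidAddSymbols();
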
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 35149cf..e2943d3 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -203,6 +203,10 @@
   UsageError("");
   UsageError("  --dump-timing: display a breakdown of where time was spent");
   UsageError("");
+  UsageError("  --include-debug-symbols: Include ELF symbols in this oat file");
+  UsageError("");
+  UsageError("  --no-include-debug-symbols: Do not include ELF symbols in this oat file");
+  UsageError("");
   UsageError("  --runtime-arg <argument>: used to specify various arguments for the runtime,");
   UsageError("      such as initial heap size, maximum heap size, and verbose output.");
   UsageError("      Use a separate --runtime-arg switch for each argument.");
@@ -816,6 +820,7 @@
   bool dump_stats = false;
   bool dump_timing = false;
   bool dump_passes = false;
+  bool include_debug_symbols = kIsDebugBuild;
   bool dump_slow_timing = kIsDebugBuild;
   bool watch_dog_enabled = !kIsTargetBuild;
   bool generate_gdb_information = kIsDebugBuild;
@@ -969,6 +974,10 @@
       dump_passes = true;
     } else if (option == "--dump-stats") {
       dump_stats = true;
+    } else if (option == "--include-debug-symbols" || option == "--no-strip-symbols") {
+      include_debug_symbols = true;
+    } else if (option == "--no-include-debug-symbols" || option == "--strip-symbols") {
+      include_debug_symbols = false;
     } else if (option.starts_with("--profile-file=")) {
       profile_file = option.substr(strlen("--profile-file=")).data();
       VLOG(compiler) << "dex2oat: profile file is " << profile_file;
@@ -1122,7 +1131,8 @@
                                    tiny_method_threshold,
                                    num_dex_methods_threshold,
                                    generate_gdb_information,
-                                   top_k_profile_threshold
+                                   top_k_profile_threshold,
+                                   include_debug_symbols
 #ifdef ART_SEA_IR_MODE
                                    , compiler_options.sea_ir_ = true;
 #endif
@@ -1409,16 +1419,20 @@
   }
 
 #if ART_USE_PORTABLE_COMPILER  // We currently only generate symbols on Portable
-  timings.NewSplit("dex2oat ElfStripper");
-  // Strip unneeded sections for target
-  off_t seek_actual = lseek(oat_file->Fd(), 0, SEEK_SET);
-  CHECK_EQ(0, seek_actual);
-  std::string error_msg;
-  CHECK(ElfStripper::Strip(oat_file.get(), &error_msg)) << error_msg;
+  if (!compiler_options.GetIncludeDebugSymbols()) {
+    timings.NewSplit("dex2oat ElfStripper");
+    // Strip unneeded sections for target
+    off_t seek_actual = lseek(oat_file->Fd(), 0, SEEK_SET);
+    CHECK_EQ(0, seek_actual);
+    std::string error_msg;
+    CHECK(ElfStripper::Strip(oat_file.get(), &error_msg)) << error_msg;
 
 
-  // We wrote the oat file successfully, and want to keep it.
-  VLOG(compiler) << "Oat file written successfully (stripped): " << oat_location;
+    // We wrote the oat file successfully, and want to keep it.
+    VLOG(compiler) << "Oat file written successfully (stripped): " << oat_location;
+  } else {
+    VLOG(compiler) << "Oat file written successfully without stripping: " << oat_location;
+  }
 #endif  // ART_USE_PORTABLE_COMPILER
 
   timings.EndSplit();
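
Note on the dex2oat.cc hunks: symbol inclusion becomes a per-invocation decision rather than a build-type fact, and on Portable the ElfStripper step is skipped when symbols were requested, since stripping symbols that were just added would undo the work. Illustrative invocations (file paths are hypothetical; the flags are the ones added above):

    dex2oat --dex-file=/data/app/example.apk \
            --oat-file=/data/dalvik-cache/example.oat \
            --include-debug-symbols       # alias: --no-strip-symbols

    dex2oat --dex-file=/data/app/example.apk \
            --oat-file=/data/dalvik-cache/example.oat \
            --no-include-debug-symbols    # alias: --strip-symbols
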
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 349700a..50e9624 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -903,7 +903,7 @@
                                       std::vector<uint32_t>& stack_depths) {
   struct OwnedMonitorVisitor : public StackVisitor {
     OwnedMonitorVisitor(Thread* thread, Context* context,
-                        std::vector<mirror::Object*>* monitor_vector,
+                        std::vector<JDWP::ObjectId>* monitor_vector,
                         std::vector<uint32_t>* stack_depth_vector)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : StackVisitor(thread, context), current_stack_depth(0),
@@ -919,23 +919,22 @@
       return true;
     }
 
-    static void AppendOwnedMonitors(mirror::Object* owned_monitor, void* arg) {
+    static void AppendOwnedMonitors(mirror::Object* owned_monitor, void* arg)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       OwnedMonitorVisitor* visitor = reinterpret_cast<OwnedMonitorVisitor*>(arg);
-      visitor->monitors->push_back(owned_monitor);
+      visitor->monitors->push_back(gRegistry->Add(owned_monitor));
       visitor->stack_depths->push_back(visitor->current_stack_depth);
     }
 
     size_t current_stack_depth;
-    std::vector<mirror::Object*>* monitors;
+    std::vector<JDWP::ObjectId>* monitors;
     std::vector<uint32_t>* stack_depths;
   };
 
-  std::vector<mirror::Object*> monitor_vector;
-  std::vector<uint32_t> stack_depth_vector;
   ScopedObjectAccessUnchecked soa(Thread::Current());
+  Thread* thread;
   {
     MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
-    Thread* thread;
     JDWP::JdwpError error = DecodeThread(soa, thread_id, thread);
     if (error != JDWP::ERR_NONE) {
       return error;
@@ -943,18 +942,10 @@
     if (!IsSuspendedForDebugger(soa, thread)) {
       return JDWP::ERR_THREAD_NOT_SUSPENDED;
     }
-    std::unique_ptr<Context> context(Context::Create());
-    OwnedMonitorVisitor visitor(thread, context.get(), &monitor_vector, &stack_depth_vector);
-    visitor.WalkStack();
   }
-
-  // Add() requires the thread_list_lock_ not held to avoid the lock
-  // level violation.
-  for (size_t i = 0; i < monitor_vector.size(); ++i) {
-    monitors.push_back(gRegistry->Add(monitor_vector[i]));
-    stack_depths.push_back(stack_depth_vector[i]);
-  }
-
+  std::unique_ptr<Context> context(Context::Create());
+  OwnedMonitorVisitor visitor(thread, context.get(), &monitors, &stack_depths);
+  visitor.WalkStack();
   return JDWP::ERR_NONE;
 }
 
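Note on the debugger.cc hunk: the old code collected raw mirror::Object* monitors under thread_list_lock_ and registered them afterwards, because gRegistry->Add() must not run with that lock held. The new code shrinks the locked region to just DecodeThread(), then walks the stack after the lock is released, so the visitor callback can call Add() directly and fill the caller's JDWP::ObjectId vector in one pass (hence the SHARED_LOCKS_REQUIRED annotation added to AppendOwnedMonitors). A standalone sketch of the reshaped control flow, with hypothetical stand-ins for DecodeThread and the stack walk:

    #include <cstdint>
    #include <mutex>
    #include <vector>

    std::mutex thread_list_lock;  // stand-in for Locks::thread_list_lock_

    struct ThreadSketch { int id; };

    ThreadSketch* DecodeThreadSketch(int thread_id) {  // hypothetical resolver
      static ThreadSketch t{0};
      t.id = thread_id;
      return thread_id >= 0 ? &t : nullptr;
    }

    // Hypothetical walk: registers each monitor as it is found, which is
    // only safe because the list lock is no longer held at this point.
    void WalkStackSketch(ThreadSketch* /*thread*/,
                         std::vector<uint64_t>* monitor_ids,
                         std::vector<uint32_t>* stack_depths) {
      monitor_ids->push_back(1u);   // stub: one monitor at depth 0
      stack_depths->push_back(0u);
    }

    bool CollectOwnedMonitors(int thread_id,
                              std::vector<uint64_t>* monitor_ids,
                              std::vector<uint32_t>* stack_depths) {
      ThreadSketch* thread = nullptr;
      {
        // Only the thread lookup needs the list lock.
        std::lock_guard<std::mutex> mu(thread_list_lock);
        thread = DecodeThreadSketch(thread_id);
        if (thread == nullptr) {
          return false;
        }
      }
      // Lock released: register monitors immediately instead of staging
      // raw pointers in a temporary vector.
      WalkStackSketch(thread, monitor_ids, stack_depths);
      return true;
    }
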
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1d04151..2f4e805 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -274,7 +274,7 @@
     result = kHardFailure;
   }
   uint64_t duration_ns = NanoTime() - start_ns;
-  if (duration_ns > MsToNs(100)) {
+  if (duration_ns > MsToNs(100) && !kIsDebugBuild) {
     LOG(WARNING) << "Verification of " << PrettyMethod(method_idx, *dex_file)
                  << " took " << PrettyDuration(duration_ns);
   }