Merge "Re-apply: Implement cumulative timings for CompilerDriver."
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 661050f..fdc609a 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -127,15 +127,19 @@
   }
 }
 
-MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir,
-                             OpSize size, bool long_or_double, bool is_object) {
-  int32_t field_offset;
-  bool is_volatile;
-  uint32_t field_idx = mir->dalvikInsn.vC;
-  bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile);
-  if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) {
-    return NULL;
+MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir, const InlineMethod& special) {
+  // FastInstance() already checked by DexFileMethodInliner.
+  const InlineIGetIPutData& data = special.d.ifield_data;
+  if (data.method_is_static || data.object_arg != 0) {
+    return NULL;  // The object is not "this" and has to be null-checked.
   }
+
+  OpSize size = static_cast<OpSize>(data.op_size);
+  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
+  bool long_or_double = (data.op_size == kLong);
+  bool is_object = data.is_object;
+
+  // TODO: Generate the method using only the data in special.
   RegLocation rl_obj = mir_graph_->GetSrc(mir, 0);
   LockLiveArgs(mir);
   rl_obj = ArmMir2Lir::ArgLoc(rl_obj);
@@ -148,19 +152,24 @@
   // Point of no return - no aborts after this
   ArmMir2Lir::GenPrintLabel(mir);
   rl_obj = LoadArg(rl_obj);
+  uint32_t field_idx = mir->dalvikInsn.vC;
   GenIGet(field_idx, mir->optimization_flags, size, rl_dest, rl_obj, long_or_double, is_object);
   return GetNextMir(bb, mir);
 }
 
-MIR* ArmMir2Lir::SpecialIPut(BasicBlock** bb, MIR* mir,
-                             OpSize size, bool long_or_double, bool is_object) {
-  int32_t field_offset;
-  bool is_volatile;
-  uint32_t field_idx = mir->dalvikInsn.vC;
-  bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile);
-  if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) {
-    return NULL;
+MIR* ArmMir2Lir::SpecialIPut(BasicBlock** bb, MIR* mir, const InlineMethod& special) {
+  // FastInstance() already checked by DexFileMethodInliner.
+  const InlineIGetIPutData& data = special.d.ifield_data;
+  if (data.method_is_static || data.object_arg != 0) {
+    return NULL;  // The object is not "this" and has to be null-checked.
   }
+
+  OpSize size = static_cast<OpSize>(data.op_size);
+  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
+  bool long_or_double = (data.op_size == kLong);
+  bool is_object = data.is_object;
+
+  // TODO: Generate the method using only the data in special.
   RegLocation rl_src;
   RegLocation rl_obj;
   LockLiveArgs(mir);
@@ -174,7 +183,7 @@
   rl_src = ArmMir2Lir::ArgLoc(rl_src);
   rl_obj = ArmMir2Lir::ArgLoc(rl_obj);
   // Reject if source is split across registers & frame
-  if (rl_obj.location == kLocInvalid) {
+  if (rl_src.location == kLocInvalid) {
     ResetRegPool();
     return NULL;
   }
@@ -182,6 +191,7 @@
   ArmMir2Lir::GenPrintLabel(mir);
   rl_obj = LoadArg(rl_obj);
   rl_src = LoadArg(rl_src);
+  uint32_t field_idx = mir->dalvikInsn.vC;
   GenIPut(field_idx, mir->optimization_flags, size, rl_src, rl_obj, long_or_double, is_object);
   return GetNextMir(bb, mir);
 }
@@ -219,8 +229,6 @@
  */
 void ArmMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
                                 const InlineMethod& special) {
-  // TODO: Generate the method using only the data in special. (Requires FastInstance() field
-  // validation in DexFileMethodInliner::AnalyseIGetMethod()/AnalyseIPutMethod().)
   DCHECK(special.flags & kInlineSpecial);
   current_dalvik_offset_ = mir->offset;
   MIR* next_mir = NULL;
@@ -231,30 +239,17 @@
       break;
     case kInlineOpConst:
       ArmMir2Lir::GenPrintLabel(mir);
-      LoadConstant(rARM_RET0, special.data);
+      LoadConstant(rARM_RET0, static_cast<int>(special.d.data));
       next_mir = GetNextMir(&bb, mir);
       break;
-    case kInlineOpIGet: {
-      InlineIGetIPutData data;
-      data.data = special.data;
-      OpSize op_size = static_cast<OpSize>(data.d.op_size);
-      DCHECK_NE(data.d.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
-      bool long_or_double = (data.d.op_size == kLong);
-      bool is_object = data.d.is_object;
-      next_mir = SpecialIGet(&bb, mir, op_size, long_or_double, is_object);
+    case kInlineOpIGet:
+      next_mir = SpecialIGet(&bb, mir, special);
       break;
-    }
-    case kInlineOpIPut: {
-      InlineIGetIPutData data;
-      data.data = special.data;
-      OpSize op_size = static_cast<OpSize>(data.d.op_size);
-      DCHECK_NE(data.d.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
-      bool long_or_double = (data.d.op_size == kLong);
-      bool is_object = data.d.is_object;
-      next_mir = SpecialIPut(&bb, mir, op_size, long_or_double, is_object);
+    case kInlineOpIPut:
+      next_mir = SpecialIPut(&bb, mir, special);
       break;
-    }
     case kInlineOpReturnArg:
+      // TODO: Generate the method using only the data in special.
       next_mir = SpecialIdentity(mir);
       break;
     default:
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 0ed4576..598da89 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -190,8 +190,8 @@
     RegLocation LoadArg(RegLocation loc);
     void LockLiveArgs(MIR* mir);
     MIR* GetNextMir(BasicBlock** p_bb, MIR* mir);
-    MIR* SpecialIGet(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object);
-    MIR* SpecialIPut(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object);
+    MIR* SpecialIGet(BasicBlock** bb, MIR* mir, const InlineMethod& special);
+    MIR* SpecialIPut(BasicBlock** bb, MIR* mir, const InlineMethod& special);
     MIR* SpecialIdentity(MIR* mir);
     LIR* LoadFPConstantValue(int r_dest, int value);
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 5e0fed7..05eb360 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1186,4 +1186,37 @@
 void Mir2Lir::AddSlowPath(LIRSlowPath* slowpath) {
   slow_paths_.Insert(slowpath);
 }
+
+void Mir2Lir::LoadCodeAddress(int dex_method_index, InvokeType type, SpecialTargetRegister symbolic_reg) {
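+  // Reuse a matching literal if one already exists; otherwise add one and record the invoke type.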
+  LIR* data_target = ScanLiteralPool(code_literal_list_, dex_method_index, 0);
+  if (data_target == NULL) {
+    data_target = AddWordData(&code_literal_list_, dex_method_index);
+    data_target->operands[1] = type;
+  }
+  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  AppendLIR(load_pc_rel);
+  DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
+}
+
+void Mir2Lir::LoadMethodAddress(int dex_method_index, InvokeType type, SpecialTargetRegister symbolic_reg) {
+  LIR* data_target = ScanLiteralPool(method_literal_list_, dex_method_index, 0);
+  if (data_target == NULL) {
+    data_target = AddWordData(&method_literal_list_, dex_method_index);
+    data_target->operands[1] = type;
+  }
+  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  AppendLIR(load_pc_rel);
+  DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
+}
+
+void Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
+  // Use the literal pool and a PC-relative load from a data word.
+  LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
+  if (data_target == nullptr) {
+    data_target = AddWordData(&class_literal_list_, type_idx);
+  }
+  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  AppendLIR(load_pc_rel);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 0937be3..389dd9a 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -24,11 +24,27 @@
 #include "dex/mir_graph.h"
 #include "dex_instruction.h"
 #include "dex_instruction-inl.h"
+#include "verifier/method_verifier.h"
+#include "verifier/method_verifier-inl.h"
 
 #include "dex_file_method_inliner.h"
 
 namespace art {
 
+namespace {  // anonymous namespace
+
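+// Indexed by (opcode - Instruction::IGET) for IGETs and (opcode - Instruction::IPUT) for IPUTs;
+// both opcode ranges list their variants in the same order.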
+constexpr uint8_t kIGetIPutOpSizes[] = {
+    kWord,          // IGET, IPUT
+    kLong,          // IGET_WIDE, IPUT_WIDE
+    kWord,          // IGET_OBJECT, IPUT_OBJECT
+    kSignedByte,    // IGET_BOOLEAN, IPUT_BOOLEAN
+    kSignedByte,    // IGET_BYTE, IPUT_BYTE
+    kUnsignedHalf,  // IGET_CHAR, IPUT_CHAR
+    kSignedHalf,    // IGET_SHORT, IPUT_SHORT
+};
+
+}  // anonymous namespace
+
 const uint32_t DexFileMethodInliner::kIndexUnresolved;
 const char* const DexFileMethodInliner::kClassCacheNames[] = {
     "Z",                       // kClassCacheBoolean
@@ -107,6 +123,8 @@
     { kClassCacheShort, 1, { kClassCacheShort } },
     // kProtoCacheD_D
     { kClassCacheDouble, 1, { kClassCacheDouble } },
+    // kProtoCacheF_F
+    { kClassCacheFloat, 1, { kClassCacheFloat } },
     // kProtoCacheD_J
     { kClassCacheLong, 1, { kClassCacheDouble } },
     // kProtoCacheJ_D
@@ -167,7 +185,7 @@
 
 const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods[] = {
 #define INTRINSIC(c, n, p, o, d) \
-    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineIntrinsic, d } }
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineIntrinsic, { d } } }
 
     INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
     INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
@@ -182,6 +200,10 @@
     INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
     INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
     INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangMath,       Abs, F_F, kIntrinsicAbsFloat, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, F_F, kIntrinsicAbsFloat, 0),
+    INTRINSIC(JavaLangMath,       Abs, D_D, kIntrinsicAbsDouble, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, D_D, kIntrinsicAbsDouble, 0),
     INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
     INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
     INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
@@ -248,57 +270,58 @@
 DexFileMethodInliner::~DexFileMethodInliner() {
 }
 
-bool DexFileMethodInliner::AnalyseMethodCode(uint32_t method_idx,
-                                             const DexFile::CodeItem* code_item) {
+bool DexFileMethodInliner::AnalyseMethodCode(verifier::MethodVerifier* verifier) {
   // We currently support only plain return or 2-instruction methods.
 
+  const DexFile::CodeItem* code_item = verifier->CodeItem();
   DCHECK_NE(code_item->insns_size_in_code_units_, 0u);
   const Instruction* instruction = Instruction::At(code_item->insns_);
   Instruction::Code opcode = instruction->Opcode();
 
+  InlineMethod method;
+  bool success;
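+  // Each case below fills in |method|; it is registered only if analysis succeeds.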
   switch (opcode) {
     case Instruction::RETURN_VOID:
-      return AddInlineMethod(method_idx, kInlineOpNop, kInlineSpecial, 0);
+      method.opcode = kInlineOpNop;
+      method.flags = kInlineSpecial;
+      method.d.data = 0u;
+      success = true;
+      break;
     case Instruction::RETURN:
     case Instruction::RETURN_OBJECT:
-      return AnalyseReturnMethod(method_idx, code_item, kWord);
     case Instruction::RETURN_WIDE:
-      return AnalyseReturnMethod(method_idx, code_item, kLong);
+      success = AnalyseReturnMethod(code_item, &method);
+      break;
     case Instruction::CONST:
     case Instruction::CONST_4:
     case Instruction::CONST_16:
     case Instruction::CONST_HIGH16:
       // TODO: Support wide constants (RETURN_WIDE).
-      return AnalyseConstMethod(method_idx, code_item);
+      success = AnalyseConstMethod(code_item, &method);
+      break;
     case Instruction::IGET:
-      return AnalyseIGetMethod(method_idx, code_item, kWord, false);
     case Instruction::IGET_OBJECT:
-      return AnalyseIGetMethod(method_idx, code_item, kWord, true);
     case Instruction::IGET_BOOLEAN:
     case Instruction::IGET_BYTE:
-      return AnalyseIGetMethod(method_idx, code_item, kSignedByte, false);
     case Instruction::IGET_CHAR:
-      return AnalyseIGetMethod(method_idx, code_item, kUnsignedHalf, false);
     case Instruction::IGET_SHORT:
-      return AnalyseIGetMethod(method_idx, code_item, kSignedHalf, false);
     case Instruction::IGET_WIDE:
-      return AnalyseIGetMethod(method_idx, code_item, kLong, false);
+      success = AnalyseIGetMethod(verifier, &method);
+      break;
     case Instruction::IPUT:
-      return AnalyseIPutMethod(method_idx, code_item, kWord, false);
     case Instruction::IPUT_OBJECT:
-      return AnalyseIPutMethod(method_idx, code_item, kWord, true);
     case Instruction::IPUT_BOOLEAN:
     case Instruction::IPUT_BYTE:
-      return AnalyseIPutMethod(method_idx, code_item, kSignedByte, false);
     case Instruction::IPUT_CHAR:
-      return AnalyseIPutMethod(method_idx, code_item, kUnsignedHalf, false);
     case Instruction::IPUT_SHORT:
-      return AnalyseIPutMethod(method_idx, code_item, kSignedHalf, false);
     case Instruction::IPUT_WIDE:
-      return AnalyseIPutMethod(method_idx, code_item, kLong, false);
+      success = AnalyseIPutMethod(verifier, &method);
+      break;
     default:
-      return false;
-    }
+      success = false;
+      break;
+  }
+  return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method);
 }
 
 bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index) {
@@ -323,13 +346,17 @@
     case kIntrinsicFloatCvt:
       return backend->GenInlinedFloatCvt(info);
     case kIntrinsicReverseBytes:
-      return backend->GenInlinedReverseBytes(info, static_cast<OpSize>(intrinsic.data));
+      return backend->GenInlinedReverseBytes(info, static_cast<OpSize>(intrinsic.d.data));
     case kIntrinsicAbsInt:
       return backend->GenInlinedAbsInt(info);
     case kIntrinsicAbsLong:
       return backend->GenInlinedAbsLong(info);
+    case kIntrinsicAbsFloat:
+      return backend->GenInlinedAbsFloat(info);
+    case kIntrinsicAbsDouble:
+      return backend->GenInlinedAbsDouble(info);
     case kIntrinsicMinMaxInt:
-      return backend->GenInlinedMinMaxInt(info, intrinsic.data & kIntrinsicFlagMin);
+      return backend->GenInlinedMinMaxInt(info, intrinsic.d.data & kIntrinsicFlagMin);
     case kIntrinsicSqrt:
       return backend->GenInlinedSqrt(info);
     case kIntrinsicCharAt:
@@ -337,26 +364,27 @@
     case kIntrinsicCompareTo:
       return backend->GenInlinedStringCompareTo(info);
     case kIntrinsicIsEmptyOrLength:
-      return backend->GenInlinedStringIsEmptyOrLength(info, intrinsic.data & kIntrinsicFlagIsEmpty);
+      return backend->GenInlinedStringIsEmptyOrLength(
+          info, intrinsic.d.data & kIntrinsicFlagIsEmpty);
     case kIntrinsicIndexOf:
-      return backend->GenInlinedIndexOf(info, intrinsic.data & kIntrinsicFlagBase0);
+      return backend->GenInlinedIndexOf(info, intrinsic.d.data & kIntrinsicFlagBase0);
     case kIntrinsicCurrentThread:
       return backend->GenInlinedCurrentThread(info);
     case kIntrinsicPeek:
-      return backend->GenInlinedPeek(info, static_cast<OpSize>(intrinsic.data));
+      return backend->GenInlinedPeek(info, static_cast<OpSize>(intrinsic.d.data));
     case kIntrinsicPoke:
-      return backend->GenInlinedPoke(info, static_cast<OpSize>(intrinsic.data));
+      return backend->GenInlinedPoke(info, static_cast<OpSize>(intrinsic.d.data));
     case kIntrinsicCas:
-      return backend->GenInlinedCas(info, intrinsic.data & kIntrinsicFlagIsLong,
-                                    intrinsic.data & kIntrinsicFlagIsObject);
+      return backend->GenInlinedCas(info, intrinsic.d.data & kIntrinsicFlagIsLong,
+                                    intrinsic.d.data & kIntrinsicFlagIsObject);
     case kIntrinsicUnsafeGet:
-      return backend->GenInlinedUnsafeGet(info, intrinsic.data & kIntrinsicFlagIsLong,
-                                          intrinsic.data & kIntrinsicFlagIsVolatile);
+      return backend->GenInlinedUnsafeGet(info, intrinsic.d.data & kIntrinsicFlagIsLong,
+                                          intrinsic.d.data & kIntrinsicFlagIsVolatile);
     case kIntrinsicUnsafePut:
-      return backend->GenInlinedUnsafePut(info, intrinsic.data & kIntrinsicFlagIsLong,
-                                          intrinsic.data & kIntrinsicFlagIsObject,
-                                          intrinsic.data & kIntrinsicFlagIsVolatile,
-                                          intrinsic.data & kIntrinsicFlagIsOrdered);
+      return backend->GenInlinedUnsafePut(info, intrinsic.d.data & kIntrinsicFlagIsLong,
+                                          intrinsic.d.data & kIntrinsicFlagIsObject,
+                                          intrinsic.d.data & kIntrinsicFlagIsVolatile,
+                                          intrinsic.d.data & kIntrinsicFlagIsOrdered);
     default:
       LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
       return false;  // avoid warning "control reaches end of non-void function"
@@ -505,12 +533,10 @@
   dex_file_ = dex_file;
 }
 
-bool DexFileMethodInliner::AddInlineMethod(int32_t method_idx, InlineMethodOpcode opcode,
-                                           InlineMethodFlags flags, uint32_t data) {
+bool DexFileMethodInliner::AddInlineMethod(int32_t method_idx, const InlineMethod& method) {
   WriterMutexLock mu(Thread::Current(), lock_);
   if (LIKELY(inline_methods_.find(method_idx) == inline_methods_.end())) {
-    InlineMethod im = {opcode, flags, data};
-    inline_methods_.Put(method_idx, im);
+    inline_methods_.Put(method_idx, method);
     return true;
   } else {
     if (PrettyMethod(method_idx, *dex_file_) == "int java.lang.String.length()") {
@@ -522,26 +548,30 @@
   }
 }
 
-bool DexFileMethodInliner::AnalyseReturnMethod(int32_t method_idx,
-                                               const DexFile::CodeItem* code_item, OpSize size) {
+bool DexFileMethodInliner::AnalyseReturnMethod(const DexFile::CodeItem* code_item,
+                                               InlineMethod* result) {
   const Instruction* return_instruction = Instruction::At(code_item->insns_);
-  if (return_instruction->Opcode() == Instruction::RETURN_VOID) {
-    return AddInlineMethod(method_idx, kInlineOpNop, kInlineSpecial, 0);
-  }
+  Instruction::Code return_opcode = return_instruction->Opcode();
+  uint16_t size = (return_opcode == Instruction::RETURN_WIDE) ? kLong : kWord;
+  uint16_t is_object = (return_opcode == Instruction::RETURN_OBJECT) ? 1u : 0u;
   uint32_t reg = return_instruction->VRegA_11x();
   uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_;
   DCHECK_GE(reg, arg_start);
   DCHECK_LT(size == kLong ? reg + 1 : reg, code_item->registers_size_);
 
-  InlineReturnArgData data;
-  data.d.arg = reg - arg_start;
-  data.d.op_size = size;
-  data.d.reserved = 0;
-  return AddInlineMethod(method_idx, kInlineOpReturnArg, kInlineSpecial, data.data);
+  result->opcode = kInlineOpReturnArg;
+  result->flags = kInlineSpecial;
+  InlineReturnArgData* data = &result->d.return_data;
+  data->arg = reg - arg_start;
+  data->op_size = size;
+  data->is_object = is_object;
+  data->reserved = 0u;
+  data->reserved2 = 0u;
+  return true;
 }
 
-bool DexFileMethodInliner::AnalyseConstMethod(int32_t method_idx,
-                                              const DexFile::CodeItem* code_item) {
+bool DexFileMethodInliner::AnalyseConstMethod(const DexFile::CodeItem* code_item,
+                                              InlineMethod* result) {
   const Instruction* instruction = Instruction::At(code_item->insns_);
   const Instruction* return_instruction = instruction->Next();
   Instruction::Code return_opcode = return_instruction->Opcode();
@@ -566,13 +596,20 @@
   if (return_opcode == Instruction::RETURN_OBJECT && vB != 0) {
     return false;  // Returning non-null reference constant?
   }
-  return AddInlineMethod(method_idx, kInlineOpConst, kInlineSpecial, vB);
+  result->opcode = kInlineOpConst;
+  result->flags = kInlineSpecial;
+  result->d.data = static_cast<uint64_t>(vB);
+  return true;
 }
 
-bool DexFileMethodInliner::AnalyseIGetMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                                             OpSize size, bool is_object) {
+bool DexFileMethodInliner::AnalyseIGetMethod(verifier::MethodVerifier* verifier,
+                                             InlineMethod* result) {
+  const DexFile::CodeItem* code_item = verifier->CodeItem();
   const Instruction* instruction = Instruction::At(code_item->insns_);
   Instruction::Code opcode = instruction->Opcode();
+  DCHECK_LT(static_cast<size_t>(opcode - Instruction::IGET), arraysize(kIGetIPutOpSizes));
+  uint16_t size = kIGetIPutOpSizes[opcode - Instruction::IGET];
+
   const Instruction* return_instruction = instruction->Next();
   Instruction::Code return_opcode = return_instruction->Opcode();
   if (!(return_opcode == Instruction::RETURN && size != kLong) &&
@@ -585,61 +622,74 @@
   DCHECK_LT(return_opcode == Instruction::RETURN_WIDE ? return_reg + 1 : return_reg,
             code_item->registers_size_);
 
-  uint32_t vA, vB, vC;
-  uint64_t dummy_wide;
-  instruction->Decode(vA, vB, dummy_wide, vC, nullptr);
+  uint32_t dst_reg = instruction->VRegA_22c();
+  uint32_t object_reg = instruction->VRegB_22c();
+  uint32_t field_idx = instruction->VRegC_22c();
   uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_;
-  DCHECK_GE(vB, arg_start);
-  DCHECK_LT(vB, code_item->registers_size_);
-  DCHECK_LT(size == kLong ? vA + 1 : vA, code_item->registers_size_);
-  if (vA != return_reg) {
-    return false;  // Not returning the value retrieved by iget?
+  DCHECK_GE(object_reg, arg_start);
+  DCHECK_LT(object_reg, code_item->registers_size_);
+  DCHECK_LT(size == kLong ? dst_reg + 1 : dst_reg, code_item->registers_size_);
+  if (dst_reg != return_reg) {
+    return false;  // Not returning the value retrieved by IGET?
   }
 
-  // TODO: Check that the field is FastInstance().
+  if (!CompilerDriver::ComputeSpecialAccessorInfo(field_idx, false, verifier,
+                                                  &result->d.ifield_data)) {
+    return false;
+  }
 
-  InlineIGetIPutData data;
-  data.d.field = vC;
-  data.d.op_size = size;
-  data.d.is_object = is_object;
-  data.d.object_arg = vB - arg_start;  // Allow iget on any register, not just "this"
-  data.d.src_arg = 0;
-  data.d.reserved = 0;
-  return AddInlineMethod(method_idx, kInlineOpIGet, kInlineSpecial, data.data);
+  result->opcode = kInlineOpIGet;
+  result->flags = kInlineSpecial;
+  InlineIGetIPutData* data = &result->d.ifield_data;
+  data->op_size = size;
+  data->is_object = (opcode == Instruction::IGET_OBJECT) ? 1u : 0u;
+  data->object_arg = object_reg - arg_start;  // Allow IGET on any register, not just "this".
+  data->src_arg = 0;
+  data->reserved = 0;
+  return true;
 }
 
-bool DexFileMethodInliner::AnalyseIPutMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                                             OpSize size, bool is_object) {
+bool DexFileMethodInliner::AnalyseIPutMethod(verifier::MethodVerifier* verifier,
+                                             InlineMethod* result) {
+  const DexFile::CodeItem* code_item = verifier->CodeItem();
   const Instruction* instruction = Instruction::At(code_item->insns_);
+  Instruction::Code opcode = instruction->Opcode();
+  DCHECK_LT(static_cast<size_t>(opcode - Instruction::IPUT), arraysize(kIGetIPutOpSizes));
+  uint16_t size = kIGetIPutOpSizes[opcode - Instruction::IPUT];
+
   const Instruction* return_instruction = instruction->Next();
   if (return_instruction->Opcode() != Instruction::RETURN_VOID) {
     // TODO: Support returning an argument.
     // This is needed by builder classes and generated accessor setters.
     //    builder.setX(value): iput value, this, fieldX; return-object this;
     //    object.access$nnn(value): iput value, this, fieldX; return value;
-    // Use InlineIGetIPutData::d::reserved to hold the information.
+    // Use InlineIGetIPutData::reserved to hold the information.
     return false;
   }
 
-  uint32_t vA, vB, vC;
-  uint64_t dummy_wide;
-  instruction->Decode(vA, vB, dummy_wide, vC, nullptr);
+  uint32_t src_reg = instruction->VRegA_22c();
+  uint32_t object_reg = instruction->VRegB_22c();
+  uint32_t field_idx = instruction->VRegC_22c();
   uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_;
-  DCHECK_GE(vB, arg_start);
-  DCHECK_GE(vA, arg_start);
-  DCHECK_LT(vB, code_item->registers_size_);
-  DCHECK_LT(size == kLong ? vA + 1 : vA, code_item->registers_size_);
+  DCHECK_GE(object_reg, arg_start);
+  DCHECK_LT(object_reg, code_item->registers_size_);
+  DCHECK_GE(src_reg, arg_start);
+  DCHECK_LT(size == kLong ? src_reg + 1 : src_reg, code_item->registers_size_);
 
-  // TODO: Check that the field (vC) is FastInstance().
+  if (!CompilerDriver::ComputeSpecialAccessorInfo(field_idx, true, verifier,
+                                                  &result->d.ifield_data)) {
+    return false;
+  }
 
-  InlineIGetIPutData data;
-  data.d.field = vC;
-  data.d.op_size = size;
-  data.d.is_object = is_object;
-  data.d.object_arg = vB - arg_start;  // Allow iput on any register, not just "this"
-  data.d.src_arg = vA - arg_start;
-  data.d.reserved = 0;
-  return AddInlineMethod(method_idx, kInlineOpIPut, kInlineSpecial, data.data);
+  result->opcode = kInlineOpIPut;
+  result->flags = kInlineSpecial;
+  InlineIGetIPutData* data = &result->d.ifield_data;
+  data->op_size = size;
+  data->is_object = (opcode == Instruction::IPUT_OBJECT) ? 1u : 0u;
+  data->object_arg = object_reg - arg_start;  // Allow IPUT on any register, not just "this".
+  data->src_arg = src_reg - arg_start;
+  data->reserved = 0;
+  return true;
 }
 
 }  // namespace art
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 6e81303..fb7528e 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -27,6 +27,10 @@
 
 namespace art {
 
+namespace verifier {
+class MethodVerifier;
+}  // namespace verifier
+
 class CallInfo;
 class Mir2Lir;
 
@@ -36,6 +40,8 @@
   kIntrinsicReverseBytes,
   kIntrinsicAbsInt,
   kIntrinsicAbsLong,
+  kIntrinsicAbsFloat,
+  kIntrinsicAbsDouble,
   kIntrinsicMinMaxInt,
   kIntrinsicSqrt,
   kIntrinsicCharAt,
@@ -62,13 +68,7 @@
   kInlineSpecial       = 0x0002,
 };
 
-struct InlineMethod {
-  InlineMethodOpcode opcode;
-  InlineMethodFlags flags;
-  uint32_t data;
-};
-
-// IntrinsicFlags are stored in InlineMethod::data
+// IntrinsicFlags are stored in InlineMethod::d::data
 enum IntrinsicFlags {
   kIntrinsicFlagNone = 0,
 
@@ -97,28 +97,37 @@
 COMPILE_ASSERT(kWord < 8 && kLong < 8 && kSingle < 8 && kDouble < 8 && kUnsignedHalf < 8 &&
                kSignedHalf < 8 && kUnsignedByte < 8 && kSignedByte < 8, op_size_field_too_narrow);
 
-union InlineIGetIPutData {
-  uint32_t data;
-  struct {
-    uint16_t field;
-    uint32_t op_size : 3;  // OpSize
-    uint32_t is_object : 1;
-    uint32_t object_arg : 4;
-    uint32_t src_arg : 4;  // iput only
-    uint32_t reserved : 4;
-  } d;
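+// The bit fields, field_idx and field_offset pack into 64 bits; see the COMPILE_ASSERT below.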
+struct InlineIGetIPutData {
+  uint16_t op_size : 3;  // OpSize
+  uint16_t is_object : 1;
+  uint16_t object_arg : 4;
+  uint16_t src_arg : 4;  // iput only
+  uint16_t method_is_static : 1;
+  uint16_t reserved : 3;
+  uint16_t field_idx;
+  uint32_t is_volatile : 1;
+  uint32_t field_offset : 31;
 };
-COMPILE_ASSERT(sizeof(InlineIGetIPutData) == sizeof(uint32_t), InvalidSizeOfInlineIGetIPutData);
+COMPILE_ASSERT(sizeof(InlineIGetIPutData) == sizeof(uint64_t), InvalidSizeOfInlineIGetIPutData);
 
-union InlineReturnArgData {
-  uint32_t data;
-  struct {
-    uint16_t arg;
-    uint32_t op_size : 3;  // OpSize
-    uint32_t reserved : 13;
+struct InlineReturnArgData {
+  uint16_t arg;
+  uint16_t op_size : 3;  // OpSize
+  uint16_t is_object : 1;
+  uint16_t reserved : 12;
+  uint32_t reserved2;
+};
+COMPILE_ASSERT(sizeof(InlineReturnArgData) == sizeof(uint64_t), InvalidSizeOfInlineReturnArgData);
+
+struct InlineMethod {
+  InlineMethodOpcode opcode;
+  InlineMethodFlags flags;
+  union {
+    uint64_t data;
+    InlineIGetIPutData ifield_data;
+    InlineReturnArgData return_data;
   } d;
 };
-COMPILE_ASSERT(sizeof(InlineReturnArgData) == sizeof(uint32_t), InvalidSizeOfInlineReturnArgData);
 
 /**
  * Handles inlining of methods from a particular DexFile.
@@ -144,8 +153,8 @@
      * @param method_idx the index of the inlining candidate.
      * @param code_item a previously verified code item of the method.
      */
-    bool AnalyseMethodCode(uint32_t method_idx,
-                           const DexFile::CodeItem* code_item) LOCKS_EXCLUDED(lock_);
+    bool AnalyseMethodCode(verifier::MethodVerifier* verifier)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
 
     /**
      * Check whether a particular method index corresponds to an intrinsic function.
@@ -261,6 +270,7 @@
       kProtoCacheJ_J,
       kProtoCacheS_S,
       kProtoCacheD_D,
+      kProtoCacheF_F,
       kProtoCacheD_J,
       kProtoCacheJ_D,
       kProtoCacheF_I,
@@ -369,17 +379,14 @@
 
     friend class DexFileToMethodInlinerMap;
 
-    bool AddInlineMethod(int32_t method_idx, InlineMethodOpcode opcode,
-                         InlineMethodFlags flags, uint32_t data) LOCKS_EXCLUDED(lock_);
+    bool AddInlineMethod(int32_t method_idx, const InlineMethod& method) LOCKS_EXCLUDED(lock_);
 
-    bool AnalyseReturnMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                             OpSize size) LOCKS_EXCLUDED(lock_);
-    bool AnalyseConstMethod(int32_t method_idx, const DexFile::CodeItem* code_item)
-                            LOCKS_EXCLUDED(lock_);
-    bool AnalyseIGetMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                           OpSize size, bool is_object) LOCKS_EXCLUDED(lock_);
-    bool AnalyseIPutMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                           OpSize size, bool is_object) LOCKS_EXCLUDED(lock_);
+    static bool AnalyseReturnMethod(const DexFile::CodeItem* code_item, InlineMethod* result);
+    static bool AnalyseConstMethod(const DexFile::CodeItem* code_item, InlineMethod* result);
+    static bool AnalyseIGetMethod(verifier::MethodVerifier* verifier, InlineMethod* result)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+    static bool AnalyseIPutMethod(verifier::MethodVerifier* verifier, InlineMethod* result)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
     ReaderWriterMutex lock_;
     /*
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index c59f3b8..0533fbf 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -220,13 +220,7 @@
                                    &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
       // The fast path.
       if (!use_direct_type_ptr) {
-        // Use the literal pool and a PC-relative load from a data word.
-        LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
-        if (data_target == nullptr) {
-          data_target = AddWordData(&class_literal_list_, type_idx);
-        }
-        LIR* load_pc_rel = OpPcRelLoad(TargetReg(kArg0), data_target);
-        AppendLIR(load_pc_rel);
+        LoadClassType(type_idx, kArg0);
         func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArrayResolved);
         CallRuntimeHelperRegMethodRegLocation(func_offset, TargetReg(kArg0), rl_src, true);
       } else {
@@ -994,13 +988,7 @@
                                    &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
       // The fast path.
       if (!use_direct_type_ptr) {
-        // Use the literal pool and a PC-relative load from a data word.
-        LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
-        if (data_target == nullptr) {
-          data_target = AddWordData(&class_literal_list_, type_idx);
-        }
-        LIR* load_pc_rel = OpPcRelLoad(TargetReg(kArg0), data_target);
-        AppendLIR(load_pc_rel);
+        LoadClassType(type_idx, kArg0);
         if (!is_type_initialized) {
           func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectResolved);
           CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
@@ -1100,6 +1088,9 @@
                                          bool can_assume_type_is_in_dex_cache,
                                          uint32_t type_idx, RegLocation rl_dest,
                                          RegLocation rl_src) {
+  // X86 has its own implementation.
+  DCHECK_NE(cu_->instruction_set, kX86);
+
   FlushAllRegs();
   // May generate a call - use explicit registers
   LockCallTemps();
@@ -1181,15 +1172,10 @@
         LoadConstant(rl_result.low_reg, 1);     // assume true
         branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
       }
-      if (cu_->instruction_set != kX86) {
-        int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
-        OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));    // .ne case - arg0 <= class
-        OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
-        FreeTemp(r_tgt);
-      } else {
-        OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
-        OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
-      }
+      int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
+      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));    // .ne case - arg0 <= class
+      OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
+      FreeTemp(r_tgt);
     }
   }
   // TODO: only clobber when type isn't final?
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 6aaad66..ee61c8b 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -384,31 +384,15 @@
         if (cu->instruction_set != kX86) {
           cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
         }
-      } else {
+      } else if (cu->instruction_set != kX86) {
         CHECK_EQ(cu->dex_file, target_method.dex_file);
-        LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_,
-                                               target_method.dex_method_index, 0);
-        if (data_target == NULL) {
-          data_target = cg->AddWordData(&cg->code_literal_list_, target_method.dex_method_index);
-          data_target->operands[1] = type;
-        }
-        LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kInvokeTgt), data_target);
-        cg->AppendLIR(load_pc_rel);
-        DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target);
+        cg->LoadCodeAddress(target_method.dex_method_index, type, kInvokeTgt);
       }
       if (direct_method != static_cast<unsigned int>(-1)) {
         cg->LoadConstant(cg->TargetReg(kArg0), direct_method);
       } else {
         CHECK_EQ(cu->dex_file, target_method.dex_file);
-        LIR* data_target = cg->ScanLiteralPool(cg->method_literal_list_,
-                                               target_method.dex_method_index, 0);
-        if (data_target == NULL) {
-          data_target = cg->AddWordData(&cg->method_literal_list_, target_method.dex_method_index);
-          data_target->operands[1] = type;
-        }
-        LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kArg0), data_target);
-        cg->AppendLIR(load_pc_rel);
-        DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target);
+        cg->LoadMethodAddress(target_method.dex_method_index, type, kArg0);
       }
       break;
     default:
@@ -427,18 +411,10 @@
       if (direct_code != 0) {
         if (direct_code != static_cast<unsigned int>(-1)) {
           cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
-        } else {
+        } else if (cu->instruction_set != kX86) {
           CHECK_EQ(cu->dex_file, target_method.dex_file);
           CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
-          LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_,
-                                                 target_method.dex_method_index, 0);
-          if (data_target == NULL) {
-            data_target = cg->AddWordData(&cg->code_literal_list_, target_method.dex_method_index);
-            data_target->operands[1] = type;
-          }
-          LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kInvokeTgt), data_target);
-          cg->AppendLIR(load_pc_rel);
-          DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target);
+          cg->LoadCodeAddress(target_method.dex_method_index, type, kInvokeTgt);
         }
       }
       break;
@@ -1001,7 +977,10 @@
   RegLocation rl_obj = info->args[0];
   RegLocation rl_idx = info->args[1];
   rl_obj = LoadValue(rl_obj, kCoreReg);
-  rl_idx = LoadValue(rl_idx, kCoreReg);
+  // X86 wants to avoid putting a constant index into a register.
+  if (!(cu_->instruction_set == kX86 && rl_idx.is_const)) {
+    rl_idx = LoadValue(rl_idx, kCoreReg);
+  }
   int reg_max;
   GenNullCheck(rl_obj.s_reg_low, rl_obj.low_reg, info->opt_flags);
   bool range_check = (!(info->opt_flags & MIR_IGNORE_RANGE_CHECK));
@@ -1025,29 +1004,43 @@
       FreeTemp(reg_max);
       OpCondBranch(kCondUge, launch_pad);
     }
+    OpRegImm(kOpAdd, reg_ptr, data_offset);
   } else {
     if (range_check) {
-      reg_max = AllocTemp();
-      LoadWordDisp(rl_obj.low_reg, count_offset, reg_max);
+      // On x86, we can compare to memory directly
       // Set up a launch pad to allow retry in case of bounds violation */
       launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info));
       intrinsic_launchpads_.Insert(launch_pad);
-      OpRegReg(kOpCmp, rl_idx.low_reg, reg_max);
-      FreeTemp(reg_max);
-      OpCondBranch(kCondUge, launch_pad);
+      if (rl_idx.is_const) {
+        OpCmpMemImmBranch(kCondUlt, INVALID_REG, rl_obj.low_reg, count_offset,
+                          mir_graph_->ConstantValue(rl_idx.orig_sreg), launch_pad);
+      } else {
+        OpRegMem(kOpCmp, rl_idx.low_reg, rl_obj.low_reg, count_offset);
+        OpCondBranch(kCondUge, launch_pad);
+      }
     }
     reg_off = AllocTemp();
     reg_ptr = AllocTemp();
     LoadWordDisp(rl_obj.low_reg, offset_offset, reg_off);
     LoadWordDisp(rl_obj.low_reg, value_offset, reg_ptr);
   }
-  OpRegImm(kOpAdd, reg_ptr, data_offset);
-  OpRegReg(kOpAdd, reg_off, rl_idx.low_reg);
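+  // A constant index is folded in as an immediate; otherwise add the index register.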
+  if (rl_idx.is_const) {
+    OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
+  } else {
+    OpRegReg(kOpAdd, reg_off, rl_idx.low_reg);
+  }
   FreeTemp(rl_obj.low_reg);
-  FreeTemp(rl_idx.low_reg);
+  if (rl_idx.low_reg != INVALID_REG) {
+    FreeTemp(rl_idx.low_reg);
+  }
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadBaseIndexed(reg_ptr, reg_off, rl_result.low_reg, 1, kUnsignedHalf);
+  if (cu_->instruction_set != kX86) {
+    LoadBaseIndexed(reg_ptr, reg_off, rl_result.low_reg, 1, kUnsignedHalf);
+  } else {
+    LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.low_reg,
+                        INVALID_REG, kUnsignedHalf, INVALID_SREG);
+  }
   FreeTemp(reg_off);
   FreeTemp(reg_ptr);
   StoreValue(rl_dest, rl_result);
@@ -1094,7 +1087,7 @@
     return false;
   }
   RegLocation rl_src_i = info->args[0];
-  RegLocation rl_dest = InlineTarget(info);  // result reg
+  RegLocation rl_dest = (size == kLong) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (size == kLong) {
     RegLocation rl_i = LoadValueWide(rl_src_i, kCoreReg);
@@ -1179,6 +1172,43 @@
   }
 }
 
+bool Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation
+    return false;
+  }
+  RegLocation rl_src = info->args[0];
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
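+  // Compute abs() by clearing the sign bit of the raw IEEE-754 bits.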
+  int signMask = AllocTemp();
+  LoadConstant(signMask, 0x7fffffff);
+  OpRegRegReg(kOpAnd, rl_result.low_reg, rl_src.low_reg, signMask);
+  FreeTemp(signMask);
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+bool Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation
+    return false;
+  }
+  RegLocation rl_src = info->args[0];
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_dest = InlineTargetWide(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  OpRegCopyWide(rl_result.low_reg, rl_result.high_reg, rl_src.low_reg, rl_src.high_reg);
+  FreeTemp(rl_src.low_reg);
+  FreeTemp(rl_src.high_reg);
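+  // The sign bit is in the high word; clear it to compute abs().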
+  int signMask = AllocTemp();
+  LoadConstant(signMask, 0x7fffffff);
+  OpRegReg(kOpAnd, rl_result.high_reg, signMask);
+  FreeTemp(signMask);
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
+
 bool Mir2Lir::GenInlinedFloatCvt(CallInfo* info) {
   if (cu_->instruction_set == kMips) {
     // TODO - add Mips implementation
@@ -1308,7 +1338,7 @@
   RegLocation rl_src_obj = info->args[1];  // Object
   RegLocation rl_src_offset = info->args[2];  // long low
   rl_src_offset.wide = 0;  // ignore high half in info->args[3]
-  RegLocation rl_dest = InlineTarget(info);  // result reg
+  RegLocation rl_dest = is_long ? InlineTargetWide(info) : InlineTarget(info);  // result reg
   if (is_volatile) {
     GenMemBarrier(kLoadLoad);
   }
@@ -1436,8 +1466,15 @@
     call_inst = OpReg(kOpBlx, TargetReg(kInvokeTgt));
   } else {
     if (fast_path) {
-      call_inst = OpMem(kOpBlx, TargetReg(kArg0),
-                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+      if (direct_code == static_cast<unsigned int>(-1)) {
+        // Let the linker fix up the call with a relative displacement.
+        call_inst =
+          reinterpret_cast<X86Mir2Lir*>(this)->CallWithLinkerFixup(
+              target_method.dex_method_index, info->type);
+      } else {
+        call_inst = OpMem(kOpBlx, TargetReg(kArg0),
+                          mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+      }
     } else {
       ThreadOffset trampoline(-1);
       switch (info->type) {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 3a68044..6115953 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -417,7 +417,7 @@
     bool EvaluateBranch(Instruction::Code opcode, int src1, int src2);
     bool IsInexpensiveConstant(RegLocation rl_src);
     ConditionCode FlipComparisonOrder(ConditionCode before);
-    void InstallLiteralPools();
+    virtual void InstallLiteralPools();
     void InstallSwitchTables();
     void InstallFillArrayData();
     bool VerifyCatchEntries();
@@ -663,6 +663,8 @@
     bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
     bool GenInlinedAbsInt(CallInfo* info);
     bool GenInlinedAbsLong(CallInfo* info);
+    bool GenInlinedAbsFloat(CallInfo* info);
+    bool GenInlinedAbsDouble(CallInfo* info);
     bool GenInlinedFloatCvt(CallInfo* info);
     bool GenInlinedDoubleCvt(CallInfo* info);
     bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
@@ -737,6 +739,34 @@
     void SpecialMIR2LIR(const InlineMethod& special);
     void MethodMIR2LIR();
 
+    /*
+     * @brief Load the address of the dex method into the register.
+     * @param dex_method_index The index of the method to be invoked.
+     * @param type How the method will be invoked.
+     * @param symbolic_reg Symbolic register that will contain the code address.
+     * @note symbolic_reg is passed to TargetReg() to obtain the physical register.
+     */
+    void LoadCodeAddress(int dex_method_index, InvokeType type,
+                         SpecialTargetRegister symbolic_reg);
+
+    /*
+     * @brief Load the Method* of a dex method into the register.
+     * @param dex_method_index The index of the method to be invoked.
+     * @param type How the method will be invoked.
+     * @param register that will contain the code address.
+     * @note register will be passed to TargetReg to get physical register.
+     */
+    virtual void LoadMethodAddress(int dex_method_index, InvokeType type,
+                                   SpecialTargetRegister symbolic_reg);
+
+    /*
+     * @brief Load the Class* of a Dex Class type into the register.
+     * @param type_idx The dex index of the class type to load.
+     * @param symbolic_reg Symbolic register that will contain the Class* address.
+     * @note symbolic_reg is passed to TargetReg() to obtain the physical register.
+     */
+    virtual void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+
     // Routines that work for the generic case, but may be overriden by target.
     /*
      * @brief Compare memory to immediate, and branch if condition true.
@@ -1046,13 +1076,13 @@
 
     void AddSlowPath(LIRSlowPath* slowpath);
 
-  private:
-    void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
-                                    bool type_known_abstract, bool use_declaring_class,
-                                    bool can_assume_type_is_in_dex_cache,
-                                    uint32_t type_idx, RegLocation rl_dest,
-                                    RegLocation rl_src);
+    virtual void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
+                                            bool type_known_abstract, bool use_declaring_class,
+                                            bool can_assume_type_is_in_dex_cache,
+                                            uint32_t type_idx, RegLocation rl_dest,
+                                            RegLocation rl_src);
 
+  private:
     void ClobberBody(RegisterInfo* p);
     void ResetDefBody(RegisterInfo* p) {
       p->def_start = NULL;
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index ae53ddb..321c6a7 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -354,6 +354,7 @@
   { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0,        { 0,             0, 0xFF, 0,    0, 2, 0, 0 }, "CallM", "[!0r+!1d]" },
   { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH | IS_LOAD | REG_USE01,       { 0,             0, 0xFF, 0,    0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" },
   { kX86CallT, kCall, IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 2, 0, 0 }, "CallT", "fs:[!0d]" },
+  { kX86CallI, kCall, IS_UNARY_OP  | IS_BRANCH,                             { 0,             0, 0xE8, 0,    0, 0, 0, 4 }, "CallI", "!0d" },
   { kX86Ret,   kNullary, NO_OPERAND | IS_BRANCH,                            { 0,             0, 0xC3, 0,    0, 0, 0, 0 }, "Ret", "" },
 
   { kX86StartOfMethod, kMacro,  IS_UNARY_OP | SETS_CCODES,             { 0, 0, 0,    0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" },
@@ -494,6 +495,7 @@
       }
     case kCall:
       switch (lir->opcode) {
+        case kX86CallI: return 5;  // opcode 0:disp
         case kX86CallR: return 2;  // opcode modrm
         case kX86CallM:  // lir operands - 0: base, 1: disp
           return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
@@ -985,6 +987,16 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
+void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int disp) {
+  EmitPrefixAndOpcode(entry);
+  DCHECK_EQ(4, entry->skeleton.immediate_bytes);
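+  // Emit the 32-bit displacement in little-endian byte order.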
+  code_buffer_.push_back(disp & 0xFF);
+  code_buffer_.push_back((disp >> 8) & 0xFF);
+  code_buffer_.push_back((disp >> 16) & 0xFF);
+  code_buffer_.push_back((disp >> 24) & 0xFF);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+}
+
 void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) {
   DCHECK_NE(entry->skeleton.prefix1, 0);
   EmitPrefixAndOpcode(entry);
@@ -1290,6 +1302,9 @@
         break;
       case kCall:
         switch (entry->opcode) {
+          case kX86CallI:  // lir operands - 0: disp
+            EmitCallImmediate(entry, lir->operands[0]);
+            break;
           case kX86CallM:  // lir operands - 0: base, 1: disp
             EmitCallMem(entry, lir->operands[0], lir->operands[1]);
             break;
@@ -1375,6 +1390,13 @@
  */
 void X86Mir2Lir::AssembleLIR() {
   cu_->NewTimingSplit("Assemble");
+
+  // We will remove the method address if we never ended up using it
+  if (store_method_addr_ && !store_method_addr_used_) {
+    setup_method_address_[0]->flags.is_nop = true;
+    setup_method_address_[1]->flags.is_nop = true;
+  }
+
   AssignOffsets();
   int assembler_retries = 0;
   /*
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 93875c9..7f646e0 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -93,6 +93,7 @@
     RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
     rl_method = LoadValue(rl_method, kCoreReg);
     start_of_method_reg = rl_method.low_reg;
+    store_method_addr_used_ = true;
   } else {
     start_of_method_reg = AllocTemp();
     NewLIR1(kX86StartOfMethod, start_of_method_reg);
@@ -155,6 +156,7 @@
     // We can use the saved value.
     RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
     LoadValueDirect(rl_method, rX86_ARG2);
+    store_method_addr_used_ = true;
   } else {
     NewLIR1(kX86StartOfMethod, rX86_ARG2);
   }
@@ -228,9 +230,9 @@
 
   if (base_of_code_ != nullptr) {
     // We have been asked to save the address of the method start for later use.
-    NewLIR1(kX86StartOfMethod, rX86_ARG0);
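+    // Remember both instructions so AssembleLIR() can NOP them if the stored address is never used.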
+    setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rX86_ARG0);
     int displacement = SRegOffset(base_of_code_->s_reg_low);
-    StoreBaseDisp(rX86_SP, displacement, rX86_ARG0, kWord);
+    setup_method_address_[1] = StoreBaseDisp(rX86_SP, displacement, rX86_ARG0, kWord);
   }
 
   FreeTemp(rX86_ARG0);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 4c1c171..22e36d5 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -189,6 +189,24 @@
      */
     void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                             RegLocation rl_dest, RegLocation rl_src);
+    /*
+     * @brief Set up an instanceof check of a class with x86-specific code.
+     * @param needs_access_check 'true' if we must check the access.
+     * @param type_known_final 'true' if the type is known to be a final class.
+     * @param type_known_abstract 'true' if the type is known to be an abstract class.
+     * @param use_declaring_class 'true' if the type can be loaded off the current Method*.
+     * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache.
+     * @param type_idx Type index to use if use_declaring_class is 'false'.
+     * @param rl_dest Result to be set to 0 or 1.
+     * @param rl_src Object to be tested.
+     */
+    void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
+                                    bool type_known_abstract, bool use_declaring_class,
+                                    bool can_assume_type_is_in_dex_cache,
+                                    uint32_t type_idx, RegLocation rl_dest,
+                                    RegLocation rl_src);
+
     // Single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
     LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target);
@@ -245,6 +263,43 @@
     void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                        RegLocation rl_lhs, RegLocation rl_rhs);
 
+    /*
+     * @brief Dump a RegLocation using printf
+     * @param loc Register location to dump
+     */
+    static void DumpRegLocation(RegLocation loc);
+
+    /*
+     * @brief Load the Method* of a dex method into the register.
+     * @param dex_method_index The index of the method to be invoked.
+     * @param type How the method will be invoked.
+     * @param symbolic_reg Symbolic register that will contain the Method* address.
+     * @note symbolic_reg is passed to TargetReg() to obtain the physical register.
+     */
+    void LoadMethodAddress(int dex_method_index, InvokeType type,
+                           SpecialTargetRegister symbolic_reg);
+
+    /*
+     * @brief Load the Class* of a Dex Class type into the register.
+     * @param type_idx The dex index of the class type to load.
+     * @param symbolic_reg Symbolic register that will contain the Class* address.
+     * @note symbolic_reg is passed to TargetReg() to obtain the physical register.
+     */
+    void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+
+    /*
+     * @brief Generate a relative call to the method that will be patched at link time.
+     * @param dex_method_index The index of the method to be invoked.
+     * @param type How the method will be invoked.
+     * @returns Call instruction
+     */
+    LIR * CallWithLinkerFixup(int dex_method_index, InvokeType type);
+
+    /*
+     * @brief Handle x86 specific literals
+     */
+    void InstallLiteralPools();
+
   private:
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
@@ -290,6 +345,7 @@
     void EmitJmp(const X86EncodingMap* entry, int rel);
     void EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc);
     void EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp);
+    void EmitCallImmediate(const X86EncodingMap* entry, int disp);
     void EmitCallThread(const X86EncodingMap* entry, int disp);
     void EmitPcRel(const X86EncodingMap* entry, uint8_t reg, int base_or_table, uint8_t index,
                    int scale, int table_or_disp);
@@ -330,12 +386,6 @@
      */
     bool IsNoOp(Instruction::Code op, int32_t value);
 
-    /*
-     * @brief Dump a RegLocation using printf
-     * @param loc Register location to dump
-     */
-    static void DumpRegLocation(RegLocation loc);
-
     /**
      * @brief Calculate magic number and shift for a given divisor
      * @param divisor divisor number for calculation
@@ -459,11 +509,26 @@
 
     // Information derived from analysis of MIR
 
+    // The compiler temporary for the code address of the method.
+    CompilerTemp *base_of_code_;
+
     // Have we decided to compute a ptr to code and store in temporary VR?
     bool store_method_addr_;
 
-    // The compiler temporary for the code address of the method.
-    CompilerTemp *base_of_code_;
+    // Have we used the stored method address?
+    bool store_method_addr_used_;
+
+    // Instructions to remove if we didn't use the stored method address.
+    LIR* setup_method_address_[2];
+
+    // Instructions needing patching with Method* values.
+    GrowableArray<LIR*> method_address_insns_;
+
+    // Instructions needing patching with Class Type* values.
+    GrowableArray<LIR*> class_type_address_insns_;
+
+    // Instructions needing patching with PC relative code addresses.
+    GrowableArray<LIR*> call_method_insns_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index a567a8a..9dd6116 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -670,7 +670,7 @@
 bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
   RegLocation rl_src_address = info->args[0];  // long address
   rl_src_address.wide = 0;  // ignore high half in info->args[1]
-  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_dest = size == kLong ? InlineTargetWide(info) : InlineTarget(info);
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (size == kLong) {
@@ -780,8 +780,23 @@
 }
 
 LIR* X86Mir2Lir::OpPcRelLoad(int reg, LIR* target) {
-  LOG(FATAL) << "Unexpected use of OpPcRelLoad for x86";
-  return NULL;
+  CHECK(base_of_code_ != nullptr);
+
+  // Address the start of the method
+  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+  LoadValueDirectFixed(rl_method, reg);
+  store_method_addr_used_ = true;
+
+  // Load the proper value from the literal area.
+  // We don't know the proper offset for the value, so pick one that will force
+  // 4 byte offset.  We will fix this up in the assembler later to have the right
+  // value.
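+  // (A displacement of 256 cannot be encoded as a signed 8-bit disp8, whose range is
+  // -128..127, so the assembler is forced to emit the 32-bit displacement form.)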
+  LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg, reg, 256, 0, 0, target);
+  res->target = target;
+  res->flags.fixup = kFixupLoad;
+  SetMemRefType(res, true, kLiteral);
+  return res;
 }
 
 LIR* X86Mir2Lir::OpVldm(int rBase, int count) {
@@ -1717,6 +1732,88 @@
   StoreValue(rl_dest, rl_result);
 }
 
+void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
+                                            bool type_known_abstract, bool use_declaring_class,
+                                            bool can_assume_type_is_in_dex_cache,
+                                            uint32_t type_idx, RegLocation rl_dest,
+                                            RegLocation rl_src) {
+  FlushAllRegs();
+  // May generate a call - use explicit registers.
+  LockCallTemps();
+  LoadCurrMethodDirect(TargetReg(kArg1));  // kArg1 gets current Method*.
+  int class_reg = TargetReg(kArg2);  // kArg2 will hold the Class*.
+  // Reference must end up in kArg0.
+  if (needs_access_check) {
+    // Check we have access to type_idx and if not throw IllegalAccessError,
+    // Caller function returns Class* in kArg0.
+    CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeTypeAndVerifyAccess),
+                         type_idx, true);
+    OpRegCopy(class_reg, TargetReg(kRet0));
+    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
+  } else if (use_declaring_class) {
+    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
+    LoadWordDisp(TargetReg(kArg1),
+                 mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg);
+  } else {
+    // Load dex cache entry into class_reg (kArg2).
+    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
+    LoadWordDisp(TargetReg(kArg1),
+                 mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg);
+    int32_t offset_of_type =
+        mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
+        * type_idx);
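+    // (This indexes the resolved-types array of the dex cache: the array data offset plus
+    // sizeof(Class*) * type_idx, e.g. type_idx 5 lands 20 bytes past the data start on a
+    // 32-bit build. Illustration only.)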
+    LoadWordDisp(class_reg, offset_of_type, class_reg);
+    if (!can_assume_type_is_in_dex_cache) {
+      // Need to test presence of type in dex cache at runtime.
+      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
+      // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx, true);
+      OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path.
+      LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* Reload Ref. */
+      // Rejoin code paths
+      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
+      hop_branch->target = hop_target;
+    }
+  }
+  /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
+  RegLocation rl_result = GetReturn(false);
+
+  // SETcc only works with EAX..EDX.
+  DCHECK_LT(rl_result.low_reg, 4);
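+  // (In 32-bit x86 the 8-bit SETcc destination can only name AL/CL/DL/BL, the low bytes of
+  // registers 0-3; ESP/EBP/ESI/EDI have no addressable low byte here, hence the check above.)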
+
+  // Is the class NULL?
+  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
+
+  /* Load object->klass_. */
+  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
+  LoadWordDisp(TargetReg(kArg0),  mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
+  /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */
+  LIR* branchover = nullptr;
+  if (type_known_final) {
+    // Ensure top 3 bytes of result are 0.
+    LoadConstant(rl_result.low_reg, 0);
+    OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2));
+    // Set the low byte of the result to 0 or 1 from the compare condition code.
+    NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondEq);
+  } else {
+    if (!type_known_abstract) {
+      LoadConstant(rl_result.low_reg, 1);     // Assume result succeeds.
+      branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
+    }
+    OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
+    OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
+  }
+  // TODO: only clobber when type isn't final?
+  ClobberCallerSave();
+  /* Branch targets here. */
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  StoreValue(rl_dest, rl_result);
+  branch1->target = target;
+  if (branchover != nullptr) {
+    branchover->target = target;
+  }
+}
+
 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_lhs, RegLocation rl_rhs) {
   OpKind op = kOpBkpt;
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index a347d8b..1893ffc 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -510,7 +510,11 @@
 }
 
 X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena) {
+    : Mir2Lir(cu, mir_graph, arena),
+      method_address_insns_(arena, 100, kGrowableArrayMisc),
+      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
+      call_method_insns_(arena, 100, kGrowableArrayMisc) {
+  store_method_addr_used_ = false;
   for (int i = 0; i < kX86Last; i++) {
     if (X86Mir2Lir::EncodingMap[i].opcode != i) {
       LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
@@ -816,4 +820,104 @@
   Mir2Lir::Materialize();
 }
 
+void X86Mir2Lir::LoadMethodAddress(int dex_method_index, InvokeType type,
+                                   SpecialTargetRegister symbolic_reg) {
+  /*
+   * For x86, just generate a 32 bit move immediate instruction, that will be filled
+   * in at 'link time'.  For now, put a unique value based on target to ensure that
+   * code deduplication works.
+   */
+  const DexFile::MethodId& id = cu_->dex_file->GetMethodId(dex_method_index);
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
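+  // (The MethodId pointer differs per target method, so two bodies that are otherwise
+  // byte-identical but call different targets are not wrongly deduplicated before linking.)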
+
+  // Generate the move instruction with the unique pointer and save index and type.
+  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg),
+                     static_cast<int>(ptr), dex_method_index, type);
+  AppendLIR(move);
+  method_address_insns_.Insert(move);
+}
+
+void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
+  /*
+   * For x86, just generate a 32 bit move immediate instruction, that will be filled
+   * in at 'link time'.  For now, put a unique value based on target to ensure that
+   * code deduplication works.
+   */
+  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
+
+  // Generate the move instruction with the unique pointer and save index and type.
+  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg),
+                     static_cast<int>(ptr), type_idx);
+  AppendLIR(move);
+  class_type_address_insns_.Insert(move);
+}
+
+LIR *X86Mir2Lir::CallWithLinkerFixup(int dex_method_index, InvokeType type) {
+  /*
+   * For x86, just generate a 32 bit call relative instruction, that will be filled
+   * in at 'link time'.  For now, put a unique value based on target to ensure that
+   * code deduplication works.
+   */
+  const DexFile::MethodId& id = cu_->dex_file->GetMethodId(dex_method_index);
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
+
+  // Generate the call instruction with the unique pointer and save index and type.
+  LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(ptr), dex_method_index,
+                     type);
+  AppendLIR(call);
+  call_method_insns_.Insert(call);
+  return call;
+}
+
+void X86Mir2Lir::InstallLiteralPools() {
+  // These are handled differently for x86.
+  DCHECK(code_literal_list_ == nullptr);
+  DCHECK(method_literal_list_ == nullptr);
+  DCHECK(class_literal_list_ == nullptr);
+
+  // Handle the fixups for methods.
+  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
+      LIR* p = method_address_insns_.Get(i);
+      DCHECK_EQ(p->opcode, kX86Mov32RI);
+      uint32_t target = p->operands[2];
+
+      // The offset to patch is the last 4 bytes of the instruction.
+      int patch_offset = p->offset + p->flags.size - 4;
+      cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
+                                           cu_->method_idx, cu_->invoke_type,
+                                           target, static_cast<InvokeType>(p->operands[3]),
+                                           patch_offset);
+  }
+
+  // Handle the fixups for class types.
+  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
+      LIR* p = class_type_address_insns_.Get(i);
+      DCHECK_EQ(p->opcode, kX86Mov32RI);
+      uint32_t target = p->operands[2];
+
+      // The offset to patch is the last 4 bytes of the instruction.
+      int patch_offset = p->offset + p->flags.size - 4;
+      cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
+                                          cu_->method_idx, target, patch_offset);
+  }
+
+  // And now the PC-relative calls to methods.
+  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
+      LIR* p = call_method_insns_.Get(i);
+      DCHECK_EQ(p->opcode, kX86CallI);
+      uint32_t target = p->operands[1];
+
+      // The offset to patch is the last 4 bytes of the instruction.
+      int patch_offset = p->offset + p->flags.size - 4;
+      cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
+                                                 cu_->method_idx, cu_->invoke_type, target,
+                                                 static_cast<InvokeType>(p->operands[2]),
+                                                 patch_offset, -4 /* offset */);
+  }
+
+  // And do the normal processing.
+  Mir2Lir::InstallLiteralPools();
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index e2744d0..48a39bb 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -518,8 +518,7 @@
         res->target = data_target;
         res->flags.fixup = kFixupLoad;
         SetMemRefType(res, true, kLiteral);
-        // Redo after we assign target to ensure size is correct.
-        SetupResourceMasks(res);
+        store_method_addr_used_ = true;
       } else {
         if (val_lo == 0) {
           res = NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
@@ -860,6 +859,7 @@
     case Instruction::REM_DOUBLE_2ADDR:
       AnalyzeFPInstruction(opcode, bb, mir);
       break;
+
     // Packed switches and array fills need a pointer to the base of the method.
     case Instruction::FILL_ARRAY_DATA:
     case Instruction::PACKED_SWITCH:
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 6962ff7..c49f627 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -387,6 +387,7 @@
   kX86CallA,            // call [base + index * scale + disp]
                         // lir operands - 0: base, 1: index, 2: scale, 3: disp
   kX86CallT,            // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
+  kX86CallI,            // call <relative> - 0: disp; Used for core.oat linking only
   kX86Ret,              // ret; no lir operands
   kX86StartOfMethod,    // call 0; pop reg; sub reg, # - generate start of method into reg
                         // lir operands - 0: reg
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index c8fe4a8..402d4f4 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -29,6 +29,7 @@
 #include "dex_file-inl.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
+#include "dex/quick/dex_file_method_inliner.h"
 #include "jni_internal.h"
 #include "object_utils.h"
 #include "runtime.h"
@@ -49,6 +50,7 @@
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
 #include "verifier/method_verifier.h"
+#include "verifier/method_verifier-inl.h"
 
 #if defined(ART_USE_PORTABLE_COMPILER)
 #include "elf_writer_mclinker.h"
@@ -365,7 +367,7 @@
       jni_compiler_(NULL),
       compiler_enable_auto_elf_loading_(NULL),
       compiler_get_method_code_addr_(NULL),
-      support_boot_image_fixup_(instruction_set == kThumb2),
+      support_boot_image_fixup_(instruction_set != kMips),
       dedupe_code_("dedupe code"),
       dedupe_mapping_table_("dedupe mapping table"),
       dedupe_vmap_table_("dedupe vmap table"),
@@ -997,6 +999,30 @@
                                                 class_loader, NULL, type);
 }
 
+bool CompilerDriver::ComputeSpecialAccessorInfo(uint32_t field_idx, bool is_put,
+                                                verifier::MethodVerifier* verifier,
+                                                InlineIGetIPutData* result) {
+  mirror::DexCache* dex_cache = verifier->GetDexCache();
+  uint32_t method_idx = verifier->GetMethodReference().dex_method_index;
+  mirror::ArtMethod* method = dex_cache->GetResolvedMethod(method_idx);
+  mirror::ArtField* field = dex_cache->GetResolvedField(field_idx);
+  if (method == nullptr || field == nullptr) {
+    return false;
+  }
+  mirror::Class* method_class = method->GetDeclaringClass();
+  mirror::Class* field_class = field->GetDeclaringClass();
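+  // (Writes to a final field are only allowed from within its declaring class, so a putter
+  // in a different class is rejected below along with any inaccessible field.)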
+  if (!method_class->CanAccessResolvedField(field_class, field, dex_cache, field_idx) ||
+      (is_put && field->IsFinal() && method_class != field_class)) {
+    return false;
+  }
+  DCHECK_GE(field->GetOffset().Int32Value(), 0);
+  result->method_is_static = method->IsStatic();
+  result->field_idx = field_idx;
+  result->field_offset = field->GetOffset().Int32Value();
+  result->is_volatile = field->IsVolatile();
+  return true;
+}
+
 bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
                                               bool is_put, int* field_offset, bool* is_volatile) {
   ScopedObjectAccess soa(Thread::Current());
@@ -1372,6 +1398,24 @@
                                                     target_invoke_type,
                                                     literal_offset));
 }
+void CompilerDriver::AddRelativeCodePatch(const DexFile* dex_file,
+                                          uint16_t referrer_class_def_idx,
+                                          uint32_t referrer_method_idx,
+                                          InvokeType referrer_invoke_type,
+                                          uint32_t target_method_idx,
+                                          InvokeType target_invoke_type,
+                                          size_t literal_offset,
+                                          int32_t pc_relative_offset) {
+  MutexLock mu(Thread::Current(), compiled_methods_lock_);
+  code_to_patch_.push_back(new RelativeCallPatchInformation(dex_file,
+                                                            referrer_class_def_idx,
+                                                            referrer_method_idx,
+                                                            referrer_invoke_type,
+                                                            target_method_idx,
+                                                            target_invoke_type,
+                                                            literal_offset,
+                                                            pc_relative_offset));
+}
 void CompilerDriver::AddMethodPatch(const DexFile* dex_file,
                                     uint16_t referrer_class_def_idx,
                                     uint32_t referrer_method_idx,
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index c562f76..c4ac9db 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -39,10 +39,15 @@
 
 namespace art {
 
+namespace verifier {
+class MethodVerifier;
+}  // namespace verifier
+
 class AOTCompilationStats;
 class ParallelCompilationManager;
 class DexCompilationUnit;
 class DexFileToMethodInlinerMap;
+class InlineIGetIPutData;
 class OatWriter;
 class TimingLogger;
 class VerificationResults;
@@ -196,6 +201,13 @@
                           bool* is_type_initialized, bool* use_direct_type_ptr,
                           uintptr_t* direct_type_ptr);
 
+  // Can we fast path instance field access in a verified accessor?
+  // If yes, computes field's offset and volatility and whether the method is static or not.
+  static bool ComputeSpecialAccessorInfo(uint32_t field_idx, bool is_put,
+                                         verifier::MethodVerifier* verifier,
+                                         InlineIGetIPutData* result)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Can we fast path instance field access? Computes field's offset and volatility.
   bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
                                 int* field_offset, bool* is_volatile)
@@ -228,6 +240,15 @@
                     InvokeType target_invoke_type,
                     size_t literal_offset)
       LOCKS_EXCLUDED(compiled_methods_lock_);
+  void AddRelativeCodePatch(const DexFile* dex_file,
+                            uint16_t referrer_class_def_idx,
+                            uint32_t referrer_method_idx,
+                            InvokeType referrer_invoke_type,
+                            uint32_t target_method_idx,
+                            InvokeType target_invoke_type,
+                            size_t literal_offset,
+                            int32_t pc_relative_offset)
+      LOCKS_EXCLUDED(compiled_methods_lock_);
   void AddMethodPatch(const DexFile* dex_file,
                       uint16_t referrer_class_def_idx,
                       uint32_t referrer_method_idx,
@@ -361,8 +382,14 @@
     bool IsCall() const {
       return true;
     }
+    virtual bool IsRelative() const {
+      return false;
+    }
+    virtual int RelativeOffset() const {
+      return 0;
+    }
 
-   private:
+   protected:
     CallPatchInformation(const DexFile* dex_file,
                          uint16_t referrer_class_def_idx,
                          uint32_t referrer_method_idx,
@@ -377,6 +404,7 @@
           target_invoke_type_(target_invoke_type) {
     }
 
+   private:
     const InvokeType referrer_invoke_type_;
     const uint32_t target_method_idx_;
     const InvokeType target_invoke_type_;
@@ -385,6 +413,36 @@
     DISALLOW_COPY_AND_ASSIGN(CallPatchInformation);
   };
 
+  class RelativeCallPatchInformation : public CallPatchInformation {
+   public:
+    bool IsRelative() const {
+      return true;
+    }
+    int RelativeOffset() const {
+      return offset_;
+    }
+
+   private:
+    RelativeCallPatchInformation(const DexFile* dex_file,
+                                 uint16_t referrer_class_def_idx,
+                                 uint32_t referrer_method_idx,
+                                 InvokeType referrer_invoke_type,
+                                 uint32_t target_method_idx,
+                                 InvokeType target_invoke_type,
+                                 size_t literal_offset,
+                                 int32_t pc_relative_offset)
+        : CallPatchInformation(dex_file, referrer_class_def_idx,
+                           referrer_method_idx, referrer_invoke_type,
+                           target_method_idx, target_invoke_type, literal_offset),
+          offset_(pc_relative_offset) {
+    }
+
+    const int offset_;
+
+    friend class CompilerDriver;
+    DISALLOW_COPY_AND_ASSIGN(RelativeCallPatchInformation);
+  };
+
   class TypePatchInformation : public PatchInformation {
    public:
     uint32_t GetTargetTypeIdx() const {
diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc
index cdfb881..c7baf4f 100644
--- a/compiler/elf_writer_mclinker.cc
+++ b/compiler/elf_writer_mclinker.cc
@@ -265,12 +265,12 @@
   added_symbols_.Put(&symbol, &symbol);
 
   // Add input to supply code for symbol
-  const std::vector<uint8_t>& code = compiled_code.GetCode();
+  const std::vector<uint8_t>* code = compiled_code.GetPortableCode();
   // TODO: ownership of code_input?
   // TODO: why does IRBuilder::ReadInput take a non-const pointer?
   mcld::Input* code_input = ir_builder_->ReadInput(symbol,
-                                                   const_cast<uint8_t*>(&code[0]),
-                                                   code.size());
+                                                   const_cast<uint8_t*>(&(*code)[0]),
+                                                   code->size());
   CHECK(code_input != NULL);
 }
 
@@ -376,7 +376,7 @@
           (!method->IsStatic() ||
            method->IsConstructor() ||
            method->GetDeclaringClass()->IsInitialized())) {
-        method->SetOatCodeOffset(offset);
+        method->SetPortableOatCodeOffset(offset);
       }
     }
     it.Next();
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 67cd51b..e5dfb9d 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -785,7 +785,19 @@
     uintptr_t quick_code = reinterpret_cast<uintptr_t>(class_linker->GetQuickOatCodeFor(target));
     uintptr_t code_base = reinterpret_cast<uintptr_t>(&oat_file_->GetOatHeader());
     uintptr_t code_offset = quick_code - code_base;
-    SetPatchLocation(patch, PointerToLowMemUInt32(GetOatAddress(code_offset)));
+    if (patch->IsRelative()) {
+      // The value to patch is relative to the location being patched.
+      const void* quick_oat_code =
+        class_linker->GetQuickOatCodeFor(patch->GetDexFile(),
+                                         patch->GetReferrerClassDefIdx(),
+                                         patch->GetReferrerMethodIdx());
+      uintptr_t base = reinterpret_cast<uintptr_t>(quick_oat_code);
+      uintptr_t patch_location = base + patch->GetLiteralOffset();
+      uintptr_t value = quick_code - patch_location + patch->RelativeOffset();
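+      // For example (illustration only): with the callee at 0x1000, the rel32 field at
+      // patch_location 0x0800 and RelativeOffset() == -4, value is 0x1000 - 0x0800 - 4 ==
+      // 0x7fc, which the CPU adds to the end of the call instruction (0x0800 + 4) to
+      // reach 0x1000 again.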
+      SetPatchLocation(patch, value);
+    } else {
+      SetPatchLocation(patch, PointerToLowMemUInt32(GetOatAddress(code_offset)));
+    }
   }
 
   const CallPatches& methods_to_patch = compiler_driver_.GetMethodsToPatch();
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 6c2c8b0..98c64aa 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -368,7 +368,7 @@
         if (result && method_inliner_map_ != nullptr) {
           MethodReference ref = verifier->GetMethodReference();
           method_inliner_map_->GetMethodInliner(ref.dex_file)
-              ->AnalyseMethodCode(ref.dex_method_index, verifier->CodeItem());
+              ->AnalyseMethodCode(verifier);
         }
         return result;
       }
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 3b82651..3db8e12 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -44,10 +44,10 @@
     expandable_(expandable),
     storage_size_(storage_size),
     storage_(storage) {
-  DCHECK_EQ(sizeof(storage_[0]), 4U);  // Assuming 32-bit units.
-  if (storage_ == NULL) {
+  DCHECK_EQ(sizeof(*storage_), 4U);  // Assuming 32-bit units.
+  if (storage_ == nullptr) {
     storage_size_ = BitsToWords(start_bits);
-    storage_ = static_cast<uint32_t*>(allocator_->Alloc(storage_size_ * sizeof(uint32_t)));
+    storage_ = static_cast<uint32_t*>(allocator_->Alloc(storage_size_ * sizeof(*storage_)));
   }
 }
 
@@ -59,7 +59,11 @@
  * Determine whether or not the specified bit is set.
  */
 bool BitVector::IsBitSet(uint32_t num) const {
-  DCHECK_LT(num, storage_size_ * sizeof(uint32_t) * 8);
+  // If the index is over the size:
+  if (num >= storage_size_ * sizeof(*storage_) * 8) {
+    // Whether it is expandable or not, this bit does not exist: thus it is not set.
+    return false;
+  }
 
   uint32_t val = storage_[num >> 5] & check_masks[num & 0x1f];
   return (val != 0);
@@ -67,7 +71,7 @@
 
 // Mark all bits bit as "clear".
 void BitVector::ClearAllBits() {
-  memset(storage_, 0, storage_size_ * sizeof(uint32_t));
+  memset(storage_, 0, storage_size_ * sizeof(*storage_));
 }
 
 // Mark the specified bit as "set".
@@ -76,17 +80,17 @@
  * not using it badly or change resize mechanism.
  */
 void BitVector::SetBit(uint32_t num) {
-  if (num >= storage_size_ * sizeof(uint32_t) * 8) {
+  if (num >= storage_size_ * sizeof(*storage_) * 8) {
     DCHECK(expandable_) << "Attempted to expand a non-expandable bitmap to position " << num;
 
     /* Round up to word boundaries for "num+1" bits */
     uint32_t new_size = BitsToWords(num + 1);
     DCHECK_GT(new_size, storage_size_);
     uint32_t *new_storage =
-        static_cast<uint32_t*>(allocator_->Alloc(new_size * sizeof(uint32_t)));
-    memcpy(new_storage, storage_, storage_size_ * sizeof(uint32_t));
+        static_cast<uint32_t*>(allocator_->Alloc(new_size * sizeof(*storage_)));
+    memcpy(new_storage, storage_, storage_size_ * sizeof(*storage_));
     // Zero out the new storage words.
-    memset(&new_storage[storage_size_], 0, (new_size - storage_size_) * sizeof(uint32_t));
+    memset(&new_storage[storage_size_], 0, (new_size - storage_size_) * sizeof(*storage_));
     // TODO: collect stats on space wasted because of resize.
     storage_ = new_storage;
     storage_size_ = new_size;
@@ -97,30 +101,109 @@
 
 // Mark the specified bit as "unset".
 void BitVector::ClearBit(uint32_t num) {
-  DCHECK_LT(num, storage_size_ * sizeof(uint32_t) * 8);
-  storage_[num >> 5] &= ~check_masks[num & 0x1f];
+  // If the index is past the end of the storage, there is nothing to do: the bit is already clear.
+  if (num < storage_size_ * sizeof(*storage_) * 8) {
+    // The bit is in range, so go ahead and clear it.
+    storage_[num >> 5] &= ~check_masks[num & 0x1f];
+  }
 }
 
-// Intersect with another bit vector.  Sizes and expandability must be the same.
+bool BitVector::SameBitsSet(const BitVector *src) {
+  int our_highest = GetHighestBitSet();
+  int src_highest = src->GetHighestBitSet();
+
+  // If the highest bit set differs, the sets cannot be equal.
+  if (our_highest != src_highest) {
+    return false;
+  }
+
+  // If the highest bit set is -1, both are empty, so they are the same.
+  // If the highest bit set is 0, both have exactly bit 0 set, so they are the same.
+  if (our_highest <= 0) {
+    return true;
+  }
+
+  // Get the index of the word holding the highest set bit, plus one, so the memcmp below
+  // covers the words [0, our_highest_index).
+  int our_highest_index = (our_highest >> 5) + 1;
+
+  // This memcmp is enough: both vectors contain the word holding the (identical) highest
+  // set bit, and every word above it is zero in both, so comparing the words up to and
+  // including that one covers every position where the two could differ.
+  return (memcmp(storage_, src->GetRawStorage(), our_highest_index * sizeof(*storage_)) == 0);
+}
+
+// Intersect with another bit vector.
 void BitVector::Intersect(const BitVector* src) {
-  DCHECK_EQ(storage_size_, src->GetStorageSize());
-  DCHECK_EQ(expandable_, src->IsExpandable());
-  for (uint32_t idx = 0; idx < storage_size_; idx++) {
+  uint32_t src_storage_size = src->storage_size_;
+
+  // Get the minimum size between us and source.
+  uint32_t min_size = (storage_size_ < src_storage_size) ? storage_size_ : src_storage_size;
+
+  uint32_t idx;
+  for (idx = 0; idx < min_size; idx++) {
     storage_[idx] &= src->GetRawStorageWord(idx);
   }
+
+  // Now, due to this being an intersection, there are two possibilities:
+  //   - Either src was larger than us: its extra words simply don't matter to us.
+  //   - Or we are larger than src: the words src does not have count as 0, so our upper words must become 0.
+  // So all we need to do is set all remaining bits to 0.
+  for (; idx < storage_size_; idx++) {
+    storage_[idx] = 0;
+  }
 }
 
 /*
- * Union with another bit vector.  Sizes and expandability must be the same.
+ * Union with another bit vector.
  */
 void BitVector::Union(const BitVector* src) {
-  DCHECK_EQ(storage_size_, src->GetStorageSize());
-  DCHECK_EQ(expandable_, src->IsExpandable());
-  for (uint32_t idx = 0; idx < storage_size_; idx++) {
+  uint32_t src_size = src->storage_size_;
+
+  // Get our size, we use this variable for the last loop of the method:
+  //   - It can change in the if block if src is of a different size.
+  uint32_t size = storage_size_;
+
+  // Is the storage size smaller than src's?
+  if (storage_size_ < src_size) {
+    // Get the highest bit to determine how much we need to expand.
+    int highest_bit = src->GetHighestBitSet();
+
+    // If src has no bit set, we are done: there is no need for a union with src.
+    if (highest_bit == -1) {
+      return;
+    }
+
+    // Set it to reallocate.
+    SetBit(highest_bit);
+
+    // Paranoid: storage size should be big enough to hold this bit now.
+    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * sizeof(*(storage_)) * 8);
+
+  // Update the size; after the expansion our size cannot exceed the src size.
+    size = storage_size_;
+  }
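+  // For example (illustration only): if we currently hold one 32-bit word and src's highest
+  // set bit is 40, SetBit(40) grows our storage to two words, so the loop below ORs both.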
+
+  for (uint32_t idx = 0; idx < size; idx++) {
     storage_[idx] |= src->GetRawStorageWord(idx);
   }
 }
 
+void BitVector::Subtract(const BitVector *src) {
+    uint32_t src_size = src->storage_size_;
+
+    // We only need to operate on bytes up to the smaller of the sizes of the two operands.
+    unsigned int min_size = (storage_size_ > src_size) ? src_size : storage_size_;
+
+    // Subtract word by word up to min_size; both vectors have those words.
+    //   There is no need to do more:
+    //     If we are bigger than src, our upper bits are unchanged.
+    //     If we are smaller than src, our non-existent upper bits are 0 and thus can't get subtracted.
+    for (uint32_t idx = 0; idx < min_size; idx++) {
+        storage_[idx] &= (~(src->GetRawStorageWord(idx)));
+    }
+}
+
 // Count the number of bits that are set.
 uint32_t BitVector::NumSetBits() const {
   uint32_t count = 0;
@@ -132,7 +215,7 @@
 
 // Count the number of bits that are set up through and including num.
 uint32_t BitVector::NumSetBits(uint32_t num) const {
-  DCHECK_LT(num, storage_size_ * sizeof(uint32_t) * 8);
+  DCHECK_LT(num, storage_size_ * sizeof(*storage_) * 8);
   uint32_t last_word = num >> 5;
   uint32_t partial_word_bits = num & 0x1f;
 
@@ -163,15 +246,84 @@
  * iterator.
  */
 void BitVector::SetInitialBits(uint32_t num_bits) {
-  DCHECK_LE(BitsToWords(num_bits), storage_size_);
+  // If num_bits is 0, clear everything.
+  if (num_bits == 0) {
+    ClearAllBits();
+    return;
+  }
+
+  // Set the highest bit we want to set to get the BitVector allocated if need be.
+  SetBit(num_bits - 1);
+
   uint32_t idx;
+  // We can set every storage element with -1.
   for (idx = 0; idx < (num_bits >> 5); idx++) {
     storage_[idx] = -1;
   }
+
+  // Handle the last few bits, if any.
   uint32_t rem_num_bits = num_bits & 0x1f;
-  if (rem_num_bits) {
+  if (rem_num_bits != 0) {
     storage_[idx] = (1 << rem_num_bits) - 1;
   }
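+  // For example (illustration only): num_bits == 35 fills storage_[0] with 0xffffffff in
+  // the loop above and leaves rem_num_bits == 3, so storage_[1] becomes (1 << 3) - 1 == 0x7.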
+
+  // Now set the upper ones to 0.
+  for (; idx < storage_size_; idx++) {
+    storage_[idx] = 0;
+  }
+}
+
+int BitVector::GetHighestBitSet() const {
+  unsigned int max = storage_size_;
+  for (int idx = max - 1; idx >= 0; idx--) {
+    // If not 0, we have more work: check the bits.
+    uint32_t value = storage_[idx];
+
+    if (value != 0) {
+      // Shift right for the counting.
+      value /= 2;
+
+      int cnt = 0;
+
+      // Count the bits.
+      while (value > 0) {
+        value /= 2;
+        cnt++;
+      }
+
+      // Return cnt + how many storage units still remain * the number of bits per unit.
+      int res = cnt + (idx * (sizeof(*storage_) * 8));
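+      // For example (illustration only): storage_[1] == 0x5 with storage_[0] == 0 gives
+      // cnt == 2, so the result is 2 + 1 * 32 == 34, the index of the highest set bit.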
+      return res;
+    }
+  }
+
+  // All zero, therefore return -1.
+  return -1;
+}
+
+void BitVector::Copy(const BitVector *src) {
+  // Get highest bit set, we only need to copy till then.
+  int highest_bit = src->GetHighestBitSet();
+
+  // If nothing is set, clear everything.
+  if (highest_bit == -1) {
+    ClearAllBits();
+    return;
+  }
+
+  // Set upper bit to ensure right size before copy.
+  SetBit(highest_bit);
+
+  // Now set until highest bit's storage.
+  uint32_t size = 1 + (highest_bit / (sizeof(*storage_) * 8));
+  memcpy(storage_, src->GetRawStorage(), sizeof(*storage_) * size);
+
+  // Set upper bits to 0.
+  uint32_t left = storage_size_ - size;
+
+  if (left > 0) {
+    memset(storage_ + size, 0, sizeof(*storage_) * left);
+  }
 }
 
 }  // namespace art
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index 74bec08..c8f285e 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -46,7 +46,9 @@
           DCHECK_EQ(bit_size_, p_bits_->GetStorageSize() * sizeof(uint32_t) * 8);
           DCHECK_EQ(bit_storage_, p_bits_->GetRawStorage());
 
-          if (UNLIKELY(bit_index_ >= bit_size_)) return -1;
+          if (UNLIKELY(bit_index_ >= bit_size_)) {
+            return -1;
+          }
 
           uint32_t word_index = bit_index_ / 32;
           uint32_t word = bit_storage_[word_index];
@@ -89,7 +91,7 @@
               bool expandable,
               Allocator* allocator,
               uint32_t storage_size = 0,
-              uint32_t* storage = NULL);
+              uint32_t* storage = nullptr);
 
     virtual ~BitVector();
 
@@ -98,17 +100,24 @@
     bool IsBitSet(uint32_t num) const;
     void ClearAllBits();
     void SetInitialBits(uint32_t num_bits);
-    void Copy(BitVector* src) {
-      memcpy(storage_, src->GetRawStorage(), sizeof(uint32_t) * storage_size_);
-    }
+
+    void Copy(const BitVector* src);
     void Intersect(const BitVector* src2);
     void Union(const BitVector* src);
+    void Subtract(const BitVector* src);
     // Are we equal to another bit vector?  Note: expandability attributes must also match.
     bool Equal(const BitVector* src) {
       return (storage_size_ == src->GetStorageSize()) &&
         (expandable_ == src->IsExpandable()) &&
         (memcmp(storage_, src->GetRawStorage(), storage_size_ * sizeof(uint32_t)) == 0);
     }
+
+    /**
+     * @brief Are all the bits set the same?
+     * @details expandability and size can differ as long as the same bits are set.
+     */
+    bool SameBitsSet(const BitVector *src);
+
     uint32_t NumSetBits() const;
     uint32_t NumSetBits(uint32_t num) const;
 
@@ -121,6 +130,11 @@
     const uint32_t* GetRawStorage() const { return storage_; }
     size_t GetSizeOf() const { return storage_size_ * sizeof(uint32_t); }
 
+    /**
+     * @return the highest bit set, -1 if none are set
+     */
+    int GetHighestBitSet() const;
+
   private:
     Allocator* const allocator_;
     const bool expandable_;         // expand bitmap if we run out?
diff --git a/runtime/common_test.h b/runtime/common_test.h
index f37fb5c..af7e8ae 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -679,7 +679,7 @@
       if (result && method_inliner_map_ != nullptr) {
         MethodReference ref = verifier->GetMethodReference();
         method_inliner_map_->GetMethodInliner(ref.dex_file)
-            ->AnalyseMethodCode(ref.dex_method_index, verifier->CodeItem());
+            ->AnalyseMethodCode(verifier);
       }
       return result;
     }
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 99e7867..8280c7c 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -63,6 +63,9 @@
   mirror::ArtMethod* method;
   uint32_t dex_pc;
 
+  AllocRecordStackTraceElement() : method(nullptr), dex_pc(0) {
+  }
+
   int32_t LineNumber() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return MethodHelper(method).GetLineNumFromDexPC(dex_pc);
   }
@@ -81,6 +84,20 @@
     }
     return depth;
   }
+
+  void UpdateObjectPointers(RootVisitor* visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (type != nullptr) {
+      type = down_cast<mirror::Class*>(visitor(type, arg));
+    }
+    for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) {
+      mirror::ArtMethod*& m = stack[stack_frame].method;
+      if (m == nullptr) {
+        break;
+      }
+      m = down_cast<mirror::ArtMethod*>(visitor(m, arg));
+    }
+  }
 };
 
 struct Breakpoint {
@@ -775,6 +792,8 @@
 JDWP::JdwpError Dbg::GetInstanceCounts(const std::vector<JDWP::RefTypeId>& class_ids,
                                        std::vector<uint64_t>& counts)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  heap->CollectGarbage(false);
   std::vector<mirror::Class*> classes;
   counts.clear();
   for (size_t i = 0; i < class_ids.size(); ++i) {
@@ -786,19 +805,20 @@
     classes.push_back(c);
     counts.push_back(0);
   }
-
-  Runtime::Current()->GetHeap()->CountInstances(classes, false, &counts[0]);
+  heap->CountInstances(classes, false, &counts[0]);
   return JDWP::ERR_NONE;
 }
 
 JDWP::JdwpError Dbg::GetInstances(JDWP::RefTypeId class_id, int32_t max_count, std::vector<JDWP::ObjectId>& instances)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  // We only want reachable instances, so do a GC.
+  heap->CollectGarbage(false);
   JDWP::JdwpError status;
   mirror::Class* c = DecodeClass(class_id, status);
-  if (c == NULL) {
+  if (c == nullptr) {
     return status;
   }
-
   std::vector<mirror::Object*> raw_instances;
   Runtime::Current()->GetHeap()->GetInstances(c, max_count, raw_instances);
   for (size_t i = 0; i < raw_instances.size(); ++i) {
@@ -810,13 +830,14 @@
 JDWP::JdwpError Dbg::GetReferringObjects(JDWP::ObjectId object_id, int32_t max_count,
                                          std::vector<JDWP::ObjectId>& referring_objects)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  heap->CollectGarbage(false);
   mirror::Object* o = gRegistry->Get<mirror::Object*>(object_id);
   if (o == NULL || o == ObjectRegistry::kInvalidObject) {
     return JDWP::ERR_INVALID_OBJECT;
   }
-
   std::vector<mirror::Object*> raw_instances;
-  Runtime::Current()->GetHeap()->GetReferringObjects(o, max_count, raw_instances);
+  heap->GetReferringObjects(o, max_count, raw_instances);
   for (size_t i = 0; i < raw_instances.size(); ++i) {
     referring_objects.push_back(gRegistry->Add(raw_instances[i]));
   }
@@ -3772,6 +3793,37 @@
   }
 }
 
+void Dbg::UpdateObjectPointers(RootVisitor* visitor, void* arg) {
+  {
+    MutexLock mu(Thread::Current(), gAllocTrackerLock);
+    if (recent_allocation_records_ != nullptr) {
+      size_t i = HeadIndex();
+      size_t count = gAllocRecordCount;
+      while (count--) {
+        AllocRecord* record = &recent_allocation_records_[i];
+        DCHECK(record != nullptr);
+        record->UpdateObjectPointers(visitor, arg);
+        i = (i + 1) & (gAllocRecordMax - 1);
+      }
+    }
+  }
+  if (gRegistry != nullptr) {
+    gRegistry->UpdateObjectPointers(visitor, arg);
+  }
+}
+
+void Dbg::AllowNewObjectRegistryObjects() {
+  if (gRegistry != nullptr) {
+    gRegistry->AllowNewObjects();
+  }
+}
+
+void Dbg::DisallowNewObjectRegistryObjects() {
+  if (gRegistry != nullptr) {
+    gRegistry->DisallowNewObjects();
+  }
+}
+
 class StringTable {
  public:
   StringTable() {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 328c9cd..f1e3f45 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -452,6 +452,10 @@
   static jbyteArray GetRecentAllocations() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void DumpRecentAllocations();
 
+  // Updates the stored direct object pointers (called from SweepSystemWeaks).
+  static void UpdateObjectPointers(RootVisitor* visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   enum HpifWhen {
     HPIF_WHEN_NEVER = 0,
     HPIF_WHEN_NOW = 1,
@@ -476,6 +480,9 @@
   static void DdmSendHeapSegments(bool native)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static void AllowNewObjectRegistryObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void DisallowNewObjectRegistryObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   static void DdmBroadcast(bool connect) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostThreadStartOrStop(Thread*, uint32_t)
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 6c9e6f2..65d4c441 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -15,6 +15,9 @@
  */
 
 #include "base/mutex-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/object.h"
+#include "mirror/object-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "rosalloc.h"
@@ -749,21 +752,35 @@
   }
 }
 
-void RosAlloc::Run::Dump() {
-  size_t idx = size_bracket_idx_;
-  size_t num_slots = numOfSlots[idx];
-  size_t num_vec = RoundUp(num_slots, 32) / 32;
+std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
   std::string bit_map_str;
   for (size_t v = 0; v < num_vec; v++) {
-    uint32_t vec = alloc_bit_map_[v];
+    uint32_t vec = bit_map_base[v];
     if (v != num_vec - 1) {
       bit_map_str.append(StringPrintf("%x-", vec));
     } else {
       bit_map_str.append(StringPrintf("%x", vec));
     }
   }
-  LOG(INFO) << "Run : " << std::hex << reinterpret_cast<intptr_t>(this)
-            << std::dec << ", idx=" << idx << ", bit_map=" << bit_map_str;
+  return bit_map_str;
+}
+
+std::string RosAlloc::Run::Dump() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  std::ostringstream stream;
+  stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
+         << "{ magic_num=" << static_cast<int>(magic_num_)
+         << " size_bracket_idx=" << idx
+         << " is_thread_local=" << static_cast<int>(is_thread_local_)
+         << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
+         << " top_slot_idx=" << top_slot_idx_
+         << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
+         << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
+         << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
+         << " }" << std::endl;
+  return stream.str();
 }
 
 void* RosAlloc::Run::AllocSlot() {
@@ -849,7 +866,7 @@
   size_t num_vec = RoundUp(num_slots, 32) / 32;
   bool changed = false;
   uint32_t* vecp = &alloc_bit_map_[0];
-  uint32_t* tl_free_vecp = &thread_local_free_bit_map()[0];
+  uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
   bool is_all_free_after = true;
   for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
     uint32_t tl_free_vec = *tl_free_vecp;
@@ -881,7 +898,7 @@
   size_t num_slots = numOfSlots[idx];
   size_t num_vec = RoundUp(num_slots, 32) / 32;
   uint32_t* vecp = &alloc_bit_map_[0];
-  uint32_t* free_vecp = &bulk_free_bit_map()[0];
+  uint32_t* free_vecp = &BulkFreeBitMap()[0];
   for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
     uint32_t free_vec = *free_vecp;
     if (free_vec != 0) {
@@ -898,8 +915,8 @@
   byte idx = size_bracket_idx_;
   size_t num_slots = numOfSlots[idx];
   size_t num_vec = RoundUp(num_slots, 32) / 32;
-  uint32_t* to_vecp = &thread_local_free_bit_map()[0];
-  uint32_t* from_vecp = &bulk_free_bit_map()[0];
+  uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
+  uint32_t* from_vecp = &BulkFreeBitMap()[0];
   for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
     uint32_t from_vec = *from_vecp;
     if (from_vec != 0) {
@@ -912,11 +929,11 @@
 
 inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
   DCHECK_NE(is_thread_local_, 0);
-  MarkFreeBitMapShared(ptr, thread_local_free_bit_map(), "MarkThreadLocalFreeBitMap");
+  MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
 }
 
 inline void RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
-  MarkFreeBitMapShared(ptr, bulk_free_bit_map(), "MarkFreeBitMap");
+  MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap");
 }
 
 inline void RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
@@ -975,6 +992,32 @@
   return true;
 }
 
+inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = BulkFreeBitMap()[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = ThreadLocalFreeBitMap()[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
 inline void RosAlloc::Run::ClearBitMaps() {
   byte idx = size_bracket_idx_;
   size_t num_slots = numOfSlots[idx];
@@ -1196,8 +1239,10 @@
   }
 }
 
-void RosAlloc::DumpPageMap(Thread* self) {
-  MutexLock mu(self, lock_);
+std::string RosAlloc::DumpPageMap() {
+  std::ostringstream stream;
+  stream << "RosAlloc PageMap: " << std::endl;
+  lock_.AssertHeld(Thread::Current());
   size_t end = page_map_.size();
   FreePageRun* curr_fpr = NULL;
   size_t curr_fpr_size = 0;
@@ -1218,15 +1263,15 @@
           curr_fpr_size = fpr->ByteSize(this);
           DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
           remaining_curr_fpr_size = curr_fpr_size - kPageSize;
-          LOG(INFO) << "[" << i << "]=Empty (FPR start)"
-                    << " fpr_size=" << curr_fpr_size
-                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          stream << "[" << i << "]=Empty (FPR start)"
+                 << " fpr_size=" << curr_fpr_size
+                 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
           if (remaining_curr_fpr_size == 0) {
             // Reset at the end of the current free page run.
             curr_fpr = NULL;
             curr_fpr_size = 0;
           }
-          LOG(INFO) << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr);
+          stream << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr) << std::endl;
           DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
         } else {
           // Still part of the current free page run.
@@ -1235,8 +1280,8 @@
           DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
           DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
           remaining_curr_fpr_size -= kPageSize;
-          LOG(INFO) << "[" << i << "]=Empty (FPR part)"
-                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          stream << "[" << i << "]=Empty (FPR part)"
+                 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
           if (remaining_curr_fpr_size == 0) {
             // Reset at the end of the current free page run.
             curr_fpr = NULL;
@@ -1249,36 +1294,38 @@
       case kPageMapLargeObject: {
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
-        LOG(INFO) << "[" << i << "]=Large (start)";
+        stream << "[" << i << "]=Large (start)" << std::endl;
         break;
       }
       case kPageMapLargeObjectPart:
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
-        LOG(INFO) << "[" << i << "]=Large (part)";
+        stream << "[" << i << "]=Large (part)" << std::endl;
         break;
       case kPageMapRun: {
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
         Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
         size_t idx = run->size_bracket_idx_;
-        LOG(INFO) << "[" << i << "]=Run (start)"
-                  << " idx=" << idx
-                  << " numOfPages=" << numOfPages[idx]
-                  << " thread_local=" << static_cast<int>(run->is_thread_local_)
-                  << " is_all_free=" << (run->IsAllFree() ? 1 : 0);
+        stream << "[" << i << "]=Run (start)"
+               << " idx=" << idx
+               << " numOfPages=" << numOfPages[idx]
+               << " thread_local=" << static_cast<int>(run->is_thread_local_)
+               << " is_all_free=" << (run->IsAllFree() ? 1 : 0)
+               << std::endl;
         break;
       }
       case kPageMapRunPart:
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
-        LOG(INFO) << "[" << i << "]=Run (part)";
+        stream << "[" << i << "]=Run (part)" << std::endl;
         break;
       default:
-        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        stream << "[" << i << "]=Unrecognizable page map type: " << pm;
         break;
     }
   }
+  return stream.str();
 }
 
 size_t RosAlloc::UsableSize(void* ptr) {
@@ -1631,6 +1678,223 @@
   ++(*objects_allocated);
 }
 
+void RosAlloc::Verify() {
+  Thread* self = Thread::Current();
+  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
+      << "The mutator locks isn't exclusively locked at RosAlloc::Verify()";
+  MutexLock mu(self, *Locks::thread_list_lock_);
+  WriterMutexLock wmu(self, bulk_free_lock_);
+  std::vector<Run*> runs;
+  {
+    MutexLock mu(self, lock_);
+    size_t pm_end = page_map_.size();
+    size_t i = 0;
+    while (i < pm_end) {
+      byte pm = page_map_[i];
+      switch (pm) {
+        case kPageMapEmpty: {
+          // The start of a free page run.
+          FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+          DCHECK(fpr->magic_num_ == kMagicNumFree) << "Bad magic number : " << fpr->magic_num_;
+          CHECK(free_page_runs_.find(fpr) != free_page_runs_.end())
+              << "An empty page must belong to the free page run set";
+          size_t fpr_size = fpr->ByteSize(this);
+          CHECK(IsAligned<kPageSize>(fpr_size))
+              << "A free page run size isn't page-aligned : " << fpr_size;
+          size_t num_pages = fpr_size / kPageSize;
+          CHECK_GT(num_pages, static_cast<uintptr_t>(0))
+              << "A free page run size must be > 0 : " << fpr_size;
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            CHECK_EQ(page_map_[j], kPageMapEmpty)
+                << "A mismatch between the page map table for kPageMapEmpty "
+                << " at page index " << j
+                << " and the free page run size : page index range : "
+                << i << " to " << (i + num_pages) << std::endl << DumpPageMap();
+          }
+          i += num_pages;
+          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
+                              << std::endl << DumpPageMap();
+          break;
+        }
+        case kPageMapLargeObject: {
+          // The start of a large object.
+          size_t num_pages = 1;
+          size_t idx = i + 1;
+          while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
+            num_pages++;
+            idx++;
+          }
+          void* start = base_ + i * kPageSize;
+          mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
+          size_t obj_size = obj->SizeOf();
+          CHECK(obj_size > kLargeSizeThreshold)
+              << "A rosalloc large object size must be > " << kLargeSizeThreshold;
+          CHECK_EQ(num_pages, RoundUp(obj_size, kPageSize) / kPageSize)
+              << "A rosalloc large object size " << obj_size
+              << " does not match the page map table " << (num_pages * kPageSize)
+              << std::endl << DumpPageMap();
+          i += num_pages;
+          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
+                              << std::endl << DumpPageMap();
+          break;
+        }
+        case kPageMapLargeObjectPart:
+          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
+          break;
+        case kPageMapRun: {
+          // The start of a run.
+          Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum) << "Bad magic number : " << run->magic_num_;
+          size_t idx = run->size_bracket_idx_;
+          CHECK(idx < kNumOfSizeBrackets) << "Out of range size bracket index : " << idx;
+          size_t num_pages = numOfPages[idx];
+          CHECK_GT(num_pages, static_cast<uintptr_t>(0))
+              << "Run size must be > 0 : " << num_pages;
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            CHECK_EQ(page_map_[j], kPageMapRunPart)
+                << "A mismatch between the page map table for kPageMapRunPart "
+                << " at page index " << j
+                << " and the run size : page index range " << i << " to " << (i + num_pages)
+                << std::endl << DumpPageMap();
+          }
+          runs.push_back(run);
+          i += num_pages;
+          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
+                              << std::endl << DumpPageMap();
+          break;
+        }
+        case kPageMapRunPart:
+          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
+          break;
+        default:
+          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
+          break;
+      }
+    }
+  }
+
+  // Call Verify() here for the lock order.
+  for (auto& run : runs) {
+    run->Verify(self, this);
+  }
+}
+
+void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
+  DCHECK(magic_num_ == kMagicNum) << "Bad magic number : " << Dump();
+  size_t idx = size_bracket_idx_;
+  CHECK(idx < kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
+  byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
+  size_t num_slots = numOfSlots[idx];
+  size_t bracket_size = IndexToBracketSize(idx);
+  CHECK_EQ(slot_base + num_slots * bracket_size,
+           reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize)
+      << "Mismatch in the end address of the run " << Dump();
+  // Check that the bulk free bitmap is clean. It's only used during BulkFree().
+  CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
+  // Check the bump index mode, if it's on.
+  if (top_slot_idx_ < num_slots) {
+    // If the bump index mode is on (top_slot_idx_ < num_slots), then
+    // all of the slots after the top index must be free.
+    for (size_t i = top_slot_idx_; i < num_slots; ++i) {
+      size_t vec_idx = i / 32;
+      size_t vec_off = i % 32;
+      uint32_t vec = alloc_bit_map_[vec_idx];
+      CHECK_EQ((vec & (1 << vec_off)), static_cast<uint32_t>(0))
+          << "A slot >= top_slot_idx_ isn't free " << Dump();
+    }
+  } else {
+    CHECK_EQ(top_slot_idx_, num_slots)
+        << "If the bump index mode is off, the top index == the number of slots "
+        << Dump();
+  }
+  // Check the thread local runs, the current runs, and the run sets.
+  if (is_thread_local_) {
+    // If it's a thread local run, then it must be pointed to by an owner thread.
+    bool owner_found = false;
+    std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+    for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
+      Thread* thread = *it;
+      for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+        MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
+        Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[i]);
+        if (thread_local_run == this) {
+          CHECK(!owner_found)
+              << "A thread local run has more than one owner thread " << Dump();
+          CHECK_EQ(i, idx)
+              << "A mismatching size bracket index in a thread local run " << Dump();
+          owner_found = true;
+        }
+      }
+    }
+    CHECK(owner_found) << "A thread local run has no owner thread " << Dump();
+  } else {
+    // If it's not thread local, check that the thread local free bitmap is clean.
+    CHECK(IsThreadLocalFreeBitmapClean())
+        << "A non-thread-local run's thread local free bitmap isn't clean "
+        << Dump();
+    // Check if it's a current run for the size bracket.
+    bool is_current_run = false;
+    for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+      MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
+      Run* current_run = rosalloc->current_runs_[i];
+      if (idx == i) {
+        if (this == current_run) {
+          is_current_run = true;
+        }
+      } else {
+        // If the size bracket index does not match, then it must not
+        // be a current run.
+        CHECK_NE(this, current_run)
+            << "A current run points to a run with a wrong size bracket index " << Dump();
+      }
+    }
+    // If it's neither a thread-local run nor a current run, then it must be
+    // in a run set.
+    if (!is_current_run) {
+      MutexLock mu(self, rosalloc->lock_);
+      std::set<Run*>& non_full_runs = rosalloc->non_full_runs_[idx];
+      // If it's all free, it must be a free page run rather than a run.
+      CHECK(!IsAllFree()) << "A free run must be in a free page run set " << Dump();
+      if (!IsFull()) {
+        // If it's not full, it must be in the non-full run set.
+        CHECK(non_full_runs.find(this) != non_full_runs.end())
+            << "A non-full run isn't in the non-full run set " << Dump();
+      } else {
+        // If it's full, it must be in the full run set (debug build only).
+        if (kIsDebugBuild) {
+          hash_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
+          CHECK(full_runs.find(this) != full_runs.end())
+              << " A full run isn't in the full run set " << Dump();
+        }
+      }
+    }
+  }
+  // Check each slot.
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots) << "Out of bounds";
+    uint32_t vec = alloc_bit_map_[v];
+    uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v];
+    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
+    for (size_t i = 0; i < end; ++i) {
+      bool is_allocated = ((vec >> i) & 0x1) != 0;
+      // If a thread local run, slots may be marked freed in the
+      // thread local free bitmap.
+      bool is_thread_local_freed = is_thread_local_ && ((thread_local_free_vec >> i) & 0x1) != 0;
+      if (is_allocated && !is_thread_local_freed) {
+        byte* slot_addr = slot_base + (slots + i) * bracket_size;
+        mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
+        size_t obj_size = obj->SizeOf();
+        CHECK_LE(obj_size, kLargeSizeThreshold)
+            << "A run slot contains a large object " << Dump();
+        CHECK_EQ(SizeToIndex(obj_size), idx)
+            << "A run slot contains an object with wrong size " << Dump();
+      }
+    }
+  }
+}
+
 }  // namespace allocator
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 7480975..c4238c7 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -212,11 +212,11 @@
       return size;
     }
     // Returns the base address of the free bit map.
-    uint32_t* bulk_free_bit_map() {
+    uint32_t* BulkFreeBitMap() {
       return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
     }
     // Returns the base address of the thread local free bit map.
-    uint32_t* thread_local_free_bit_map() {
+    uint32_t* ThreadLocalFreeBitMap() {
       return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
     }
     void* End() {
@@ -248,16 +248,26 @@
     bool IsAllFree();
     // Returns true if all the slots in the run are in use.
     bool IsFull();
+    // Returns true if the bulk free bit map is clean.
+    bool IsBulkFreeBitmapClean();
+    // Returns true if the thread local free bit map is clean.
+    bool IsThreadLocalFreeBitmapClean();
     // Clear all the bit maps.
     void ClearBitMaps();
     // Iterate over all the slots and apply the given function.
     void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
     // Dump the run metadata for debugging.
-    void Dump();
+    std::string Dump();
+    // Verify for debugging.
+    void Verify(Thread* self, RosAlloc* rosalloc)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
 
    private:
     // The common part of MarkFreeBitMap() and MarkThreadLocalFreeBitMap().
     void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
+    // Turns the bit map into a string for debugging.
+    static std::string BitMapToStr(uint32_t* bit_map_base, size_t num_vec);
   };
 
   // The magic number for a run.
@@ -531,7 +541,7 @@
   // Releases the thread-local runs assigned to all the threads back to the common set of runs.
   void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
   // Dumps the page map for debugging.
-  void DumpPageMap(Thread* self);
+  std::string DumpPageMap() EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
   // Callbacks for InspectAll that will count the number of bytes
   // allocated and objects allocated, respectively.
@@ -541,6 +551,9 @@
   bool DoesReleaseAllPages() const {
     return page_release_mode_ == kPageReleaseModeAll;
   }
+
+  // Verify for debugging.
+  void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 };
 
 }  // namespace allocator
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index ae04074..094e274 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -88,14 +88,18 @@
     // Mutator lock may be already exclusively held when we do garbage collections for changing the
     // current collector / allocator during process state updates.
     if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+      // PreGcRosAllocVerification() is called in Heap::TransitionCollector().
       GetHeap()->RevokeAllThreadLocalBuffers();
       MarkingPhase();
       ReclaimPhase();
+      // PostGcRosAllocVerification() is called in Heap::TransitionCollector().
     } else {
       thread_list->SuspendAll();
+      GetHeap()->PreGcRosAllocVerification(&timings_);
       GetHeap()->RevokeAllThreadLocalBuffers();
       MarkingPhase();
       ReclaimPhase();
+      GetHeap()->PostGcRosAllocVerification(&timings_);
       thread_list->ResumeAll();
     }
     ATRACE_END();
@@ -114,10 +118,12 @@
       thread_list->SuspendAll();
       ATRACE_END();
       ATRACE_BEGIN("All mutator threads suspended");
+      GetHeap()->PreGcRosAllocVerification(&timings_);
       done = HandleDirtyObjectsPhase();
       if (done) {
         GetHeap()->RevokeAllThreadLocalBuffers();
       }
+      GetHeap()->PostGcRosAllocVerification(&timings_);
       ATRACE_END();
       uint64_t pause_end = NanoTime();
       ATRACE_BEGIN("Resuming mutator threads");
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 03307f5..0c6a938 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -182,6 +182,7 @@
     }
   }
   Locks::mutator_lock_->AssertExclusiveHeld(self_);
+
   TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
   // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
   // wrong space.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index b1bbfc6..62567d7 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -81,7 +81,8 @@
            size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
            size_t long_pause_log_threshold, size_t long_gc_log_threshold,
            bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
-           bool verify_post_gc_heap)
+           bool verify_post_gc_heap, bool verify_pre_gc_rosalloc,
+           bool verify_post_gc_rosalloc)
     : non_moving_space_(nullptr),
       rosalloc_space_(nullptr),
       dlmalloc_space_(nullptr),
@@ -124,6 +125,8 @@
       verify_pre_gc_heap_(verify_pre_gc_heap),
       verify_post_gc_heap_(verify_post_gc_heap),
       verify_mod_union_table_(false),
+      verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc),
+      verify_post_gc_rosalloc_(verify_post_gc_rosalloc),
       last_trim_time_ms_(0),
       allocation_rate_(0),
       /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
@@ -1058,18 +1061,18 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : classes_(classes), use_is_assignable_from_(use_is_assignable_from), counts_(counts) {
   }
-
-  void operator()(mirror::Object* o) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    for (size_t i = 0; i < classes_.size(); ++i) {
-      mirror::Class* instance_class = o->GetClass();
-      if (use_is_assignable_from_) {
-        if (instance_class != NULL && classes_[i]->IsAssignableFrom(instance_class)) {
-          ++counts_[i];
+  static void Callback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    InstanceCounter* instance_counter = reinterpret_cast<InstanceCounter*>(arg);
+    mirror::Class* instance_class = obj->GetClass();
+    CHECK(instance_class != nullptr);
+    for (size_t i = 0; i < instance_counter->classes_.size(); ++i) {
+      if (instance_counter->use_is_assignable_from_) {
+        if (instance_counter->classes_[i]->IsAssignableFrom(instance_class)) {
+          ++instance_counter->counts_[i];
         }
-      } else {
-        if (instance_class == classes_[i]) {
-          ++counts_[i];
-        }
+      } else if (instance_class == instance_counter->classes_[i]) {
+        ++instance_counter->counts_[i];
       }
     }
   }
@@ -1078,22 +1081,18 @@
   const std::vector<mirror::Class*>& classes_;
   bool use_is_assignable_from_;
   uint64_t* const counts_;
-
   DISALLOW_COPY_AND_ASSIGN(InstanceCounter);
 };
 
 void Heap::CountInstances(const std::vector<mirror::Class*>& classes, bool use_is_assignable_from,
                           uint64_t* counts) {
-  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
-  // is empty, so the live bitmap is the only place we need to look.
+  // Can't do any GC in this function since this may move classes.
   Thread* self = Thread::Current();
-  self->TransitionFromRunnableToSuspended(kNative);
-  CollectGarbage(false);
-  self->TransitionFromSuspendedToRunnable();
-
+  auto* old_cause = self->StartAssertNoThreadSuspension("CountInstances");
   InstanceCounter counter(classes, use_is_assignable_from, counts);
-  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetLiveBitmap()->Visit(counter);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  VisitObjects(InstanceCounter::Callback, &counter);
+  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 class InstanceCollector {
@@ -1102,12 +1101,15 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : class_(c), max_count_(max_count), instances_(instances) {
   }
-
-  void operator()(mirror::Object* o) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::Class* instance_class = o->GetClass();
-    if (instance_class == class_) {
-      if (max_count_ == 0 || instances_.size() < max_count_) {
-        instances_.push_back(o);
+  static void Callback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    DCHECK(arg != nullptr);
+    InstanceCollector* instance_collector = reinterpret_cast<InstanceCollector*>(arg);
+    mirror::Class* instance_class = obj->GetClass();
+    if (instance_class == instance_collector->class_) {
+      if (instance_collector->max_count_ == 0 ||
+          instance_collector->instances_.size() < instance_collector->max_count_) {
+        instance_collector->instances_.push_back(obj);
       }
     }
   }
@@ -1116,22 +1118,18 @@
   mirror::Class* class_;
   uint32_t max_count_;
   std::vector<mirror::Object*>& instances_;
-
   DISALLOW_COPY_AND_ASSIGN(InstanceCollector);
 };
 
 void Heap::GetInstances(mirror::Class* c, int32_t max_count,
                         std::vector<mirror::Object*>& instances) {
-  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
-  // is empty, so the live bitmap is the only place we need to look.
+  // Can't do any GC in this function since this may move classes.
   Thread* self = Thread::Current();
-  self->TransitionFromRunnableToSuspended(kNative);
-  CollectGarbage(false);
-  self->TransitionFromSuspendedToRunnable();
-
+  auto* old_cause = self->StartAssertNoThreadSuspension("GetInstances");
   InstanceCollector collector(c, max_count, instances);
-  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetLiveBitmap()->Visit(collector);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  VisitObjects(&InstanceCollector::Callback, &collector);
+  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 class ReferringObjectsFinder {
@@ -1142,6 +1140,11 @@
       : object_(object), max_count_(max_count), referring_objects_(referring_objects) {
   }
 
+  static void Callback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    reinterpret_cast<ReferringObjectsFinder*>(arg)->operator()(obj);
+  }
+
   // For bitmap Visit.
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
@@ -1161,22 +1164,18 @@
   mirror::Object* object_;
   uint32_t max_count_;
   std::vector<mirror::Object*>& referring_objects_;
-
   DISALLOW_COPY_AND_ASSIGN(ReferringObjectsFinder);
 };
 
 void Heap::GetReferringObjects(mirror::Object* o, int32_t max_count,
                                std::vector<mirror::Object*>& referring_objects) {
-  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
-  // is empty, so the live bitmap is the only place we need to look.
+  // Can't do any GC in this function since this may move classes.
   Thread* self = Thread::Current();
-  self->TransitionFromRunnableToSuspended(kNative);
-  CollectGarbage(false);
-  self->TransitionFromSuspendedToRunnable();
-
+  auto* old_cause = self->StartAssertNoThreadSuspension("GetReferringObjects");
   ReferringObjectsFinder finder(o, max_count, referring_objects);
-  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetLiveBitmap()->Visit(finder);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  VisitObjects(&ReferringObjectsFinder::Callback, &finder);
+  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 void Heap::CollectGarbage(bool clear_soft_references) {
@@ -1189,6 +1188,8 @@
   if (collector_type == collector_type_) {
     return;
   }
+  VLOG(heap) << "TransitionCollector: " << static_cast<int>(collector_type_)
+             << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
   uint32_t before_size  = GetTotalMemory();
   uint32_t before_allocated = num_bytes_allocated_.Load();
@@ -1216,6 +1217,7 @@
     usleep(1000);
   }
   tl->SuspendAll();
+  PreGcRosAllocVerification(&semi_space_collector_->GetTimings());
   switch (collector_type) {
     case kCollectorTypeSS:
       // Fall-through.
@@ -1265,6 +1267,7 @@
     }
   }
   ChangeCollector(collector_type);
+  PostGcRosAllocVerification(&semi_space_collector_->GetTimings());
   tl->ResumeAll();
   // Can't call into java code with all threads suspended.
   EnqueueClearedReferences();
@@ -1447,6 +1450,9 @@
   ChangeCollector(post_zygote_collector_type_);
   // TODO: Delete bump_pointer_space_ and temp_pointer_space_?
   if (semi_space_collector_ != nullptr) {
+    // Temporarily disable rosalloc verification because the zygote
+    // compaction will mess up the rosalloc internal metadata.
+    ScopedDisableRosAllocVerification disable_rosalloc_verif(this);
     ZygoteCompactingCollector zygote_collector(this);
     zygote_collector.BuildBins(non_moving_space_);
     // Create a new bump pointer space which we will compact into.
@@ -2108,6 +2114,32 @@
   }
 }
 
+void Heap::PreGcRosAllocVerification(TimingLogger* timings) {
+  if (verify_pre_gc_rosalloc_) {
+    TimingLogger::ScopedSplit split("PreGcRosAllocVerification", timings);
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsRosAllocSpace()) {
+        VLOG(heap) << "PreGcRosAllocVerification : " << space->GetName();
+        space::RosAllocSpace* rosalloc_space = space->AsRosAllocSpace();
+        rosalloc_space->Verify();
+      }
+    }
+  }
+}
+
+void Heap::PostGcRosAllocVerification(TimingLogger* timings) {
+  if (verify_post_gc_rosalloc_) {
+    TimingLogger::ScopedSplit split("PostGcRosAllocVerification", timings);
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsRosAllocSpace()) {
+        VLOG(heap) << "PostGcRosAllocVerification : " << space->GetName();
+        space::RosAllocSpace* rosalloc_space = space->AsRosAllocSpace();
+        rosalloc_space->Verify();
+      }
+    }
+  }
+}
+
 collector::GcType Heap::WaitForGcToComplete(Thread* self) {
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 499d27c..476ceee 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -150,7 +150,8 @@
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
                 bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
-                bool verify_post_gc_heap);
+                bool verify_post_gc_heap, bool verify_pre_gc_rosalloc,
+                bool verify_post_gc_rosalloc);
 
   ~Heap();
 
@@ -248,7 +249,7 @@
   void DecrementDisableMovingGC(Thread* self);
 
   // Initiates an explicit garbage collection.
-  void CollectGarbage(bool clear_soft_references) LOCKS_EXCLUDED(Locks::mutator_lock_);
+  void CollectGarbage(bool clear_soft_references);
 
   // Does a concurrent GC, should only be called by the GC daemon thread
   // through runtime.
@@ -440,6 +441,11 @@
   void RevokeThreadLocalBuffers(Thread* thread);
   void RevokeAllThreadLocalBuffers();
 
+  void PreGcRosAllocVerification(TimingLogger* timings)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PostGcRosAllocVerification(TimingLogger* timings)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   accounting::HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     return live_bitmap_.get();
   }
@@ -796,6 +802,29 @@
   const bool verify_pre_gc_heap_;
   const bool verify_post_gc_heap_;
   const bool verify_mod_union_table_;
+  bool verify_pre_gc_rosalloc_;
+  bool verify_post_gc_rosalloc_;
+
+  // RAII that temporarily disables the rosalloc verification during
+  // the zygote fork.
+  class ScopedDisableRosAllocVerification {
+   private:
+    Heap* heap_;
+    bool orig_verify_pre_gc_;
+    bool orig_verify_post_gc_;
+   public:
+    explicit ScopedDisableRosAllocVerification(Heap* heap)
+        : heap_(heap),
+          orig_verify_pre_gc_(heap_->verify_pre_gc_rosalloc_),
+          orig_verify_post_gc_(heap_->verify_post_gc_rosalloc_) {
+      heap_->verify_pre_gc_rosalloc_ = false;
+      heap_->verify_post_gc_rosalloc_ = false;
+    }
+    ~ScopedDisableRosAllocVerification() {
+      heap_->verify_pre_gc_rosalloc_ = orig_verify_pre_gc_;
+      heap_->verify_post_gc_rosalloc_ = orig_verify_post_gc_;
+    }
+  };
 
   // Parallel GC data structures.
   UniquePtr<ThreadPool> thread_pool_;
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 4cd5a6d..2377423 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -104,6 +104,10 @@
     return this;
   }
 
+  void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    rosalloc_->Verify();
+  }
+
  protected:
   RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
                 byte* begin, byte* end, byte* limit, size_t growth_limit);
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 02a9aa6..922e642 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -57,7 +57,10 @@
   } else if (name == "int java.lang.String.fastIndexOf(int, int)") {
     result->SetI(receiver->AsString()->FastIndexOf(args[0], args[1]));
   } else if (name == "java.lang.Object java.lang.reflect.Array.createMultiArray(java.lang.Class, int[])") {
-    result->SetL(Array::CreateMultiArray(self, reinterpret_cast<Object*>(args[0])->AsClass(), reinterpret_cast<Object*>(args[1])->AsIntArray()));
+    SirtRef<mirror::Class> sirt_class(self, reinterpret_cast<Object*>(args[0])->AsClass());
+    SirtRef<mirror::IntArray> sirt_dimensions(self,
+                                              reinterpret_cast<Object*>(args[1])->AsIntArray());
+    result->SetL(Array::CreateMultiArray(self, sirt_class, sirt_dimensions));
   } else if (name == "java.lang.Object java.lang.Throwable.nativeFillInStackTrace()") {
     ScopedObjectAccessUnchecked soa(self);
     result->SetL(soa.Decode<Object*>(self->CreateInternalStackTrace(soa)));
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index 369eddd..40ba3e3 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -31,7 +31,8 @@
 }
 
 ObjectRegistry::ObjectRegistry()
-    : lock_("ObjectRegistry lock", kJdwpObjectRegistryLock), next_id_(1) {
+    : lock_("ObjectRegistry lock", kJdwpObjectRegistryLock), allow_new_objects_(true),
+      condition_("object registry condition", lock_), next_id_(1) {
 }
 
 JDWP::RefTypeId ObjectRegistry::AddRefType(mirror::Class* c) {
@@ -49,58 +50,59 @@
 
   ScopedObjectAccessUnchecked soa(Thread::Current());
   MutexLock mu(soa.Self(), lock_);
-  ObjectRegistryEntry dummy;
-  dummy.jni_reference_type = JNIWeakGlobalRefType;
-  dummy.jni_reference = NULL;
-  dummy.reference_count = 0;
-  dummy.id = 0;
-  std::pair<object_iterator, bool> result = object_to_entry_.insert(std::make_pair(o, dummy));
-  ObjectRegistryEntry& entry = result.first->second;
-  if (!result.second) {
-    // This object was already in our map.
-    entry.reference_count += 1;
-    return entry.id;
+  while (UNLIKELY(!allow_new_objects_)) {
+    condition_.WaitHoldingLocks(soa.Self());
   }
+  ObjectRegistryEntry* entry;
+  auto it = object_to_entry_.find(o);
+  if (it != object_to_entry_.end()) {
+    // This object was already in our map.
+    entry = it->second;
+    ++entry->reference_count;
+  } else {
+    entry = new ObjectRegistryEntry;
+    entry->jni_reference_type = JNIWeakGlobalRefType;
+    entry->jni_reference = nullptr;
+    entry->reference_count = 0;
+    entry->id = 0;
+    object_to_entry_.insert(std::make_pair(o, entry));
 
-  // This object isn't in the registry yet, so add it.
-  JNIEnv* env = soa.Env();
+    // This object isn't in the registry yet, so add it.
+    JNIEnv* env = soa.Env();
 
-  jobject local_reference = soa.AddLocalReference<jobject>(o);
+    jobject local_reference = soa.AddLocalReference<jobject>(o);
 
-  entry.jni_reference_type = JNIWeakGlobalRefType;
-  entry.jni_reference = env->NewWeakGlobalRef(local_reference);
-  entry.reference_count = 1;
-  entry.id = next_id_++;
+    entry->jni_reference_type = JNIWeakGlobalRefType;
+    entry->jni_reference = env->NewWeakGlobalRef(local_reference);
+    entry->reference_count = 1;
+    entry->id = next_id_++;
 
-  id_to_entry_.Put(entry.id, &entry);
+    id_to_entry_.Put(entry->id, entry);
 
-  env->DeleteLocalRef(local_reference);
-
-  return entry.id;
+    env->DeleteLocalRef(local_reference);
+  }
+  return entry->id;
 }
 
 bool ObjectRegistry::Contains(mirror::Object* o) {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, lock_);
-  return (object_to_entry_.find(o) != object_to_entry_.end());
+  MutexLock mu(Thread::Current(), lock_);
+  return object_to_entry_.find(o) != object_to_entry_.end();
 }
 
 void ObjectRegistry::Clear() {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
   VLOG(jdwp) << "Object registry contained " << object_to_entry_.size() << " entries";
-
   // Delete all the JNI references.
   JNIEnv* env = self->GetJniEnv();
-  for (object_iterator it = object_to_entry_.begin(); it != object_to_entry_.end(); ++it) {
-    ObjectRegistryEntry& entry = (it->second);
+  for (const auto& pair : object_to_entry_) {
+    const ObjectRegistryEntry& entry = *pair.second;
     if (entry.jni_reference_type == JNIWeakGlobalRefType) {
       env->DeleteWeakGlobalRef(entry.jni_reference);
     } else {
       env->DeleteGlobalRef(entry.jni_reference);
     }
   }
-
   // Clear the maps.
   object_to_entry_.clear();
   id_to_entry_.clear();
@@ -109,11 +111,11 @@
 mirror::Object* ObjectRegistry::InternalGet(JDWP::ObjectId id) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   if (it == id_to_entry_.end()) {
     return kInvalidObject;
   }
-  ObjectRegistryEntry& entry = *(it->second);
+  ObjectRegistryEntry& entry = *it->second;
   return self->DecodeJObject(entry.jni_reference);
 }
 
@@ -123,26 +125,26 @@
   }
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   CHECK(it != id_to_entry_.end()) << id;
-  ObjectRegistryEntry& entry = *(it->second);
+  ObjectRegistryEntry& entry = *it->second;
   return entry.jni_reference;
 }
 
 void ObjectRegistry::DisableCollection(JDWP::ObjectId id) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   CHECK(it != id_to_entry_.end());
-  Promote(*(it->second));
+  Promote(*it->second);
 }
 
 void ObjectRegistry::EnableCollection(JDWP::ObjectId id) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   CHECK(it != id_to_entry_.end());
-  Demote(*(it->second));
+  Demote(*it->second);
 }
 
 void ObjectRegistry::Demote(ObjectRegistryEntry& entry) {
@@ -170,10 +172,9 @@
 bool ObjectRegistry::IsCollected(JDWP::ObjectId id) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   CHECK(it != id_to_entry_.end());
-
-  ObjectRegistryEntry& entry = *(it->second);
+  ObjectRegistryEntry& entry = *it->second;
   if (entry.jni_reference_type == JNIWeakGlobalRefType) {
     JNIEnv* env = self->GetJniEnv();
     return env->IsSameObject(entry.jni_reference, NULL);  // Has the jweak been collected?
@@ -185,24 +186,55 @@
 void ObjectRegistry::DisposeObject(JDWP::ObjectId id, uint32_t reference_count) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   if (it == id_to_entry_.end()) {
     return;
   }
-
-  ObjectRegistryEntry& entry = *(it->second);
-  entry.reference_count -= reference_count;
-  if (entry.reference_count <= 0) {
+  ObjectRegistryEntry* entry = it->second;
+  entry->reference_count -= reference_count;
+  if (entry->reference_count <= 0) {
     JNIEnv* env = self->GetJniEnv();
-    mirror::Object* object = self->DecodeJObject(entry.jni_reference);
-    if (entry.jni_reference_type == JNIWeakGlobalRefType) {
-      env->DeleteWeakGlobalRef(entry.jni_reference);
+    mirror::Object* object = self->DecodeJObject(entry->jni_reference);
+    if (entry->jni_reference_type == JNIWeakGlobalRefType) {
+      env->DeleteWeakGlobalRef(entry->jni_reference);
     } else {
-      env->DeleteGlobalRef(entry.jni_reference);
+      env->DeleteGlobalRef(entry->jni_reference);
     }
     object_to_entry_.erase(object);
     id_to_entry_.erase(id);
+    delete entry;
   }
 }
 
+void ObjectRegistry::UpdateObjectPointers(RootVisitor visitor, void* arg) {
+  MutexLock mu(Thread::Current(), lock_);
+  if (object_to_entry_.empty()) {
+    return;
+  }
+  std::map<mirror::Object*, ObjectRegistryEntry*> new_object_to_entry;
+  for (auto& pair : object_to_entry_) {
+    mirror::Object* new_obj;
+    if (pair.first != nullptr) {
+      new_obj = visitor(pair.first, arg);
+      if (new_obj != nullptr) {
+        new_object_to_entry.insert(std::make_pair(new_obj, pair.second));
+      }
+    }
+  }
+  object_to_entry_ = new_object_to_entry;
+}
+
+void ObjectRegistry::AllowNewObjects() {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, lock_);
+  allow_new_objects_ = true;
+  condition_.Broadcast(self);
+}
+
+void ObjectRegistry::DisallowNewObjects() {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, lock_);
+  allow_new_objects_ = false;
+}
+
 }  // namespace art
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index 7f162ca..0190575 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -26,6 +26,7 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "root_visitor.h"
 #include "safe_map.h"
 
 namespace art {
@@ -83,6 +84,15 @@
   // Avoid using this and use standard Get when possible.
   jobject GetJObject(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Visit the registered objects; they are treated as system weaks.
+  void UpdateObjectPointers(RootVisitor visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // We have allow / disallow functionality since we use system weak sweeping logic to update moved
+  // objects inside the object_to_entry_ map.
+  void AllowNewObjects() LOCKS_EXCLUDED(lock_);
+  void DisallowNewObjects() LOCKS_EXCLUDED(lock_);
+
  private:
   JDWP::ObjectId InternalAdd(mirror::Object* o) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::Object* InternalGet(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -90,11 +100,10 @@
   void Promote(ObjectRegistryEntry& entry) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, lock_);
 
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  bool allow_new_objects_ GUARDED_BY(lock_);
+  ConditionVariable condition_ GUARDED_BY(lock_);
 
-  typedef std::map<mirror::Object*, ObjectRegistryEntry>::iterator object_iterator;
-  std::map<mirror::Object*, ObjectRegistryEntry> object_to_entry_ GUARDED_BY(lock_);
-
-  typedef SafeMap<JDWP::ObjectId, ObjectRegistryEntry*>::iterator id_iterator;
+  std::map<mirror::Object*, ObjectRegistryEntry*> object_to_entry_ GUARDED_BY(lock_);
   SafeMap<JDWP::ObjectId, ObjectRegistryEntry*> id_to_entry_ GUARDED_BY(lock_);
 
   size_t next_id_ GUARDED_BY(lock_);
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index ca0d1f3..c23234e 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -40,23 +40,25 @@
 // piece and work our way in.
 // Recursively create an array with multiple dimensions.  Elements may be
 // Objects or primitive types.
-static Array* RecursiveCreateMultiArray(Thread* self, Class* array_class, int current_dimension,
-                                        SirtRef<mirror::IntArray>& dimensions)
+static Array* RecursiveCreateMultiArray(Thread* self,
+                                        const SirtRef<Class>& array_class, int current_dimension,
+                                        const SirtRef<mirror::IntArray>& dimensions)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   int32_t array_length = dimensions->Get(current_dimension);
-  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class, array_length));
-  if (UNLIKELY(new_array.get() == NULL)) {
+  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class.get(), array_length));
+  if (UNLIKELY(new_array.get() == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;
+    return nullptr;
   }
   if (current_dimension + 1 < dimensions->GetLength()) {
     // Create a new sub-array in every element of the array.
     for (int32_t i = 0; i < array_length; i++) {
-      Array* sub_array = RecursiveCreateMultiArray(self, array_class->GetComponentType(),
+      SirtRef<mirror::Class> sirt_component_type(self, array_class->GetComponentType());
+      Array* sub_array = RecursiveCreateMultiArray(self, sirt_component_type,
                                                    current_dimension + 1, dimensions);
-      if (UNLIKELY(sub_array == NULL)) {
+      if (UNLIKELY(sub_array == nullptr)) {
         CHECK(self->IsExceptionPending());
-        return NULL;
+        return nullptr;
       }
       new_array->AsObjectArray<Array>()->Set(i, sub_array);
     }
@@ -64,7 +66,8 @@
   return new_array.get();
 }
 
-Array* Array::CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions) {
+Array* Array::CreateMultiArray(Thread* self, const SirtRef<Class>& element_class,
+                               const SirtRef<IntArray>& dimensions) {
   // Verify dimensions.
   //
   // The caller is responsible for verifying that "dimArray" is non-null
@@ -77,28 +80,27 @@
     int dimension = dimensions->Get(i);
     if (UNLIKELY(dimension < 0)) {
       ThrowNegativeArraySizeException(StringPrintf("Dimension %d: %d", i, dimension).c_str());
-      return NULL;
+      return nullptr;
     }
   }
 
   // Generate the full name of the array class.
   std::string descriptor(num_dimensions, '[');
-  descriptor += ClassHelper(element_class).GetDescriptor();
+  descriptor += ClassHelper(element_class.get()).GetDescriptor();
 
   // Find/generate the array class.
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   SirtRef<mirror::ClassLoader> class_loader(self, element_class->GetClassLoader());
-  Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
-  if (UNLIKELY(array_class == NULL)) {
+  SirtRef<mirror::Class> array_class(self,
+                                     class_linker->FindClass(descriptor.c_str(), class_loader));
+  if (UNLIKELY(array_class.get() == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;
+    return nullptr;
   }
   // create the array
-  SirtRef<mirror::IntArray> sirt_dimensions(self, dimensions);
-  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, sirt_dimensions);
-  if (UNLIKELY(new_array == NULL)) {
+  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, dimensions);
+  if (UNLIKELY(new_array == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;
   }
   return new_array;
 }
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 6e366a0..04f03c3 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -47,7 +47,8 @@
                       size_t component_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static Array* CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions)
+  static Array* CreateMultiArray(Thread* self, const SirtRef<Class>& element_class,
+                                 const SirtRef<IntArray>& dimensions)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index afa4112..b994354 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -240,7 +240,7 @@
   return result;
 }
 
-inline uint32_t Object::GetField32(MemberOffset field_offset, bool is_volatile) {
+inline int32_t Object::GetField32(MemberOffset field_offset, bool is_volatile) {
   VerifyObject(this);
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int32_t* word_addr = reinterpret_cast<const int32_t*>(raw_addr);
@@ -253,13 +253,13 @@
   }
 }
 
-inline void Object::SetField32(MemberOffset field_offset, uint32_t new_value, bool is_volatile,
+inline void Object::SetField32(MemberOffset field_offset, int32_t new_value, bool is_volatile,
                                bool this_is_valid) {
   if (this_is_valid) {
     VerifyObject(this);
   }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
-  uint32_t* word_addr = reinterpret_cast<uint32_t*>(raw_addr);
+  int32_t* word_addr = reinterpret_cast<int32_t*>(raw_addr);
   if (UNLIKELY(is_volatile)) {
     QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
     *word_addr = new_value;
@@ -269,19 +269,19 @@
   }
 }
 
-inline bool Object::CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value) {
+inline bool Object::CasField32(MemberOffset field_offset, int32_t old_value, int32_t new_value) {
   VerifyObject(this);
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
-  volatile uint32_t* addr = reinterpret_cast<volatile uint32_t*>(raw_addr);
+  volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr);
   return __sync_bool_compare_and_swap(addr, old_value, new_value);
 }
 
-inline uint64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) {
+inline int64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) {
   VerifyObject(this);
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr);
   if (UNLIKELY(is_volatile)) {
-    uint64_t result = QuasiAtomic::Read64(addr);
+    int64_t result = QuasiAtomic::Read64(addr);
     QuasiAtomic::MembarLoadLoad();  // Ensure volatile loads don't re-order.
     return result;
   } else {
@@ -289,7 +289,7 @@
   }
 }
 
-inline void Object::SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile,
+inline void Object::SetField64(MemberOffset field_offset, int64_t new_value, bool is_volatile,
                                bool this_is_valid) {
   if (this_is_valid) {
     VerifyObject(this);
@@ -309,11 +309,11 @@
   }
 }
 
-inline bool Object::CasField64(MemberOffset field_offset, uint64_t old_value, uint64_t new_value) {
+inline bool Object::CasField64(MemberOffset field_offset, int64_t old_value, int64_t new_value) {
   VerifyObject(this);
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
-  volatile uint64_t* addr = reinterpret_cast<volatile uint64_t*>(raw_addr);
-  return __sync_bool_compare_and_swap(addr, old_value, new_value);
+  volatile int64_t* addr = reinterpret_cast<volatile int64_t*>(raw_addr);
+  return QuasiAtomic::Cas64(old_value, new_value, addr);
 }
 
 template<class T>
@@ -361,7 +361,7 @@
 inline bool Object::CasFieldObject(MemberOffset field_offset, Object* old_value, Object* new_value) {
   VerifyObject(this);
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
-  volatile uint32_t* addr = reinterpret_cast<volatile uint32_t*>(raw_addr);
+  volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr);
   HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
   HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
   bool success =  __sync_bool_compare_and_swap(addr, old_ref.reference_, new_ref.reference_);
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 6fe8b73..c42750f 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -170,29 +170,29 @@
         field_offset.Int32Value());
   }
 
-  uint32_t GetField32(MemberOffset field_offset, bool is_volatile);
+  int32_t GetField32(MemberOffset field_offset, bool is_volatile);
 
-  void SetField32(MemberOffset field_offset, uint32_t new_value, bool is_volatile,
+  void SetField32(MemberOffset field_offset, int32_t new_value, bool is_volatile,
                   bool this_is_valid = true);
 
-  bool CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value)
+  bool CasField32(MemberOffset field_offset, int32_t old_value, int32_t new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint64_t GetField64(MemberOffset field_offset, bool is_volatile);
+  int64_t GetField64(MemberOffset field_offset, bool is_volatile);
 
-  void SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile,
+  void SetField64(MemberOffset field_offset, int64_t new_value, bool is_volatile,
                   bool this_is_valid = true);
 
-  bool CasField64(MemberOffset field_offset, uint64_t old_value, uint64_t new_value)
+  bool CasField64(MemberOffset field_offset, int64_t old_value, int64_t new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<typename T>
   void SetFieldPtr(MemberOffset field_offset, T new_value, bool is_volatile,
                    bool this_is_valid = true) {
 #ifndef __LP64__
-    SetField32(field_offset, reinterpret_cast<uint32_t>(new_value), is_volatile, this_is_valid);
+    SetField32(field_offset, reinterpret_cast<int32_t>(new_value), is_volatile, this_is_valid);
 #else
-    SetField64(field_offset, reinterpret_cast<uint64_t>(new_value), is_volatile, this_is_valid);
+    SetField64(field_offset, reinterpret_cast<int64_t>(new_value), is_volatile, this_is_valid);
 #endif
   }
 
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 2af32da..db9723b 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -236,12 +236,12 @@
   SirtRef<Class> c(soa.Self(), class_linker_->FindSystemClass("I"));
   SirtRef<IntArray> dims(soa.Self(), IntArray::Alloc(soa.Self(), 1));
   dims->Set(0, 1);
-  Array* multi = Array::CreateMultiArray(soa.Self(), c.get(), dims.get());
+  Array* multi = Array::CreateMultiArray(soa.Self(), c, dims);
   EXPECT_TRUE(multi->GetClass() == class_linker_->FindSystemClass("[I"));
   EXPECT_EQ(1, multi->GetLength());
 
   dims->Set(0, -1);
-  multi = Array::CreateMultiArray(soa.Self(), c.get(), dims.get());
+  multi = Array::CreateMultiArray(soa.Self(), c, dims);
   EXPECT_TRUE(soa.Self()->IsExceptionPending());
   EXPECT_EQ(PrettyDescriptor(soa.Self()->GetException(NULL)->GetClass()),
             "java.lang.NegativeArraySizeException");
@@ -252,7 +252,7 @@
     for (int j = 0; j < 20; ++j) {
       dims->Set(0, i);
       dims->Set(1, j);
-      multi = Array::CreateMultiArray(soa.Self(), c.get(), dims.get());
+      multi = Array::CreateMultiArray(soa.Self(), c, dims);
       EXPECT_TRUE(multi->GetClass() == class_linker_->FindSystemClass("[[I"));
       EXPECT_EQ(i, multi->GetLength());
       for (int k = 0; k < i; ++k) {
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index a82b26c..406c5a3 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -112,7 +112,7 @@
 
  private:
   void SetHashCode(int32_t new_hash_code) {
-    DCHECK_EQ(0u, GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), false));
+    DCHECK_EQ(0, GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), false));
     SetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), new_hash_code, false);
   }
 
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index dceea5c..d9baaaf 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -233,14 +233,19 @@
 static jlong VMDebug_countInstancesOfClass(JNIEnv* env, jclass, jclass javaClass,
                                            jboolean countAssignable) {
   ScopedObjectAccess soa(env);
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
+  // is empty, so the live bitmap is the only place we need to look. Need to do GC before decoding
+  // any jobjects.
+  heap->CollectGarbage(false);
   mirror::Class* c = soa.Decode<mirror::Class*>(javaClass);
-  if (c == NULL) {
+  if (c == nullptr) {
     return 0;
   }
   std::vector<mirror::Class*> classes;
   classes.push_back(c);
   uint64_t count = 0;
-  Runtime::Current()->GetHeap()->CountInstances(classes, countAssignable, &count);
+  heap->CountInstances(classes, countAssignable, &count);
   return count;
 }
 
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index 52cdb59..2197597 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -29,14 +29,16 @@
 static jobject Array_createMultiArray(JNIEnv* env, jclass, jclass javaElementClass, jobject javaDimArray) {
   ScopedFastNativeObjectAccess soa(env);
   DCHECK(javaElementClass != NULL);
-  mirror::Class* element_class = soa.Decode<mirror::Class*>(javaElementClass);
+  SirtRef<mirror::Class> element_class(soa.Self(), soa.Decode<mirror::Class*>(javaElementClass));
   DCHECK(element_class->IsClass());
   DCHECK(javaDimArray != NULL);
   mirror::Object* dimensions_obj = soa.Decode<mirror::Object*>(javaDimArray);
   DCHECK(dimensions_obj->IsArrayInstance());
   DCHECK_STREQ(ClassHelper(dimensions_obj->GetClass()).GetDescriptor(), "[I");
-  mirror::IntArray* dimensions_array = down_cast<mirror::IntArray*>(dimensions_obj);
-  mirror::Array* new_array = mirror::Array::CreateMultiArray(soa.Self(), element_class, dimensions_array);
+  SirtRef<mirror::IntArray> dimensions_array(soa.Self(),
+                                             down_cast<mirror::IntArray*>(dimensions_obj));
+  mirror::Array* new_array = mirror::Array::CreateMultiArray(soa.Self(), element_class,
+                                                             dimensions_array);
   return soa.AddLocalReference<jobject>(new_array);
 }
 
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 0f380ad..00a8506 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -226,7 +226,7 @@
   }
 
   for (size_t i = 0; i < GetOatHeader().GetDexFileCount(); i++) {
-    size_t dex_file_location_size = *reinterpret_cast<const uint32_t*>(oat);
+    uint32_t dex_file_location_size = *reinterpret_cast<const uint32_t*>(oat);
     if (UNLIKELY(dex_file_location_size == 0U)) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd with empty location name",
                                 GetLocation().c_str(), i);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 4e90478..09d05d1 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -379,6 +379,7 @@
   GetInternTable()->SweepInternTableWeaks(visitor, arg);
   GetMonitorList()->SweepMonitorList(visitor, arg);
   GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
+  Dbg::UpdateObjectPointers(visitor, arg);
 }
 
 static gc::CollectorType ParseCollectorType(const std::string& option) {
@@ -429,6 +430,8 @@
   parsed->use_tlab_ = false;
   parsed->verify_pre_gc_heap_ = false;
   parsed->verify_post_gc_heap_ = kIsDebugBuild;
+  parsed->verify_pre_gc_rosalloc_ = kIsDebugBuild;
+  parsed->verify_post_gc_rosalloc_ = false;
 
   parsed->compiler_callbacks_ = nullptr;
   parsed->is_zygote_ = false;
@@ -615,12 +618,20 @@
           parsed->collector_type_ = collector_type;
         } else if (gc_option == "preverify") {
           parsed->verify_pre_gc_heap_ = true;
-        }  else if (gc_option == "nopreverify") {
+        } else if (gc_option == "nopreverify") {
           parsed->verify_pre_gc_heap_ = false;
         }  else if (gc_option == "postverify") {
           parsed->verify_post_gc_heap_ = true;
         } else if (gc_option == "nopostverify") {
           parsed->verify_post_gc_heap_ = false;
+        } else if (gc_option == "preverify_rosalloc") {
+          parsed->verify_pre_gc_rosalloc_ = true;
+        } else if (gc_option == "nopreverify_rosalloc") {
+          parsed->verify_pre_gc_rosalloc_ = false;
+        } else if (gc_option == "postverify_rosalloc") {
+          parsed->verify_post_gc_rosalloc_ = true;
+        } else if (gc_option == "nopostverify_rosalloc") {
+          parsed->verify_post_gc_rosalloc_ = false;
         } else {
           LOG(WARNING) << "Ignoring unknown -Xgc option: " << gc_option;
         }
@@ -1018,7 +1029,9 @@
                        options->ignore_max_footprint_,
                        options->use_tlab_,
                        options->verify_pre_gc_heap_,
-                       options->verify_post_gc_heap_);
+                       options->verify_post_gc_heap_,
+                       options->verify_pre_gc_rosalloc_,
+                       options->verify_post_gc_rosalloc_);
 
   dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
 
@@ -1483,12 +1496,14 @@
   monitor_list_->DisallowNewMonitors();
   intern_table_->DisallowNewInterns();
   java_vm_->DisallowNewWeakGlobals();
+  Dbg::DisallowNewObjectRegistryObjects();
 }
 
 void Runtime::AllowNewSystemWeaks() {
   monitor_list_->AllowNewMonitors();
   intern_table_->AllowNewInterns();
   java_vm_->AllowNewWeakGlobals();
+  Dbg::AllowNewObjectRegistryObjects();
 }
 
 void Runtime::SetCalleeSaveMethod(mirror::ArtMethod* method, CalleeSaveType type) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 557ba2c..896a18b 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -109,6 +109,8 @@
     bool use_tlab_;
     bool verify_pre_gc_heap_;
     bool verify_post_gc_heap_;
+    bool verify_pre_gc_rosalloc_;
+    bool verify_post_gc_rosalloc_;
     size_t long_pause_log_threshold_;
     size_t long_gc_log_threshold_;
     bool dump_gc_performance_on_shutdown_;
diff --git a/test/051-thread/expected.txt b/test/051-thread/expected.txt
index 8e6b153..7139b7f 100644
--- a/test/051-thread/expected.txt
+++ b/test/051-thread/expected.txt
@@ -1,8 +1,9 @@
-Initializing System.out...
-Thread count: 512
-Starting thread 'Thready'
-@ Thread running
-@ Got expected setDaemon exception
-@ Thread bailing
-Thread starter returning
+thread test starting
+testThreadCapacity thread count: 512
+testThreadDaemons starting thread 'TestDaemonThread'
+testThreadDaemons @ Thread running
+testThreadDaemons @ Got expected setDaemon exception
+testThreadDaemons @ Thread bailing
+testThreadDaemons finished
+testSleepZero finished
 thread test done
diff --git a/test/051-thread/src/Main.java b/test/051-thread/src/Main.java
index 911c739..608b7e0 100644
--- a/test/051-thread/src/Main.java
+++ b/test/051-thread/src/Main.java
@@ -21,50 +21,36 @@
  */
 public class Main {
     public static void main(String[] args) throws Exception {
-        System.out.println("Initializing System.out...");
-
-        MyThread[] threads = new MyThread[512];
-        for (int i = 0; i < 512; i++) {
-            threads[i] = new MyThread();
-        }
-
-        for (MyThread thread : threads) {
-            thread.start();
-        }
-        for (MyThread thread : threads) {
-            thread.join();
-        }
-
-        System.out.println("Thread count: " + MyThread.mCount);
-
-        go();
+        System.out.println("thread test starting");
+        testThreadCapacity();
+        testThreadDaemons();
+        testSleepZero();
         System.out.println("thread test done");
     }
 
-    public static void go() {
-        Thread t = new Thread(null, new ThreadTestSub(), "Thready", 7168);
-
-        t.setDaemon(false);
-
-        System.out.print("Starting thread '" + t.getName() + "'\n");
-        t.start();
-
-        try {
-            t.join();
-        } catch (InterruptedException ex) {
-            ex.printStackTrace();
-        }
-
-        System.out.print("Thread starter returning\n");
-    }
-
     /*
      * Simple thread capacity test.
      */
-    static class MyThread extends Thread {
+    private static void testThreadCapacity() throws Exception {
+        TestCapacityThread[] threads = new TestCapacityThread[512];
+        for (int i = 0; i < 512; i++) {
+            threads[i] = new TestCapacityThread();
+        }
+
+        for (TestCapacityThread thread : threads) {
+            thread.start();
+        }
+        for (TestCapacityThread thread : threads) {
+            thread.join();
+        }
+
+        System.out.println("testThreadCapacity thread count: " + TestCapacityThread.mCount);
+    }
+
+    private static class TestCapacityThread extends Thread {
         static int mCount = 0;
         public void run() {
-            synchronized (MyThread.class) {
+            synchronized (TestCapacityThread.class) {
                 ++mCount;
             }
             try {
@@ -73,29 +59,57 @@
             }
         }
     }
-}
 
-class ThreadTestSub implements Runnable {
-    public void run() {
-        System.out.print("@ Thread running\n");
+    private static void testThreadDaemons() {
+        Thread t = new Thread(null, new TestDaemonThread(), "TestDaemonThread", 7168);
+
+        t.setDaemon(false);
+
+        System.out.print("testThreadDaemons starting thread '" + t.getName() + "'\n");
+        t.start();
 
         try {
-            Thread.currentThread().setDaemon(true);
-            System.out.print("@ FAILED: setDaemon() succeeded\n");
-        } catch (IllegalThreadStateException itse) {
-            System.out.print("@ Got expected setDaemon exception\n");
+            t.join();
+        } catch (InterruptedException ex) {
+            ex.printStackTrace();
         }
 
-        //if (true)
-        //    throw new NullPointerException();
+        System.out.print("testThreadDaemons finished\n");
+    }
+
+    private static class TestDaemonThread implements Runnable {
+        public void run() {
+            System.out.print("testThreadDaemons @ Thread running\n");
+
+            try {
+                Thread.currentThread().setDaemon(true);
+                System.out.print("testThreadDaemons @ FAILED: setDaemon() succeeded\n");
+            } catch (IllegalThreadStateException itse) {
+                System.out.print("testThreadDaemons @ Got expected setDaemon exception\n");
+            }
+
+            try {
+                Thread.sleep(2000);
+            }
+            catch (InterruptedException ie) {
+                System.out.print("testThreadDaemons @ Interrupted!\n");
+            }
+            finally {
+                System.out.print("testThreadDaemons @ Thread bailing\n");
+            }
+        }
+    }
+
+    private static void testSleepZero() throws Exception {
+        Thread.currentThread().interrupt();
         try {
-            Thread.sleep(2000);
+            Thread.sleep(0);
+            throw new AssertionError("unreachable");
+        } catch (InterruptedException e) {
+            if (Thread.currentThread().isInterrupted()) {
+                throw new AssertionError("thread is interrupted");
+            }
         }
-        catch (InterruptedException ie) {
-            System.out.print("@ Interrupted!\n");
-        }
-        finally {
-            System.out.print("@ Thread bailing\n");
-        }
+        System.out.print("testSleepZero finished\n");
     }
 }
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index a532141..3307e50 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -61,19 +61,19 @@
 
     static void wideIdentityTest() {
         Foo foo = new Foo();
-        long i = 1;
+        long i = 0x200000001L;
         i += foo.wideIdent0(i);
         i += foo.wideIdent1(0,i);
         i += foo.wideIdent2(0,0,i);
         i += foo.wideIdent3(0,0,0,i);
         i += foo.wideIdent4(0,0,0,0,i);
         i += foo.wideIdent5(0,0,0,0,0,i);
-        if (i == 64) {
+        if (i == 0x8000000040L) {
             System.out.println("wideIdentityTest passes");
         }
         else {
-            System.out.println("wideIdentityTest fails: " + i +
-                               " (expecting 64)");
+            System.out.println("wideIdentityTest fails: 0x" + Long.toHexString(i) +
+                               " (expecting 0x8000000040)");
         }
     }
 
@@ -90,12 +90,25 @@
         foo.wideSetBar4(0,0,0,sum);
         sum += foo.wideGetBar5(1,2,3,4,5);
         foo.wideSetBar5(0,0,0,0,sum);
-        if (foo.wideGetBar0() == 39488) {
+        long result1 = foo.wideGetBar0();
+        long expected1 = 1234L << 5;
+        sum += foo.wideGetBar0();
+        foo.wideSetBar2i(0,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar3i(0,0,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar4i(0,0,0,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar5i(0,0,0,0,sum);
+        long result2 = foo.wideGetBar0();
+        long expected2 = 1234L << 9;
+        if (result1 == expected1 && result2 == expected2) {
             System.out.println("wideGetterSetterTest passes");
         }
         else {
             System.out.println("wideGetterSetterTest fails: " +
-                                foo.wideGetBar0() + " (expecting 39488)");
+                                "result1: " + result1 + " (expecting " + expected1 + "), " +
+                                "result2: " + result2 + " (expecting " + expected2 + ")");
         }
     }
 
@@ -8374,6 +8387,18 @@
     public void wideSetBar5(long a1, long a2, long a3, long a4, long a5) {
         lbar = a5;
     }
+    public void wideSetBar2i(int a1, long a2) {
+        lbar = a2;
+    }
+    public void wideSetBar3i(int a1, int a2, long a3) {
+        lbar = a3;
+    }
+    public void wideSetBar4i(int a1, int a2, int a3, long a4) {
+        lbar = a4;
+    }
+    public void wideSetBar5i(int a1, int a2, int a3, int a4, long a5) {
+        lbar = a5;
+    }
     public long wideGetBar0() {
         return lbar;
     }