Merge "Speed up method lookup in a different dex file."
diff --git a/Android.mk b/Android.mk
index 62d40bb..8e43879 100644
--- a/Android.mk
+++ b/Android.mk
@@ -92,6 +92,7 @@
 include $(art_path)/disassembler/Android.mk
 include $(art_path)/oatdump/Android.mk
 include $(art_path)/dalvikvm/Android.mk
+include $(art_path)/tools/Android.mk
 include $(art_build_path)/Android.oat.mk
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
@@ -300,6 +301,12 @@
 .PHONY: build-art-target
 build-art-target: $(ART_TARGET_EXECUTABLES) $(ART_TARGET_TEST_EXECUTABLES) $(TARGET_CORE_IMG_OUT) $(TARGET_OUT)/lib/libjavacore.so
 
+.PHONY: art-host
+art-host:   $(HOST_OUT_EXECUTABLES)/art $(HOST_OUT)/bin/dalvikvm $(HOST_OUT)/lib/libart.so $(HOST_OUT)/bin/dex2oat $(HOST_OUT_JAVA_LIBRARIES)/core.art $(HOST_OUT)/lib/libjavacore.so
+
+.PHONY: art-host-debug
+art-host-debug:   art-host $(HOST_OUT)/lib/libartd.so $(HOST_OUT)/bin/dex2oatd
+
 ########################################################################
 # oatdump targets
 
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index f946d91..65b78c9 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -17,7 +17,8 @@
 LIBARTTEST_COMMON_SRC_FILES := \
 	test/JniTest/jni_test.cc \
 	test/ReferenceMap/stack_walk_refmap_jni.cc \
-	test/StackWalk/stack_walk_jni.cc
+	test/StackWalk/stack_walk_jni.cc \
+	test/UnsafeTest/unsafe_test.cc
 
 # $(1): target or host
 define build-libarttest
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index fdc609a..a30e80a 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -24,70 +24,62 @@
 
 namespace art {
 
+// TODO: generalize & move to RegUtil.cc
+// The number of dalvik registers passed in core registers.
+constexpr int kInArgsInCoreRegs = 3;
+// The core register corresponding to the first (index 0) input argument.
+constexpr int kInArg0CoreReg = r1;  // r0 is Method*.
+// Offset, in words, for getting args from stack (even core reg args have space on stack).
+constexpr int kInArgToStackOffset = 1;
 
-/* Return the position of an ssa name within the argument list */
-int ArmMir2Lir::InPosition(int s_reg) {
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  return v_reg - cu_->num_regs;
+/* Lock the core register(s), if any, holding the argument (both halves when wide). */
+void ArmMir2Lir::LockArg(int in_position, bool wide) {
+  if (in_position < kInArgsInCoreRegs) {  // Low (or only) word arrived in a core register.
+    LockTemp(kInArg0CoreReg + in_position);
+  }
+  if (wide && in_position + 1 < kInArgsInCoreRegs) {  // High word is in a core register too.
+    LockTemp(kInArg0CoreReg + in_position + 1);
+  }
 }
 
-/*
- * Describe an argument.  If it's already in an arg register, just leave it
- * there.  NOTE: all live arg registers must be locked prior to this call
- * to avoid having them allocated as a temp by downstream utilities.
- */
-RegLocation ArmMir2Lir::ArgLoc(RegLocation loc) {
-  int arg_num = InPosition(loc.s_reg_low);
-  if (loc.wide) {
-    if (arg_num == 2) {
-      // Bad case - half in register, half in frame.  Just punt
-      loc.location = kLocInvalid;
-    } else if (arg_num < 2) {
-      loc.low_reg = rARM_ARG1 + arg_num;
-      loc.high_reg = loc.low_reg + 1;
-      loc.location = kLocPhysReg;
+/* Load argument into register. LockArg(in_position, wide) must have been previously called. */
+int ArmMir2Lir::LoadArg(int in_position, bool wide) {
+  if (in_position < kInArgsInCoreRegs) {  // Low (or only) word arrived in a core register.
+    int low_reg = kInArg0CoreReg + in_position;
+    if (!wide) {
+      return low_reg;  // Narrow argument: return the plain register number.
+    }
+    int high_reg = (in_position != kInArgsInCoreRegs - 1) ? low_reg + 1 : LoadArg(in_position + 1);
+    return (low_reg & 0xff) | ((high_reg & 0xff) << 8);  // Wide: low reg bits 0-7, high bits 8-15.
+  }
+  int low_reg = AllocTemp();  // Argument is only on the stack; load it into temp register(s).
+  int offset = (in_position + kInArgToStackOffset) * sizeof(uint32_t);  // Displacement from SP.
+  if (!wide) {
+    LoadWordDisp(rARM_SP, offset, low_reg);
+    return low_reg;
+  }
+  int high_reg = AllocTemp();
+  LoadBaseDispWide(rARM_SP, offset, low_reg, high_reg, INVALID_SREG);
+  return (low_reg & 0xff) | ((high_reg & 0xff) << 8);  // Wide: low reg bits 0-7, high bits 8-15.
+}
+
+void ArmMir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
+  int reg = kInArg0CoreReg + in_position;
+  int offset = (in_position + kInArgToStackOffset) * sizeof(uint32_t);
+  if (!rl_dest.wide) {
+    if (in_position < kInArgsInCoreRegs) {
+      OpRegCopy(rl_dest.low_reg, reg);
     } else {
-      loc.location = kLocDalvikFrame;
+      LoadWordDisp(rARM_SP, offset, rl_dest.low_reg);
     }
   } else {
-    if (arg_num < 3) {
-      loc.low_reg = rARM_ARG1 + arg_num;
-      loc.location = kLocPhysReg;
+    if (in_position < kInArgsInCoreRegs - 1) {
+      OpRegCopyWide(rl_dest.low_reg, rl_dest.high_reg, reg, reg + 1);
+    } else if (in_position == kInArgsInCoreRegs - 1) {
+      OpRegCopy(rl_dest.low_reg, reg);
+      LoadWordDisp(rARM_SP, offset + sizeof(uint32_t), rl_dest.high_reg);
     } else {
-      loc.location = kLocDalvikFrame;
-    }
-  }
-  return loc;
-}
-
-/*
- * Load an argument.  If already in a register, just return.  If in
- * the frame, we can't use the normal LoadValue() because it assumed
- * a proper frame - and we're frameless.
- */
-RegLocation ArmMir2Lir::LoadArg(RegLocation loc) {
-  if (loc.location == kLocDalvikFrame) {
-    int start = (InPosition(loc.s_reg_low) + 1) * sizeof(uint32_t);
-    loc.low_reg = AllocTemp();
-    LoadWordDisp(rARM_SP, start, loc.low_reg);
-    if (loc.wide) {
-      loc.high_reg = AllocTemp();
-      LoadWordDisp(rARM_SP, start + sizeof(uint32_t), loc.high_reg);
-    }
-    loc.location = kLocPhysReg;
-  }
-  return loc;
-}
-
-/* Lock any referenced arguments that arrive in registers */
-void ArmMir2Lir::LockLiveArgs(MIR* mir) {
-  int first_in = cu_->num_regs;
-  const int num_arg_regs = 3;  // TODO: generalize & move to RegUtil.cc
-  for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
-    int v_reg = mir_graph_->SRegToVReg(mir->ssa_rep->uses[i]);
-    int InPosition = v_reg - first_in;
-    if (InPosition < num_arg_regs) {
-      LockTemp(rARM_ARG1 + InPosition);
+      LoadBaseDispWide(rARM_SP, offset, rl_dest.low_reg, rl_dest.high_reg, INVALID_SREG);
     }
   }
 }
@@ -134,26 +126,22 @@
     return NULL;  // The object is not "this" and has to be null-checked.
   }
 
-  OpSize size = static_cast<OpSize>(data.op_size);
   DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
-  bool long_or_double = (data.op_size == kLong);
-  bool is_object = data.is_object;
+  bool wide = (data.op_size == kLong);
 
-  // TODO: Generate the method using only the data in special.
-  RegLocation rl_obj = mir_graph_->GetSrc(mir, 0);
-  LockLiveArgs(mir);
-  rl_obj = ArmMir2Lir::ArgLoc(rl_obj);
-  RegLocation rl_dest;
-  if (long_or_double) {
-    rl_dest = GetReturnWide(false);
-  } else {
-    rl_dest = GetReturn(false);
-  }
   // Point of no return - no aborts after this
   ArmMir2Lir::GenPrintLabel(mir);
-  rl_obj = LoadArg(rl_obj);
-  uint32_t field_idx = mir->dalvikInsn.vC;
-  GenIGet(field_idx, mir->optimization_flags, size, rl_dest, rl_obj, long_or_double, is_object);
+  LockArg(data.object_arg);
+  RegLocation rl_dest = wide ? GetReturnWide(false) : GetReturn(false);
+  int reg_obj = LoadArg(data.object_arg);
+  if (wide) {
+    LoadBaseDispWide(reg_obj, data.field_offset, rl_dest.low_reg, rl_dest.high_reg, INVALID_SREG);
+  } else {
+    LoadBaseDisp(reg_obj, data.field_offset, rl_dest.low_reg, kWord, INVALID_SREG);
+  }
+  if (data.is_volatile) {
+    GenMemBarrier(kLoadLoad);
+  }
   return GetNextMir(bb, mir);
 }
 
@@ -164,63 +152,42 @@
     return NULL;  // The object is not "this" and has to be null-checked.
   }
 
-  OpSize size = static_cast<OpSize>(data.op_size);
   DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
-  bool long_or_double = (data.op_size == kLong);
-  bool is_object = data.is_object;
+  bool wide = (data.op_size == kLong);
 
-  // TODO: Generate the method using only the data in special.
-  RegLocation rl_src;
-  RegLocation rl_obj;
-  LockLiveArgs(mir);
-  if (long_or_double) {
-    rl_src = mir_graph_->GetSrcWide(mir, 0);
-    rl_obj = mir_graph_->GetSrc(mir, 2);
-  } else {
-    rl_src = mir_graph_->GetSrc(mir, 0);
-    rl_obj = mir_graph_->GetSrc(mir, 1);
-  }
-  rl_src = ArmMir2Lir::ArgLoc(rl_src);
-  rl_obj = ArmMir2Lir::ArgLoc(rl_obj);
-  // Reject if source is split across registers & frame
-  if (rl_src.location == kLocInvalid) {
-    ResetRegPool();
-    return NULL;
-  }
   // Point of no return - no aborts after this
   ArmMir2Lir::GenPrintLabel(mir);
-  rl_obj = LoadArg(rl_obj);
-  rl_src = LoadArg(rl_src);
-  uint32_t field_idx = mir->dalvikInsn.vC;
-  GenIPut(field_idx, mir->optimization_flags, size, rl_src, rl_obj, long_or_double, is_object);
+  LockArg(data.object_arg);
+  LockArg(data.src_arg, wide);
+  int reg_obj = LoadArg(data.object_arg);
+  int reg_src = LoadArg(data.src_arg, wide);
+  if (data.is_volatile) {
+    GenMemBarrier(kStoreStore);
+  }
+  if (wide) {
+    StoreBaseDispWide(reg_obj, data.field_offset, reg_src & 0xff, reg_src >> 8);
+  } else {
+    StoreBaseDisp(reg_obj, data.field_offset, reg_src, kWord);
+  }
+  if (data.is_volatile) {
+    GenMemBarrier(kLoadLoad);
+  }
+  if (data.is_object) {
+    MarkGCCard(reg_src, reg_obj);
+  }
   return GetNextMir(bb, mir);
 }
 
-MIR* ArmMir2Lir::SpecialIdentity(MIR* mir) {
-  RegLocation rl_src;
-  RegLocation rl_dest;
-  bool wide = (mir->ssa_rep->num_uses == 2);
-  if (wide) {
-    rl_src = mir_graph_->GetSrcWide(mir, 0);
-    rl_dest = GetReturnWide(false);
-  } else {
-    rl_src = mir_graph_->GetSrc(mir, 0);
-    rl_dest = GetReturn(false);
-  }
-  LockLiveArgs(mir);
-  rl_src = ArmMir2Lir::ArgLoc(rl_src);
-  if (rl_src.location == kLocInvalid) {
-    ResetRegPool();
-    return NULL;
-  }
+MIR* ArmMir2Lir::SpecialIdentity(MIR* mir, const InlineMethod& special) {
+  const InlineReturnArgData& data = special.d.return_data;
+  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
+  bool wide = (data.op_size == kLong);
+
   // Point of no return - no aborts after this
   ArmMir2Lir::GenPrintLabel(mir);
-  rl_src = LoadArg(rl_src);
-  if (wide) {
-    StoreValueWide(rl_dest, rl_src);
-  } else {
-    StoreValue(rl_dest, rl_src);
-  }
+  LockArg(data.arg, wide);
+  RegLocation rl_dest = wide ? GetReturnWide(false) : GetReturn(false);
+  LoadArgDirect(data.arg, rl_dest);
   return mir;
 }
 
@@ -249,8 +216,7 @@
       next_mir = SpecialIPut(&bb, mir, special);
       break;
     case kInlineOpReturnArg:
-      // TODO: Generate the method using only the data in special.
-      next_mir = SpecialIdentity(mir);
+      next_mir = SpecialIdentity(mir, special);
       break;
     default:
       return;
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 598da89..7ee241c 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -167,7 +167,6 @@
     void OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, int src_hi);
     void OpTlsCmp(ThreadOffset offset, int val);
 
-    RegLocation ArgLoc(RegLocation loc);
     LIR* LoadBaseDispBody(int rBase, int displacement, int r_dest, int r_dest_hi, OpSize size,
                           int s_reg);
     LIR* StoreBaseDispBody(int rBase, int displacement, int r_src, int r_src_hi, OpSize size);
@@ -186,13 +185,13 @@
   private:
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
                                   ConditionCode ccode);
-    int InPosition(int s_reg);
-    RegLocation LoadArg(RegLocation loc);
-    void LockLiveArgs(MIR* mir);
+    void LockArg(int in_position, bool wide = false);
+    int LoadArg(int in_position, bool wide = false);
+    void LoadArgDirect(int in_position, RegLocation rl_dest);
     MIR* GetNextMir(BasicBlock** p_bb, MIR* mir);
     MIR* SpecialIGet(BasicBlock** bb, MIR* mir, const InlineMethod& special);
     MIR* SpecialIPut(BasicBlock** bb, MIR* mir, const InlineMethod& special);
-    MIR* SpecialIdentity(MIR* mir);
+    MIR* SpecialIdentity(MIR* mir, const InlineMethod& special);
     LIR* LoadFPConstantValue(int r_dest, int value);
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 150794e..43928fc 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -357,6 +357,7 @@
     } else {
       // Handle overlap
       if (src_hi == dest_lo) {
+        DCHECK_NE(src_lo, dest_hi);
         OpRegCopy(dest_hi, src_hi);
         OpRegCopy(dest_lo, src_lo);
       } else {
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index c49f627..8c385a1 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -22,54 +22,53 @@
 namespace art {
 
 /*
- * Runtime register conventions. We consider both x86, x86-64 and x32 (32bit mode x86-64), although
- * we currently only target x86. The ABI has different conventions and we hope to have a single
- * convention to simplify code generation. Changing something that is callee save and making it
- * caller save places a burden on up-calls to save/restore the callee save register, however, there
- * are few registers that are callee save in the ABI. Changing something that is caller save and
- * making it callee save places a burden on down-calls to save/restore the callee save register.
- * For these reasons we aim to match native conventions for caller and callee save. The first 4
- * registers can be used for byte operations, for this reason they are preferred for temporary
- * scratch registers.
+ * Runtime register conventions. We consider x86, x86-64 and x32 (32bit mode x86-64). The ABI
+ * has different conventions and we capture those here. Changing something that is callee save and
+ * making it caller save places a burden on up-calls to save/restore the callee save register,
+ * however, there are few registers that are callee save in the ABI. Changing something that is
+ * caller save and making it callee save places a burden on down-calls to save/restore the callee
+ * save register. For these reasons we aim to match native conventions for caller and callee save.
+ * On x86 only the first 4 registers can be used for byte operations; for this reason they are
+ * preferred for temporary scratch registers.
  *
  * General Purpose Register:
- *  Native: x86         | x86-64 / x32      | ART
- *  r0/eax: caller save | caller save       | caller, Method*, scratch, return value
- *  r1/ecx: caller save | caller save, arg4 | caller, arg1, scratch
- *  r2/edx: caller save | caller save, arg3 | caller, arg2, scratch, high half of long return
- *  r3/ebx: callEE save | callEE save       | callER, arg3, scratch
+ *  Native: x86    | x86-64 / x32 | ART x86                                         | ART x86-64
+ *  r0/eax: caller | caller       | caller, Method*, scratch, return value          | caller, scratch, return value
+ *  r1/ecx: caller | caller, arg4 | caller, arg1, scratch                           | caller, arg3, scratch
+ *  r2/edx: caller | caller, arg3 | caller, arg2, scratch, high half of long return | caller, arg2, scratch
+ *  r3/ebx: callEE | callEE       | callER, arg3, scratch                           | callee, promotable
  *  r4/esp: stack pointer
- *  r5/ebp: callee save | callee save       | callee, available for dalvik register promotion
- *  r6/esi: callEE save | callER save, arg2 | callee, available for dalvik register promotion
- *  r7/edi: callEE save | callER save, arg1 | callee, available for dalvik register promotion
+ *  r5/ebp: callee | callee       | callee, promotable                              | callee, promotable
+ *  r6/esi: callEE | callER, arg2 | callee, promotable                              | caller, arg1, scratch
+ *  r7/edi: callEE | callER, arg1 | callee, promotable                              | caller, Method*, scratch
  *  ---  x86-64/x32 registers
  *  Native: x86-64 / x32      | ART
- *  r8:     caller save, arg5 | caller, scratch
- *  r9:     caller save, arg6 | caller, scratch
+ *  r8:     caller save, arg5 | caller, arg4, scratch
+ *  r9:     caller save, arg6 | caller, arg5, scratch
  *  r10:    caller save       | caller, scratch
  *  r11:    caller save       | caller, scratch
- *  r12:    callee save       | callee, available for dalvik register promotion
- *  r13:    callee save       | callee, available for dalvik register promotion
- *  r14:    callee save       | callee, available for dalvik register promotion
- *  r15:    callee save       | callee, available for dalvik register promotion
+ *  r12:    callee save       | callee, available for register promotion (promotable)
+ *  r13:    callee save       | callee, available for register promotion (promotable)
+ *  r14:    callee save       | callee, available for register promotion (promotable)
+ *  r15:    callee save       | callee, available for register promotion (promotable)
  *
  * There is no rSELF, instead on x86 fs: has a base address of Thread::Current, whereas on
  * x86-64/x32 gs: holds it.
  *
  * For floating point we don't support CPUs without SSE2 support (ie newer than PIII):
- *  Native: x86       | x86-64 / x32     | ART
- *  XMM0: caller save |caller save, arg1 | caller, float/double return value (except for native x86 code)
- *  XMM1: caller save |caller save, arg2 | caller, scratch
- *  XMM2: caller save |caller save, arg3 | caller, scratch
- *  XMM3: caller save |caller save, arg4 | caller, scratch
- *  XMM4: caller save |caller save, arg5 | caller, scratch
- *  XMM5: caller save |caller save, arg6 | caller, scratch
- *  XMM6: caller save |caller save, arg7 | caller, scratch
- *  XMM7: caller save |caller save, arg8 | caller, scratch
+ *  Native: x86  | x86-64 / x32 | ART x86                    | ART x86-64
+ *  XMM0: caller | caller, arg1 | caller, float return value | caller, arg1, float return value
+ *  XMM1: caller | caller, arg2 | caller, scratch            | caller, arg2, scratch
+ *  XMM2: caller | caller, arg3 | caller, scratch            | caller, arg3, scratch
+ *  XMM3: caller | caller, arg4 | caller, scratch            | caller, arg4, scratch
+ *  XMM4: caller | caller, arg5 | caller, scratch            | caller, arg5, scratch
+ *  XMM5: caller | caller, arg6 | caller, scratch            | caller, arg6, scratch
+ *  XMM6: caller | caller, arg7 | caller, scratch            | caller, arg7, scratch
+ *  XMM7: caller | caller, arg8 | caller, scratch            | caller, arg8, scratch
  *  ---  x86-64/x32 registers
- *  XMM8 .. 15: caller save
+ *  XMM8 .. 15: caller save, available as scratch registers for ART.
  *
- * X87 is a necessary evil outside of ART code:
+ * X87 is a necessary evil outside of ART code for x86:
  *  ST0:  x86 float/double native return value, caller save
  *  ST1 .. ST7: caller save
  *
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 0e5c60a..cf3f72e 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -274,7 +274,7 @@
      *   r2 = size of argument array in bytes
      *   r3 = (managed) thread pointer
      *   [sp] = JValue* result
-     *   [sp + 4] = result type char
+     *   [sp + 4] = shorty
      */
 ENTRY art_quick_invoke_stub
     push   {r0, r4, r5, r9, r11, lr}       @ spill regs
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index c60bca0..f9a200a 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -456,7 +456,7 @@
      *   a2 = size of argument array in bytes
      *   a3 = (managed) thread pointer
      *   [sp + 16] = JValue* result
-     *   [sp + 20] = result type char
+     *   [sp + 20] = shorty
      */
 ENTRY art_quick_invoke_stub
     GENERATE_GLOBAL_POINTER
@@ -502,7 +502,8 @@
     addiu $sp, $sp, 16
     .cfi_adjust_cfa_offset -16
     lw    $t0, 16($sp)          # get result pointer
-    lw    $t1, 20($sp)          # get result type char
+    lw    $t1, 20($sp)          # get shorty
+    lb    $t1, 0($t1)           # get result type char
     li    $t2, 68               # put char 'D' into t2
     beq   $t1, $t2, 1f          # branch if result type char == 'D'
     li    $t3, 70               # put char 'F' into t3
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 9c3eb30..c76c6b2 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -251,7 +251,7 @@
      *   [sp + 12] = size of argument array in bytes
      *   [sp + 16] = (managed) thread pointer
      *   [sp + 20] = JValue* result
-     *   [sp + 24] = result type char
+     *   [sp + 24] = shorty
      */
 DEFINE_FUNCTION art_quick_invoke_stub
     PUSH ebp                      // save ebp
@@ -281,17 +281,20 @@
     POP ebx                       // pop ebx
     POP ebp                       // pop ebp
     mov 20(%esp), %ecx            // get result pointer
-    cmpl LITERAL(68), 24(%esp)    // test if result type char == 'D'
-    je return_double_quick
-    cmpl LITERAL(70), 24(%esp)    // test if result type char == 'F'
-    je return_float_quick
-    mov %eax, (%ecx)              // store the result
+    mov %eax, (%ecx)              // store the result assuming it's a long, int or Object*
     mov %edx, 4(%ecx)             // store the other half of the result
+    mov 24(%esp), %edx            // get the shorty
+    cmpb LITERAL(68), (%edx)      // test if result type char == 'D'
+    je return_double_quick
+    cmpb LITERAL(70), (%edx)      // test if result type char == 'F'
+    je return_float_quick
     ret
 return_double_quick:
-return_float_quick:
     movsd %xmm0, (%ecx)           // store the floating point result
     ret
+return_float_quick:
+    movss %xmm0, (%ecx)           // store the single-precision (float) result
+    ret
 END_FUNCTION art_quick_invoke_stub
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index e01a31b..44bc7a2 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -149,6 +149,14 @@
 
     /*
      * Quick invocation stub.
+     * On entry:
+     *   [sp] = return address
+     *   rdi = method pointer
+     *   rsi = argument array or NULL for no argument methods
+     *   rdx = size of argument array in bytes
+     *   rcx = (managed) thread pointer
+     *   r8 = JValue* result
+     *   r9 = char* shorty
      */
 DEFINE_FUNCTION art_quick_invoke_stub
     int3
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 978c99b..fac1e14 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1119,14 +1119,15 @@
 // Keep in sync with InitCallback. Anything we visit, we need to
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
-void ClassLinker::VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty) {
-  class_roots_ = down_cast<mirror::ObjectArray<mirror::Class>*>(visitor(class_roots_, arg));
+void ClassLinker::VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty) {
+  class_roots_ = down_cast<mirror::ObjectArray<mirror::Class>*>(
+      callback(class_roots_, arg, 0, kRootVMInternal));
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
     if (!only_dirty || dex_caches_dirty_) {
       for (mirror::DexCache*& dex_cache : dex_caches_) {
-        dex_cache = down_cast<mirror::DexCache*>(visitor(dex_cache, arg));
+        dex_cache = down_cast<mirror::DexCache*>(callback(dex_cache, arg, 0, kRootVMInternal));
         DCHECK(dex_cache != nullptr);
       }
       if (clean_dirty) {
@@ -1139,7 +1140,7 @@
     WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
     if (!only_dirty || class_table_dirty_) {
       for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
-        it.second = down_cast<mirror::Class*>(visitor(it.second, arg));
+        it.second = down_cast<mirror::Class*>(callback(it.second, arg, 0, kRootStickyClass));
         DCHECK(it.second != nullptr);
       }
       if (clean_dirty) {
@@ -1151,7 +1152,8 @@
     // handle image roots by using the MS/CMS rescanning of dirty cards.
   }
 
-  array_iftable_ = reinterpret_cast<mirror::IfTable*>(visitor(array_iftable_, arg));
+  array_iftable_ = reinterpret_cast<mirror::IfTable*>(callback(array_iftable_, arg, 0,
+                                                               kRootVMInternal));
   DCHECK(array_iftable_ != nullptr);
 }
 
@@ -3152,7 +3154,7 @@
     CHECK(can_init_statics);
     if (LIKELY(Runtime::Current()->IsStarted())) {
       JValue result;
-      clinit->Invoke(self, NULL, 0, &result, 'V');
+      clinit->Invoke(self, NULL, 0, &result, "V");
     } else {
       art::interpreter::EnterInterpreterFromInvoke(self, clinit, NULL, NULL, NULL);
     }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 7e31356..0745ee2 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -26,8 +26,8 @@
 #include "dex_file.h"
 #include "gtest/gtest.h"
 #include "jni.h"
-#include "root_visitor.h"
 #include "oat_file.h"
+#include "object_callbacks.h"
 
 namespace art {
 namespace gc {
@@ -235,7 +235,7 @@
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty)
+  void VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty)
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_, dex_lock_);
 
   mirror::DexCache* FindDexCache(const DexFile& dex_file) const
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index fb979c2..d9ef0c1 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -339,7 +339,7 @@
     }
   }
 
-  static mirror::Object* TestRootVisitor(mirror::Object* root, void*) {
+  static mirror::Object* TestRootVisitor(mirror::Object* root, void*, uint32_t, RootType) {
     EXPECT_TRUE(root != NULL);
     return root;
   }
diff --git a/runtime/common_test.h b/runtime/common_test.h
index af7e8ae..f7859ea 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -268,7 +268,7 @@
     MakeExecutable(&code[0], code.size());
   }
 
-  // Create an OatMethod based on pointers (for unit tests)
+  // Create an OatMethod based on pointers (for unit tests).
   OatFile::OatMethod CreateOatMethod(const void* code,
                                      const size_t frame_size_in_bytes,
                                      const uint32_t core_spill_mask,
@@ -276,11 +276,23 @@
                                      const uint8_t* mapping_table,
                                      const uint8_t* vmap_table,
                                      const uint8_t* gc_map) {
-    const byte* base = nullptr;  // Base of data in oat file, ie 0.
-    uint32_t code_offset = PointerToLowMemUInt32(code);
-    uint32_t mapping_table_offset = PointerToLowMemUInt32(mapping_table);
-    uint32_t vmap_table_offset = PointerToLowMemUInt32(vmap_table);
-    uint32_t gc_map_offset = PointerToLowMemUInt32(gc_map);
+    const byte* base;
+    uint32_t code_offset, mapping_table_offset, vmap_table_offset, gc_map_offset;
+    if (mapping_table == nullptr && vmap_table == nullptr && gc_map == nullptr) {
+      base = reinterpret_cast<const byte*>(code);  // Base of data points at code.
+      base -= kPointerSize;  // Move backward so that code_offset != 0.
+      code_offset = kPointerSize;
+      mapping_table_offset = 0;
+      vmap_table_offset = 0;
+      gc_map_offset = 0;
+    } else {
+      // TODO: 64bit support.
+      base = nullptr;  // Base of data in oat file, ie 0.
+      code_offset = PointerToLowMemUInt32(code);
+      mapping_table_offset = PointerToLowMemUInt32(mapping_table);
+      vmap_table_offset = PointerToLowMemUInt32(vmap_table);
+      gc_map_offset = PointerToLowMemUInt32(gc_map);
+    }
     return OatFile::OatMethod(base,
                               code_offset,
                               frame_size_in_bytes,
@@ -470,6 +482,8 @@
       instruction_set = kX86;
 #elif defined(__x86_64__)
       instruction_set = kX86_64;
+      // TODO: x86_64 compilation support.
+      runtime_->SetCompilerFilter(Runtime::kInterpretOnly);
 #endif
 
       for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 8280c7c..733e843 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -85,17 +85,17 @@
     return depth;
   }
 
-  void UpdateObjectPointers(RootVisitor* visitor, void* arg)
+  void UpdateObjectPointers(IsMarkedCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (type != nullptr) {
-      type = down_cast<mirror::Class*>(visitor(type, arg));
+      type = down_cast<mirror::Class*>(callback(type, arg));
     }
     for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) {
       mirror::ArtMethod*& m = stack[stack_frame].method;
       if (m == nullptr) {
         break;
       }
-      m = down_cast<mirror::ArtMethod*>(visitor(m, arg));
+      m = down_cast<mirror::ArtMethod*>(callback(m, arg));
     }
   }
 };
@@ -3019,7 +3019,7 @@
   MethodHelper mh(m.get());
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, pReq->receiver, reinterpret_cast<jvalue*>(pReq->arg_values));
-  InvokeWithArgArray(soa, m.get(), &arg_array, &pReq->result_value, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, m.get(), &arg_array, &pReq->result_value, mh.GetShorty());
 
   mirror::Throwable* exception = soa.Self()->GetException(NULL);
   soa.Self()->ClearException();
@@ -3793,7 +3793,7 @@
   }
 }
 
-void Dbg::UpdateObjectPointers(RootVisitor* visitor, void* arg) {
+void Dbg::UpdateObjectPointers(IsMarkedCallback* visitor, void* arg) {
   {
     MutexLock mu(Thread::Current(), gAllocTrackerLock);
     if (recent_allocation_records_ != nullptr) {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index f1e3f45..5d269ee 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -29,7 +29,7 @@
 #include "jdwp/jdwp.h"
 #include "jni.h"
 #include "jvalue.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "thread_state.h"
 
 namespace art {
@@ -453,7 +453,7 @@
   static void DumpRecentAllocations();
 
   // Updates the stored direct object pointers (called from SweepSystemWeaks).
-  static void UpdateObjectPointers(RootVisitor* visitor, void* arg)
+  static void UpdateObjectPointers(IsMarkedCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   enum HpifWhen {
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index 8a2ce51..2067a45 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -48,11 +48,11 @@
   if (kUsePortableCompiler) {
     ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
     arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
-    method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, mh.GetShorty()[0]);
+    method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, mh.GetShorty());
   } else {
     method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
                    (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
-                   result, mh.GetShorty()[0]);
+                   result, mh.GetShorty());
   }
 }
 
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index 4d1e531..c0304eb 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -46,7 +46,8 @@
   }
 }
 
-static void WorkAroundJniBugsForJobject(intptr_t* arg_ptr) {
+static void WorkAroundJniBugsForJobject(intptr_t* arg_ptr)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   intptr_t value = *arg_ptr;
   mirror::Object** value_as_jni_rep = reinterpret_cast<mirror::Object**>(value);
   mirror::Object* value_as_work_around_rep = value_as_jni_rep != NULL ? *value_as_jni_rep : NULL;
diff --git a/runtime/gc/accounting/heap_bitmap.cc b/runtime/gc/accounting/heap_bitmap.cc
index c520ee6..f94cf24 100644
--- a/runtime/gc/accounting/heap_bitmap.cc
+++ b/runtime/gc/accounting/heap_bitmap.cc
@@ -72,12 +72,10 @@
   discontinuous_space_sets_.erase(it);
 }
 
-void HeapBitmap::Walk(SpaceBitmap::Callback* callback, void* arg) {
+void HeapBitmap::Walk(ObjectCallback* callback, void* arg) {
   for (const auto& bitmap : continuous_space_bitmaps_) {
     bitmap->Walk(callback, arg);
   }
-
-  DCHECK(!discontinuous_space_sets_.empty());
   for (const auto& space_set : discontinuous_space_sets_) {
     space_set->Walk(callback, arg);
   }
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index bcf36a2..dde1425 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -20,6 +20,7 @@
 #include "base/logging.h"
 #include "gc_allocator.h"
 #include "locks.h"
+#include "object_callbacks.h"
 #include "space_bitmap.h"
 
 namespace art {
@@ -83,7 +84,7 @@
     return NULL;
   }
 
-  void Walk(SpaceBitmap::Callback* callback, void* arg)
+  void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   template <typename Visitor>
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 0225f29..aad214a 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -70,8 +70,8 @@
 
 class ModUnionUpdateObjectReferencesVisitor {
  public:
-  ModUnionUpdateObjectReferencesVisitor(RootVisitor visitor, void* arg)
-    : visitor_(visitor),
+  ModUnionUpdateObjectReferencesVisitor(RootCallback* callback, void* arg)
+    : callback_(callback),
       arg_(arg) {
   }
 
@@ -80,7 +80,7 @@
                   bool /* is_static */) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Only add the reference if it is non null and fits our criteria.
     if (ref != nullptr) {
-      Object* new_ref = visitor_(ref, arg_);
+      Object* new_ref = callback_(ref, arg_, 0, kRootVMInternal);
       if (new_ref != ref) {
         // Use SetFieldObjectWithoutWriteBarrier to avoid card mark as an optimization which
         // reduces dirtied pages and improves performance.
@@ -90,26 +90,26 @@
   }
 
  private:
-  RootVisitor* visitor_;
+  RootCallback* const callback_;
   void* arg_;
 };
 
 class ModUnionScanImageRootVisitor {
  public:
-  ModUnionScanImageRootVisitor(RootVisitor visitor, void* arg)
-      : visitor_(visitor), arg_(arg) {}
+  ModUnionScanImageRootVisitor(RootCallback* callback, void* arg)
+      : callback_(callback), arg_(arg) {}
 
   void operator()(Object* root) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(root != NULL);
-    ModUnionUpdateObjectReferencesVisitor ref_visitor(visitor_, arg_);
+    ModUnionUpdateObjectReferencesVisitor ref_visitor(callback_, arg_);
     collector::MarkSweep::VisitObjectReferences(root, ref_visitor, true);
   }
 
  private:
-  RootVisitor* visitor_;
-  void* arg_;
+  RootCallback* const callback_;
+  void* const arg_;
 };
 
 void ModUnionTableReferenceCache::ClearCards() {
@@ -261,7 +261,7 @@
   }
 }
 
-void ModUnionTableReferenceCache::UpdateAndMarkReferences(RootVisitor visitor, void* arg) {
+void ModUnionTableReferenceCache::UpdateAndMarkReferences(RootCallback* callback, void* arg) {
   Heap* heap = GetHeap();
   CardTable* card_table = heap->GetCardTable();
 
@@ -296,7 +296,7 @@
     for (mirror::HeapReference<Object>* obj_ptr : ref.second) {
       Object* obj = obj_ptr->AsMirrorPtr();
       if (obj != nullptr) {
-        Object* new_obj = visitor(obj, arg);
+        Object* new_obj = callback(obj, arg, 0, kRootVMInternal);
         // Avoid dirtying pages in the image unless necessary.
         if (new_obj != obj) {
           obj_ptr->Assign(new_obj);
@@ -318,9 +318,9 @@
 }
 
 // Mark all references to the alloc space(s).
-void ModUnionTableCardCache::UpdateAndMarkReferences(RootVisitor visitor, void* arg) {
+void ModUnionTableCardCache::UpdateAndMarkReferences(RootCallback* callback, void* arg) {
   CardTable* card_table = heap_->GetCardTable();
-  ModUnionScanImageRootVisitor scan_visitor(visitor, arg);
+  ModUnionScanImageRootVisitor scan_visitor(callback, arg);
   SpaceBitmap* bitmap = space_->GetLiveBitmap();
   for (const byte* card_addr : cleared_cards_) {
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index a89dbd1..7d5d8d2 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -19,7 +19,7 @@
 
 #include "gc_allocator.h"
 #include "globals.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "safe_map.h"
 
 #include <set>
@@ -69,7 +69,7 @@
   // Update the mod-union table using data stored by ClearCards. There may be multiple ClearCards
   // before a call to update, for example, back-to-back sticky GCs. Also mark references to other
   // spaces which are stored in the mod-union table.
-  virtual void UpdateAndMarkReferences(RootVisitor visitor, void* arg) = 0;
+  virtual void UpdateAndMarkReferences(RootCallback* callback, void* arg) = 0;
 
   // Verification, sanity checks that we don't have clean cards which conflict with out cached data
   // for said cards. Exclusive lock is required since verify sometimes uses
@@ -106,7 +106,7 @@
   void ClearCards();
 
   // Update table based on cleared cards and mark all references to the other spaces.
-  void UpdateAndMarkReferences(RootVisitor visitor, void* arg)
+  void UpdateAndMarkReferences(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -142,7 +142,7 @@
   void ClearCards();
 
   // Mark all references to the alloc space(s).
-  void UpdateAndMarkReferences(RootVisitor visitor, void* arg)
+  void UpdateAndMarkReferences(RootCallback* callback, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index a080bee..ad4ff1b 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -44,7 +44,7 @@
                       reinterpret_cast<void*>(HeapLimit()));
 }
 
-void ObjectSet::Walk(SpaceBitmap::Callback* callback, void* arg) {
+void ObjectSet::Walk(ObjectCallback* callback, void* arg) {
   for (const mirror::Object* obj : contained_) {
     callback(const_cast<mirror::Object*>(obj), arg);
   }
@@ -102,7 +102,7 @@
 
 // Visits set bits in address order.  The callback is not permitted to
 // change the bitmap bits or max during the traversal.
-void SpaceBitmap::Walk(SpaceBitmap::Callback* callback, void* arg) {
+void SpaceBitmap::Walk(ObjectCallback* callback, void* arg) {
   CHECK(bitmap_begin_ != NULL);
   CHECK(callback != NULL);
 
@@ -174,12 +174,12 @@
   }
 }
 
-static void WalkFieldsInOrder(SpaceBitmap* visited, SpaceBitmap::Callback* callback, mirror::Object* obj,
+static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
                               void* arg);
 
 // Walk instance fields of the given Class. Separate function to allow recursion on the super
 // class.
-static void WalkInstanceFields(SpaceBitmap* visited, SpaceBitmap::Callback* callback, mirror::Object* obj,
+static void WalkInstanceFields(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
                                mirror::Class* klass, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Visit fields of parent classes first.
@@ -204,7 +204,7 @@
 }
 
 // For an unvisited object, visit it then all its children found via fields.
-static void WalkFieldsInOrder(SpaceBitmap* visited, SpaceBitmap::Callback* callback, mirror::Object* obj,
+static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
                               void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (visited->Test(obj)) {
@@ -246,7 +246,7 @@
 
 // Visits set bits with an in order traversal.  The callback is not permitted to change the bitmap
 // bits or max during the traversal.
-void SpaceBitmap::InOrderWalk(SpaceBitmap::Callback* callback, void* arg) {
+void SpaceBitmap::InOrderWalk(ObjectCallback* callback, void* arg) {
   UniquePtr<SpaceBitmap> visited(Create("bitmap for in-order walk",
                                        reinterpret_cast<byte*>(heap_begin_),
                                        IndexToOffset(bitmap_size_ / kWordSize)));
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index aa074eb..3c4b674 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -17,10 +17,11 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 
-#include "locks.h"
 #include "gc_allocator.h"
 #include "globals.h"
+#include "locks.h"
 #include "mem_map.h"
+#include "object_callbacks.h"
 #include "UniquePtr.h"
 
 #include <limits.h>
@@ -42,8 +43,6 @@
   // Alignment of objects within spaces.
   static const size_t kAlignment = 8;
 
-  typedef void Callback(mirror::Object* obj, void* arg);
-
   typedef void ScanCallback(mirror::Object* obj, void* finger, void* arg);
 
   typedef void SweepCallback(size_t ptr_count, mirror::Object** ptrs, void* arg);
@@ -102,7 +101,7 @@
     return index < bitmap_size_ / kWordSize;
   }
 
-  void VisitRange(uintptr_t base, uintptr_t max, Callback* visitor, void* arg) const;
+  void VisitRange(uintptr_t base, uintptr_t max, ObjectCallback* callback, void* arg) const;
 
   class ClearVisitor {
    public:
@@ -129,10 +128,10 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Walk(Callback* callback, void* arg)
+  void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  void InOrderWalk(Callback* callback, void* arg)
+  void InOrderWalk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   static void SweepWalk(const SpaceBitmap& live, const SpaceBitmap& mark, uintptr_t base,
@@ -249,7 +248,7 @@
     contained_ = space_set.contained_;
   }
 
-  void Walk(SpaceBitmap::Callback* callback, void* arg)
+  void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   template <typename Visitor>
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 862d06f..de9f59e 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -525,14 +525,16 @@
   }
 }
 
-Object* MarkSweep::MarkRootParallelCallback(Object* root, void* arg) {
+mirror::Object* MarkSweep::MarkRootParallelCallback(mirror::Object* root, void* arg,
+                                                    uint32_t /*thread_id*/, RootType /*root_type*/) {
   DCHECK(root != NULL);
   DCHECK(arg != NULL);
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(root);
   return root;
 }
 
-Object* MarkSweep::MarkRootCallback(Object* root, void* arg) {
+Object* MarkSweep::MarkRootCallback(Object* root, void* arg, uint32_t /*thread_id*/,
+                                    RootType /*root_type*/) {
   DCHECK(root != nullptr);
   DCHECK(arg != nullptr);
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNull(root);
@@ -930,7 +932,7 @@
   ProcessMarkStack(false);
 }
 
-mirror::Object* MarkSweep::IsMarkedCallback(Object* object, void* arg) {
+mirror::Object* MarkSweep::IsMarkedCallback(mirror::Object* object, void* arg) {
   if (reinterpret_cast<MarkSweep*>(arg)->IsMarked(object)) {
     return object;
   }
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index bfedac7..8bc0bb5 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -22,8 +22,8 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "garbage_collector.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 #include "UniquePtr.h"
 
 namespace art {
@@ -180,11 +180,13 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
+  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg, uint32_t thread_id,
+                                          RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static mirror::Object* MarkRootParallelCallback(mirror::Object* root, void* arg);
+  static mirror::Object* MarkRootParallelCallback(mirror::Object* root, void* arg,
+                                                  uint32_t thread_id, RootType root_type);
 
   // Marks an object.
   void MarkObject(const mirror::Object* obj)
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 0c6a938..b37b9d2 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -513,7 +513,7 @@
   return forward_address;
 }
 
-Object* SemiSpace::RecursiveMarkObjectCallback(Object* root, void* arg) {
+mirror::Object* SemiSpace::RecursiveMarkObjectCallback(mirror::Object* root, void* arg) {
   DCHECK(root != nullptr);
   DCHECK(arg != nullptr);
   SemiSpace* semi_space = reinterpret_cast<SemiSpace*>(arg);
@@ -522,7 +522,8 @@
   return ret;
 }
 
-Object* SemiSpace::MarkRootCallback(Object* root, void* arg) {
+Object* SemiSpace::MarkRootCallback(Object* root, void* arg, uint32_t /*thread_id*/,
+                                    RootType /*root_type*/) {
   DCHECK(root != nullptr);
   DCHECK(arg != nullptr);
   return reinterpret_cast<SemiSpace*>(arg)->MarkObject(root);
@@ -536,7 +537,7 @@
   timings_.EndSplit();
 }
 
-mirror::Object* SemiSpace::MarkedForwardingAddressCallback(Object* object, void* arg) {
+mirror::Object* SemiSpace::MarkedForwardingAddressCallback(mirror::Object* object, void* arg) {
   return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
 }
 
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 685b33c..f58402f 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -22,8 +22,8 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "garbage_collector.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 #include "UniquePtr.h"
 
 namespace art {
@@ -142,7 +142,8 @@
   static void VisitObjectReferencesAndClass(mirror::Object* obj, const Visitor& visitor)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
+  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg, uint32_t /*tid*/,
+                                          RootType /*root_type*/)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   static mirror::Object* RecursiveMarkObjectCallback(mirror::Object* root, void* arg)
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 62567d7..a324925 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -369,7 +369,7 @@
   }
 }
 
-void Heap::VisitObjects(ObjectVisitorCallback callback, void* arg) {
+void Heap::VisitObjects(ObjectCallback callback, void* arg) {
   Thread* self = Thread::Current();
   // GCs can move objects, so don't allow this.
   const char* old_cause = self->StartAssertNoThreadSuspension("Visiting objects");
@@ -604,8 +604,8 @@
 }
 
 struct SoftReferenceArgs {
-  RootVisitor* is_marked_callback_;
-  RootVisitor* recursive_mark_callback_;
+  IsMarkedCallback* is_marked_callback_;
+  MarkObjectCallback* recursive_mark_callback_;
   void* arg_;
 };
 
@@ -617,8 +617,8 @@
 
 // Process reference class instances and schedule finalizations.
 void Heap::ProcessReferences(TimingLogger& timings, bool clear_soft,
-                             RootVisitor* is_marked_callback,
-                             RootVisitor* recursive_mark_object_callback, void* arg) {
+                             IsMarkedCallback* is_marked_callback,
+                             MarkObjectCallback* recursive_mark_object_callback, void* arg) {
   // Unless we are in the zygote or required to clear soft references with white references,
   // preserve some white referents.
   if (!clear_soft && !Runtime::Current()->IsZygote()) {
@@ -671,13 +671,13 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void Heap::DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj,
-                                  RootVisitor mark_visitor, void* arg) {
+                                  IsMarkedCallback is_marked_callback, void* arg) {
   DCHECK(klass != nullptr);
   DCHECK(klass->IsReferenceClass());
   DCHECK(obj != nullptr);
   mirror::Object* referent = GetReferenceReferent(obj);
   if (referent != nullptr) {
-    mirror::Object* forward_address = mark_visitor(referent, arg);
+    mirror::Object* forward_address = is_marked_callback(referent, arg);
     // Null means that the object is not currently marked.
     if (forward_address == nullptr) {
       Thread* self = Thread::Current();
@@ -800,11 +800,12 @@
   return IsAligned<kObjectAlignment>(obj) && IsHeapAddress(obj);
 }
 
+bool Heap::IsNonDiscontinuousSpaceHeapAddress(const mirror::Object* obj) const {
+  return FindContinuousSpaceFromObject(obj, true) != nullptr;
+}
+
 bool Heap::IsHeapAddress(const mirror::Object* obj) const {
-  if (kMovingCollector && bump_pointer_space_ && bump_pointer_space_->HasAddress(obj)) {
-    return true;
-  }
-  // TODO: This probably doesn't work for large objects.
+  // TODO: This might not work for large objects.
   return FindSpaceFromObject(obj, true) != nullptr;
 }
 
@@ -1169,7 +1170,7 @@
 
 void Heap::GetReferringObjects(mirror::Object* o, int32_t max_count,
                                std::vector<mirror::Object*>& referring_objects) {
-  // Can't do any GC in this function since this may move classes.
+  // Can't do any GC in this function since this may move the object o.
   Thread* self = Thread::Current();
   auto* old_cause = self->StartAssertNoThreadSuspension("GetReferringObjects");
   ReferringObjectsFinder finder(o, max_count, referring_objects);
@@ -1696,7 +1697,8 @@
   gc_complete_cond_->Broadcast(self);
 }
 
-static mirror::Object* RootMatchesObjectVisitor(mirror::Object* root, void* arg) {
+static mirror::Object* RootMatchesObjectVisitor(mirror::Object* root, void* arg,
+                                                uint32_t /*thread_id*/, RootType /*root_type*/) {
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(arg);
   if (root == obj) {
     LOG(INFO) << "Object " << obj << " is a root";
@@ -1823,7 +1825,8 @@
     return heap_->IsLiveObjectLocked(obj, true, false, true);
   }
 
-  static mirror::Object* VerifyRoots(mirror::Object* root, void* arg) {
+  static mirror::Object* VerifyRoots(mirror::Object* root, void* arg, uint32_t /*thread_id*/,
+                                     RootType /*root_type*/) {
     VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
     (*visitor)(nullptr, root, MemberOffset(0), true);
     return root;
@@ -2041,7 +2044,7 @@
   }
 }
 
-static mirror::Object* IdentityCallback(mirror::Object* obj, void*) {
+static mirror::Object* IdentityRootCallback(mirror::Object* obj, void*, uint32_t, RootType) {
   return obj;
 }
 
@@ -2080,7 +2083,7 @@
     ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_);
     for (const auto& table_pair : mod_union_tables_) {
       accounting::ModUnionTable* mod_union_table = table_pair.second;
-      mod_union_table->UpdateAndMarkReferences(IdentityCallback, nullptr);
+      mod_union_table->UpdateAndMarkReferences(IdentityRootCallback, nullptr);
       mod_union_table->Verify();
     }
     thread_list->ResumeAll();
@@ -2316,10 +2319,10 @@
 void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
   ScopedObjectAccess soa(self);
   JValue result;
-  ArgArray arg_array(NULL, 0);
+  ArgArray arg_array("VL", 2);
   arg_array.Append(object);
   soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self,
-      arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+      arg_array.GetArray(), arg_array.GetNumBytes(), &result, "VL");
 }
 
 void Heap::EnqueueClearedReferences() {
@@ -2330,10 +2333,10 @@
     if (LIKELY(Runtime::Current()->IsStarted())) {
       ScopedObjectAccess soa(self);
       JValue result;
-      ArgArray arg_array(NULL, 0);
+      ArgArray arg_array("VL", 2);
       arg_array.Append(cleared_references_.GetList());
       soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
-          arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+          arg_array.GetArray(), arg_array.GetNumBytes(), &result, "VL");
     }
     cleared_references_.Clear();
   }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 476ceee..e416c0e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -32,9 +32,9 @@
 #include "gtest/gtest.h"
 #include "jni.h"
 #include "locks.h"
+#include "object_callbacks.h"
 #include "offsets.h"
 #include "reference_queue.h"
-#include "root_visitor.h"
 #include "safe_map.h"
 #include "thread_pool.h"
 
@@ -183,7 +183,7 @@
   }
 
   // Visit all of the live objects in the heap.
-  void VisitObjects(ObjectVisitorCallback callback, void* arg)
+  void VisitObjects(ObjectCallback callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void SwapSemiSpaces() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -226,10 +226,16 @@
   // A weaker test than IsLiveObject or VerifyObject that doesn't require the heap lock,
   // and doesn't abort on error, allowing the caller to report more
   // meaningful diagnostics.
-  bool IsValidObjectAddress(const mirror::Object* obj) const;
+  bool IsValidObjectAddress(const mirror::Object* obj) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if the address passed in is a heap address, doesn't need to be aligned.
-  bool IsHeapAddress(const mirror::Object* obj) const;
+  bool IsHeapAddress(const mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Faster alternative to IsHeapAddress since finding if an object is in the large object space is
+  // very slow.
+  bool IsNonDiscontinuousSpaceHeapAddress(const mirror::Object* obj) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if 'obj' is a live heap object, false otherwise (including for invalid addresses).
   // Requires the heap lock to be held.
@@ -328,8 +334,9 @@
     return finalizer_reference_zombie_offset_;
   }
   static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg);
-  void ProcessReferences(TimingLogger& timings, bool clear_soft, RootVisitor* is_marked_callback,
-                         RootVisitor* recursive_mark_object_callback, void* arg)
+  void ProcessReferences(TimingLogger& timings, bool clear_soft,
+                         IsMarkedCallback* is_marked_callback,
+                         MarkObjectCallback* recursive_mark_object_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -605,8 +612,9 @@
   // Returns true if the reference object has not yet been enqueued.
   bool IsEnqueuable(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsEnqueued(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj, RootVisitor mark_visitor,
-                              void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj,
+                              IsMarkedCallback is_marked_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Run the finalizers.
   void RunFinalization(JNIEnv* env);
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 2d73a71..fae4cac 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -94,13 +94,14 @@
   }
 }
 
-void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor,
+void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references,
+                                          IsMarkedCallback* preserve_callback,
                                           void* arg) {
   while (!IsEmpty()) {
     mirror::Object* ref = DequeuePendingReference();
     mirror::Object* referent = heap_->GetReferenceReferent(ref);
     if (referent != nullptr) {
-      mirror::Object* forward_address = visitor(referent, arg);
+      mirror::Object* forward_address = preserve_callback(referent, arg);
       if (forward_address == nullptr) {
         // Referent is white, clear it.
         heap_->ClearReferenceReferent(ref);
@@ -108,7 +109,7 @@
           cleared_references.EnqueuePendingReference(ref);
         }
       } else if (referent != forward_address) {
-        // Object moved, need to updated the referrent.
+        // Object moved, need to update the referent.
         heap_->SetReferenceReferent(ref, forward_address);
       }
     }
@@ -116,8 +117,9 @@
 }
 
 void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                                RootVisitor is_marked_callback,
-                                                RootVisitor recursive_mark_callback, void* arg) {
+                                                IsMarkedCallback is_marked_callback,
+                                                MarkObjectCallback recursive_mark_callback,
+                                                void* arg) {
   while (!IsEmpty()) {
     mirror::Object* ref = DequeuePendingReference();
     mirror::Object* referent = heap_->GetReferenceReferent(ref);
@@ -139,7 +141,7 @@
   }
 }
 
-void ReferenceQueue::PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg) {
+void ReferenceQueue::PreserveSomeSoftReferences(IsMarkedCallback preserve_callback, void* arg) {
   ReferenceQueue cleared(heap_);
   while (!IsEmpty()) {
     mirror::Object* ref = DequeuePendingReference();
@@ -149,7 +151,7 @@
       if (forward_address == nullptr) {
         // Either the reference isn't marked or we don't wish to preserve it.
         cleared.EnqueuePendingReference(ref);
-      } else {
+      } else if (forward_address != referent) {
         heap_->SetReferenceReferent(ref, forward_address);
       }
     }
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 3f3069e..e12a95f 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -27,8 +27,8 @@
 #include "gtest/gtest.h"
 #include "jni.h"
 #include "locks.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -56,17 +56,18 @@
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to the
   // zombie field, and the referent field is cleared.
   void EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                  RootVisitor is_marked_callback,
-                                  RootVisitor recursive_mark_callback, void* arg)
+                                  IsMarkedCallback is_marked_callback,
+                                  MarkObjectCallback recursive_mark_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Walks the reference list marking any references subject to the reference clearing policy.
   // References with a black referent are removed from the list.  References with white referents
   // biased toward saving are blackened and also removed from the list.
-  void PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg)
+  void PreserveSomeSoftReferences(IsMarkedCallback* preserve_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Unlink the reference list clearing references objects with white referents.  Cleared references
   // registered to a reference queue are scheduled for appending by the heap worker thread.
-  void ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor, void* arg)
+  void ClearWhiteReferences(ReferenceQueue& cleared_references, IsMarkedCallback is_marked_callback,
+                            void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Dump(std::ostream& os) const
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index a314d74..f7bdc4c 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -44,9 +44,8 @@
       growth_end_(limit),
       objects_allocated_(0), bytes_allocated_(0),
       block_lock_("Block lock"),
+      main_block_size_(0),
       num_blocks_(0) {
-  CHECK_GE(Capacity(), sizeof(BlockHeader));
-  end_ += sizeof(BlockHeader);
 }
 
 BumpPointerSpace::BumpPointerSpace(const std::string& name, MemMap* mem_map)
@@ -55,9 +54,8 @@
       growth_end_(mem_map->End()),
       objects_allocated_(0), bytes_allocated_(0),
       block_lock_("Block lock"),
+      main_block_size_(0),
       num_blocks_(0) {
-  CHECK_GE(Capacity(), sizeof(BlockHeader));
-  end_ += sizeof(BlockHeader);
 }
 
 mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated) {
@@ -78,13 +76,14 @@
   CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed";
   // Reset the end of the space back to the beginning, we move the end forward as we allocate
   // objects.
-  SetEnd(Begin() + sizeof(BlockHeader));
+  SetEnd(Begin());
   objects_allocated_ = 0;
   bytes_allocated_ = 0;
   growth_end_ = Limit();
   {
     MutexLock mu(Thread::Current(), block_lock_);
     num_blocks_ = 0;
+    main_block_size_ = 0;
   }
 }
 
@@ -115,9 +114,8 @@
 }
 
 void BumpPointerSpace::UpdateMainBlock() {
-  BlockHeader* header = reinterpret_cast<BlockHeader*>(Begin());
-  header->size_ = Size() - sizeof(BlockHeader);
   DCHECK_EQ(num_blocks_, 0U);
+  main_block_size_ = Size();
 }
 
 // Returns the start of the storage.
@@ -137,9 +135,9 @@
   return storage;
 }
 
-void BumpPointerSpace::Walk(ObjectVisitorCallback callback, void* arg) {
+void BumpPointerSpace::Walk(ObjectCallback* callback, void* arg) {
   byte* pos = Begin();
-
+  byte* main_end = pos;
   {
     MutexLock mu(Thread::Current(), block_lock_);
     // If we have 0 blocks then we need to update the main header since we have bump pointer style
@@ -147,8 +145,15 @@
     if (num_blocks_ == 0) {
       UpdateMainBlock();
     }
+    main_end += main_block_size_;
   }
-
+  // Walk all of the objects in the main block first.
+  while (pos < main_end) {
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos);
+    callback(obj, arg);
+    pos = reinterpret_cast<byte*>(GetNextObject(obj));
+  }
+  // Walk the other blocks (currently only TLABs).
   while (pos < End()) {
     BlockHeader* header = reinterpret_cast<BlockHeader*>(pos);
     size_t block_size = header->size_;
@@ -167,7 +172,7 @@
 }
 
 bool BumpPointerSpace::IsEmpty() const {
-  return Size() == sizeof(BlockHeader);
+  return Begin() == End();
 }
 
 uint64_t BumpPointerSpace::GetBytesAllocated() {
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index d73fe3b..d7e6f5b 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "space.h"
 
 namespace art {
@@ -121,7 +121,7 @@
   }
 
   // Go through all of the blocks and visit the continuous objects.
-  void Walk(ObjectVisitorCallback callback, void* arg)
+  void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Object alignment within the space.
@@ -139,15 +139,16 @@
 
   // The main block is an unbounded block where objects go when there are no other blocks. This
   // enables us to maintain tightly packed objects when you are not using thread local buffers for
-  // allocation.
-  // The main block is also the block which starts at address 0.
+  // allocation. The main block starts at the space Begin().
   void UpdateMainBlock() EXCLUSIVE_LOCKS_REQUIRED(block_lock_);
 
   byte* growth_end_;
   AtomicInteger objects_allocated_;  // Accumulated from revoked thread local regions.
   AtomicInteger bytes_allocated_;  // Accumulated from revoked thread local regions.
   Mutex block_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-
+  // The objects at the start of the space are stored in the main block. The main block has no
+  // header, so walking an empty space never touches its memory, which may be mprotected.
+  size_t main_block_size_ GUARDED_BY(block_lock_);
   // The number of blocks in the space, if it is 0 then the space has one long continuous block
-  // which doesn't have an updated header.
+  // whose size in main_block_size_ is not kept up to date (see UpdateMainBlock).
   size_t num_blocks_ GUARDED_BY(block_lock_);
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index 9989ffe..6d07a60 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -39,20 +39,23 @@
     Runtime::Current()->GetHeap()->AddSpace(space);
   }
   void InstallClass(mirror::Object* o, size_t size) NO_THREAD_SAFETY_ANALYSIS {
-    // Note the minimum size, which is the size of a zero-length byte array, is 12.
-    EXPECT_GE(size, static_cast<size_t>(12));
+    // Note the minimum size, which is the size of a zero-length byte array.
+    EXPECT_GE(size, SizeOfZeroLengthByteArray());
     SirtRef<mirror::ClassLoader> null_loader(Thread::Current(), NULL);
     mirror::Class* byte_array_class = Runtime::Current()->GetClassLinker()->FindClass("[B", null_loader);
     EXPECT_TRUE(byte_array_class != NULL);
     o->SetClass(byte_array_class);
     mirror::Array* arr = o->AsArray();
-    // size_t header_size = sizeof(mirror::Object) + 4;
-    size_t header_size = arr->DataOffset(1).Uint32Value();
+    size_t header_size = SizeOfZeroLengthByteArray();
     int32_t length = size - header_size;
     arr->SetLength(length);
     EXPECT_EQ(arr->SizeOf(), size);
   }
 
+  static size_t SizeOfZeroLengthByteArray() {
+    return mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimByte)).Uint32Value();
+  }
+
   static MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                           size_t capacity, byte* requested_begin) {
     return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin);
@@ -355,9 +358,10 @@
   mirror::Object* lots_of_objects[1024];
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
-    lots_of_objects[i] = space->Alloc(self, 16, &allocation_size);
+    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
+    lots_of_objects[i] = space->Alloc(self, size_of_zero_length_byte_array, &allocation_size);
     EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    InstallClass(lots_of_objects[i], 16);
+    InstallClass(lots_of_objects[i], size_of_zero_length_byte_array);
     EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
@@ -436,9 +440,10 @@
         alloc_size = object_size;
       } else {
         alloc_size = test_rand(&rand_seed) % static_cast<size_t>(-object_size);
-        // Note the minimum size, which is the size of a zero-length byte array, is 12.
-        if (alloc_size < 12) {
-          alloc_size = 12;
+        // Note the minimum size, which is the size of a zero-length byte array.
+        size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
+        if (alloc_size < size_of_zero_length_byte_array) {
+          alloc_size = size_of_zero_length_byte_array;
         }
       }
       mirror::Object* object;
@@ -562,6 +567,10 @@
 }
 
 void SpaceTest::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size, CreateSpaceFn create_space) {
+  if (object_size < SizeOfZeroLengthByteArray()) {
+    // Too small for the object layout/model.
+    return;
+  }
   size_t initial_size = 4 * MB;
   size_t growth_limit = 8 * MB;
   size_t capacity = 16 * MB;
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 24d403d..ae03dd9 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -431,12 +431,8 @@
     Runtime::Current()->VisitRoots(RootVisitor, this, false, false);
     Thread* self = Thread::Current();
     {
-      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      Runtime::Current()->GetHeap()->FlushAllocStack();
-    }
-    {
       ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      Runtime::Current()->GetHeap()->GetLiveBitmap()->Walk(HeapBitmapCallback, this);
+      Runtime::Current()->GetHeap()->VisitObjects(VisitObjectCallback, this);
     }
     current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_END, HPROF_TIME);
     current_record_.Flush();
@@ -500,22 +496,23 @@
   }
 
  private:
-  static mirror::Object* RootVisitor(mirror::Object* obj, void* arg)
+  static mirror::Object* RootVisitor(mirror::Object* obj, void* arg, uint32_t thread_id,
+                                     RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(arg != NULL);
-    reinterpret_cast<Hprof*>(arg)->VisitRoot(obj);
+    reinterpret_cast<Hprof*>(arg)->VisitRoot(obj, thread_id, root_type);
     return obj;
   }
 
-  static void HeapBitmapCallback(mirror::Object* obj, void* arg)
+  static void VisitObjectCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(obj != NULL);
-    CHECK(arg != NULL);
-    Hprof* hprof = reinterpret_cast<Hprof*>(arg);
-    hprof->DumpHeapObject(obj);
+    DCHECK(obj != NULL);
+    DCHECK(arg != NULL);
+    reinterpret_cast<Hprof*>(arg)->DumpHeapObject(obj);
   }
 
-  void VisitRoot(const mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoot(const mirror::Object* obj, uint32_t thread_id, RootType type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   int DumpHeapObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -1050,10 +1047,7 @@
   return 0;
 }
 
-void Hprof::VisitRoot(const mirror::Object* obj) {
-  uint32_t threadId = 0;  // TODO
-  /*RootType*/ size_t type = 0;  // TODO
-
+void Hprof::VisitRoot(const mirror::Object* obj, uint32_t thread_id, RootType type) {
   static const HprofHeapTag xlate[] = {
     HPROF_ROOT_UNKNOWN,
     HPROF_ROOT_JNI_GLOBAL,
@@ -1071,13 +1065,12 @@
     HPROF_ROOT_VM_INTERNAL,
     HPROF_ROOT_JNI_MONITOR,
   };
-
   CHECK_LT(type, sizeof(xlate) / sizeof(HprofHeapTag));
   if (obj == NULL) {
     return;
   }
   gc_scan_state_ = xlate[type];
-  gc_thread_serial_number_ = threadId;
+  gc_thread_serial_number_ = thread_id;
   MarkRootObject(obj, 0);
   gc_scan_state_ = 0;
   gc_thread_serial_number_ = 0;
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 8194a0d..4a02d74 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -309,9 +309,10 @@
   return true;
 }
 
-void IndirectReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
+void IndirectReferenceTable::VisitRoots(RootCallback* callback, void* arg, uint32_t tid,
+                                        RootType root_type) {
   for (auto ref : *this) {
-    *ref = visitor(const_cast<mirror::Object*>(*ref), arg);
+    *ref = callback(const_cast<mirror::Object*>(*ref), arg, tid, root_type);
     DCHECK(*ref != nullptr);
   }
 }
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 21e942e..9d2fa35 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -23,8 +23,8 @@
 #include <string>
 
 #include "base/logging.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 
 namespace art {
 namespace mirror {
@@ -307,7 +307,7 @@
     return IrtIterator(table_, Capacity(), Capacity());
   }
 
-  void VisitRoots(RootVisitor* visitor, void* arg);
+  void VisitRoots(RootCallback* callback, void* arg, uint32_t tid, RootType root_type);
 
   uint32_t GetSegmentState() const {
     return segment_state_.all;
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index a829e97..5693747 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -43,15 +43,16 @@
      << weak_interns_.size() << " weak\n";
 }
 
-void InternTable::VisitRoots(RootVisitor* visitor, void* arg,
+void InternTable::VisitRoots(RootCallback* callback, void* arg,
                              bool only_dirty, bool clean_dirty) {
   MutexLock mu(Thread::Current(), intern_table_lock_);
   if (!only_dirty || is_dirty_) {
     for (auto& strong_intern : strong_interns_) {
-      strong_intern.second = down_cast<mirror::String*>(visitor(strong_intern.second, arg));
+      strong_intern.second =
+          down_cast<mirror::String*>(callback(strong_intern.second, arg, 0,
+                                              kRootInternedString));
       DCHECK(strong_intern.second != nullptr);
     }
-
     if (clean_dirty) {
       is_dirty_ = false;
     }
@@ -196,15 +197,15 @@
 }
 
 mirror::String* InternTable::InternStrong(mirror::String* s) {
-  if (s == NULL) {
-    return NULL;
+  if (s == nullptr) {
+    return nullptr;
   }
   return Insert(s, true);
 }
 
 mirror::String* InternTable::InternWeak(mirror::String* s) {
-  if (s == NULL) {
-    return NULL;
+  if (s == nullptr) {
+    return nullptr;
   }
   return Insert(s, false);
 }
@@ -215,11 +216,11 @@
   return found == s;
 }
 
-void InternTable::SweepInternTableWeaks(RootVisitor visitor, void* arg) {
+void InternTable::SweepInternTableWeaks(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), intern_table_lock_);
   for (auto it = weak_interns_.begin(), end = weak_interns_.end(); it != end;) {
     mirror::Object* object = it->second;
-    mirror::Object* new_object = visitor(object, arg);
+    mirror::Object* new_object = callback(object, arg);
     if (new_object == nullptr) {
       // TODO: use it = weak_interns_.erase(it) when we get a c++11 stl.
       weak_interns_.erase(it++);
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index eec63c8..9f09fb9 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -18,7 +18,7 @@
 #define ART_RUNTIME_INTERN_TABLE_H_
 
 #include "base/mutex.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 
 #include <map>
 
@@ -55,13 +55,13 @@
   // Interns a potentially new string in the 'weak' table. (See above.)
   mirror::String* InternWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SweepInternTableWeaks(RootVisitor visitor, void* arg);
+  void SweepInternTableWeaks(IsMarkedCallback* callback, void* arg);
 
   bool ContainsWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t Size() const;
 
-  void VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty);
+  void VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty);
 
   void DumpForSigQuit(std::ostream& os) const;
 
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index aa2502d..c328245 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -81,7 +81,7 @@
   mutable std::vector<const mirror::String*> expected_;
 };
 
-mirror::Object* IsMarkedSweepingVisitor(mirror::Object* object, void* arg) {
+mirror::Object* IsMarkedSweepingCallback(mirror::Object* object, void* arg) {
   if (reinterpret_cast<TestPredicate*>(arg)->IsMarked(object)) {
     return object;
   }
@@ -108,7 +108,7 @@
   p.Expect(s1.get());
   {
     ReaderMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
-    t.SweepInternTableWeaks(IsMarkedSweepingVisitor, &p);
+    t.SweepInternTableWeaks(IsMarkedSweepingCallback, &p);
   }
 
   EXPECT_EQ(2U, t.Size());
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 922e642..c6faf44 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -80,6 +80,14 @@
     Object* obj = reinterpret_cast<Object*>(args[0]);
     Object* newValue = reinterpret_cast<Object*>(args[3]);
     obj->SetFieldObject(MemberOffset((static_cast<uint64_t>(args[2]) << 32) | args[1]), newValue, false);
+  } else if (name == "int sun.misc.Unsafe.getArrayBaseOffsetForComponentType(java.lang.Class)") {
+    mirror::Class* component = reinterpret_cast<Object*>(args[0])->AsClass();
+    Primitive::Type primitive_type = component->GetPrimitiveType();
+    result->SetI(mirror::Array::DataOffset(Primitive::ComponentSize(primitive_type)).Int32Value());
+  } else if (name == "int sun.misc.Unsafe.getArrayIndexScaleForComponentType(java.lang.Class)") {
+    mirror::Class* component = reinterpret_cast<Object*>(args[0])->AsClass();
+    Primitive::Type primitive_type = component->GetPrimitiveType();
+    result->SetI(Primitive::ComponentSize(primitive_type));
   } else {
     LOG(FATAL) << "Attempt to invoke native method in non-started runtime: " << name;
   }
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index 40ba3e3..49dceb2 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -206,7 +206,7 @@
   }
 }
 
-void ObjectRegistry::UpdateObjectPointers(RootVisitor visitor, void* arg) {
+void ObjectRegistry::UpdateObjectPointers(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), lock_);
   if (object_to_entry_.empty()) {
     return;
@@ -215,7 +215,7 @@
   for (auto& pair : object_to_entry_) {
     mirror::Object* new_obj;
     if (pair.first != nullptr) {
-      new_obj = visitor(pair.first, arg);
+      new_obj = callback(pair.first, arg);
       if (new_obj != nullptr) {
         new_object_to_entry.insert(std::make_pair(new_obj, pair.second));
       }
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index 0190575..3c6cb15 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -26,7 +26,7 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "safe_map.h"
 
 namespace art {
@@ -85,7 +85,7 @@
   jobject GetJObject(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit, objects are treated as system weaks.
-  void UpdateObjectPointers(RootVisitor visitor, void* arg)
+  void UpdateObjectPointers(IsMarkedCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // We have allow / disallow functionality since we use system weak sweeping logic to update moved
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index deea5f6..6f31ca7 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -139,13 +139,13 @@
 }
 
 void InvokeWithArgArray(const ScopedObjectAccess& soa, ArtMethod* method,
-                        ArgArray* arg_array, JValue* result, char result_type)
+                        ArgArray* arg_array, JValue* result, const char* shorty)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint32_t* args = arg_array->GetArray();
   if (UNLIKELY(soa.Env()->check_jni)) {
     CheckMethodArguments(method, args);
   }
-  method->Invoke(soa.Self(), args, arg_array->GetNumBytes(), result, result_type);
+  method->Invoke(soa.Self(), args, arg_array->GetNumBytes(), result, shorty);
 }
 
 static JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj,
@@ -157,7 +157,7 @@
   JValue result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
   return result;
 }
 
@@ -175,7 +175,7 @@
   JValue result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
   return result;
 }
 
@@ -188,7 +188,7 @@
   JValue result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
   return result;
 }
 
@@ -523,9 +523,9 @@
     return dlsym(handle_, symbol_name.c_str());
   }
 
-  void VisitRoots(RootVisitor* visitor, void* arg) {
+  void VisitRoots(RootCallback* callback, void* arg) {  // Reports class_loader_ as a root.
     if (class_loader_ != nullptr) {
-      class_loader_ = visitor(class_loader_, arg);
+      class_loader_ = callback(class_loader_, arg, 0, kRootVMInternal);
     }
   }
 
@@ -619,9 +619,9 @@
     return NULL;
   }
 
-  void VisitRoots(RootVisitor* visitor, void* arg) {
+  void VisitRoots(RootCallback* callback, void* arg) {
     for (auto& lib_pair : libraries_) {
-      lib_pair.second->VisitRoots(visitor, arg);
+      lib_pair.second->VisitRoots(callback, arg);
     }
   }
 
@@ -637,7 +637,7 @@
   JValue result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
   return result;
 }
 
@@ -2437,8 +2437,10 @@
         m = c->FindVirtualMethod(name, sig);
       }
       if (m == NULL) {
+        c->DumpClass(LOG(ERROR), mirror::Class::kDumpClassFullDetail);
         LOG(return_errors ? ERROR : FATAL) << "Failed to register native method "
-            << PrettyDescriptor(c) << "." << name << sig;
+            << PrettyDescriptor(c) << "." << name << sig << " in "
+            << c->GetDexCache()->GetLocation()->ToModifiedUtf8();
         ThrowNoSuchMethodError(soa, c, name, sig, "static or non-static");
         return JNI_ERR;
       } else if (!m->IsNative()) {
@@ -2650,6 +2652,16 @@
     size_t bytes = array->GetLength() * component_size;
     VLOG(heap) << "Release primitive array " << env << " array_data " << array_data
                << " elements " << reinterpret_cast<void*>(elements);
+    if (is_copy) {
+      // Sanity check: if elements is not the same as the Java array's data, it had better not be
+      // a heap address. TODO: this might be slow to check; it may be worth keeping track of
+      // which copies we make instead.
+      if (heap->IsNonDiscontinuousSpaceHeapAddress(reinterpret_cast<mirror::Object*>(elements))) {
+        JniAbortF("ReleaseArrayElements", "invalid element pointer %p, array elements are %p",
+                  reinterpret_cast<void*>(elements), array_data);
+        return;
+      }
+    }
     // Don't need to copy if we had a direct pointer.
     if (mode != JNI_ABORT && is_copy) {
       memcpy(array_data, elements, bytes);
@@ -3373,11 +3385,11 @@
   return native_method;
 }
 
-void JavaVMExt::SweepJniWeakGlobals(RootVisitor visitor, void* arg) {
+void JavaVMExt::SweepJniWeakGlobals(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), weak_globals_lock_);
   for (mirror::Object** entry : weak_globals_) {
     mirror::Object* obj = *entry;
-    mirror::Object* new_obj = visitor(obj, arg);
+    mirror::Object* new_obj = callback(obj, arg);
     if (new_obj == nullptr) {
       new_obj = kClearedJniWeakGlobal;
     }
@@ -3385,20 +3397,20 @@
   }
 }
 
-void JavaVMExt::VisitRoots(RootVisitor* visitor, void* arg) {
+void JavaVMExt::VisitRoots(RootCallback* callback, void* arg) {
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, globals_lock);
-    globals.VisitRoots(visitor, arg);
+    globals.VisitRoots(callback, arg, 0, kRootJNIGlobal);
   }
   {
     MutexLock mu(self, pins_lock);
-    pin_table.VisitRoots(visitor, arg);
+    pin_table.VisitRoots(callback, arg, 0, kRootVMInternal);
   }
   {
     MutexLock mu(self, libraries_lock);
     // Libraries contains shared libraries which hold a pointer to a class loader.
-    libraries->VisitRoots(visitor, arg);
+    libraries->VisitRoots(callback, arg);
   }
   // The weak_globals table is visited by the GC itself (because it mutates the table).
 }
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index cd3c5cb..9e10987 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -22,8 +22,8 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "indirect_reference_table.h"
+#include "object_callbacks.h"
 #include "reference_table.h"
-#include "root_visitor.h"
 #include "runtime.h"
 #include "sirt_ref.h"
 
@@ -57,7 +57,7 @@
 JValue InvokeWithJValues(const ScopedObjectAccess&, jobject obj, jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 void InvokeWithArgArray(const ScopedObjectAccess& soa, mirror::ArtMethod* method,
-                        ArgArray *arg_array, JValue* result, char result_type)
+                        ArgArray *arg_array, JValue* result, const char* shorty)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
@@ -91,7 +91,7 @@
 
   void SetCheckJniEnabled(bool enabled);
 
-  void VisitRoots(RootVisitor*, void*);
+  void VisitRoots(RootCallback* callback, void* arg);
 
   void DisallowNewWeakGlobals() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AllowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -99,7 +99,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DeleteWeakGlobalRef(Thread* self, jweak obj)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SweepJniWeakGlobals(RootVisitor visitor, void* arg);
+  void SweepJniWeakGlobals(IsMarkedCallback* callback, void* arg);
   mirror::Object* DecodeWeakGlobal(Thread* self, IndirectRef ref);
 
   Runtime* runtime;
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index fed734e..4c58c84 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -134,14 +134,14 @@
       arg_array.Append(receiver);
     }
 
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "V");
   }
 
   void InvokeIdentityByteMethod(bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* method;
     mirror::Object* receiver;
-    JniInternalTestMakeExecutable(&method, &receiver, is_static, "identity", "(I)I");
+    JniInternalTestMakeExecutable(&method, &receiver, is_static, "identity", "(B)B");
 
     ArgArray arg_array(NULL, 0);
     uint32_t* args = arg_array.GetArray();
@@ -154,22 +154,22 @@
 
     arg_array.Append(0U);
     result.SetB(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "BB");
     EXPECT_EQ(0, result.GetB());
 
     args[0] = -1;
     result.SetB(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "BB");
     EXPECT_EQ(-1, result.GetB());
 
     args[0] = SCHAR_MAX;
     result.SetB(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "BB");
     EXPECT_EQ(SCHAR_MAX, result.GetB());
 
     args[0] = (SCHAR_MIN << 24) >> 24;
     result.SetB(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "BB");
     EXPECT_EQ(SCHAR_MIN, result.GetB());
   }
 
@@ -190,22 +190,22 @@
 
     arg_array.Append(0U);
     result.SetI(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "II");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = -1;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "II");
     EXPECT_EQ(-1, result.GetI());
 
     args[0] = INT_MAX;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "II");
     EXPECT_EQ(INT_MAX, result.GetI());
 
     args[0] = INT_MIN;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "II");
     EXPECT_EQ(INT_MIN, result.GetI());
   }
 
@@ -228,28 +228,28 @@
     value.SetD(0.0);
     arg_array.AppendWide(value.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "DD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(-1.0);
     args[0] = value.GetJ();
     args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "DD");
     EXPECT_EQ(-1.0, result.GetD());
 
     value.SetD(DBL_MAX);
     args[0] = value.GetJ();
     args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "DD");
     EXPECT_EQ(DBL_MAX, result.GetD());
 
     value.SetD(DBL_MIN);
     args[0] = value.GetJ();
     args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "DD");
     EXPECT_EQ(DBL_MIN, result.GetD());
   }
 
@@ -271,31 +271,31 @@
     arg_array.Append(0U);
     arg_array.Append(0U);
     result.SetI(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
     args[1] = 2;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(3, result.GetI());
 
     args[0] = -2;
     args[1] = 5;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(3, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MIN;
     result.SetI(1234);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(-1, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MAX;
     result.SetI(INT_MIN);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(-2, result.GetI());
   }
 
@@ -318,35 +318,40 @@
     arg_array.Append(0U);
     arg_array.Append(0U);
     result.SetI(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
     args[1] = 2;
     args[2] = 3;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(6, result.GetI());
 
     args[0] = -1;
     args[1] = 2;
     args[2] = -3;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(-2, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MIN;
     args[2] = INT_MAX;
     result.SetI(1234);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(2147483646, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MAX;
     args[2] = INT_MAX;
     result.SetI(INT_MIN);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(2147483645, result.GetI());
   }
 
@@ -370,7 +375,8 @@
     arg_array.Append(0U);
     arg_array.Append(0U);
     result.SetI(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
@@ -378,7 +384,8 @@
     args[2] = 3;
     args[3] = 4;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(10, result.GetI());
 
     args[0] = -1;
@@ -386,7 +393,8 @@
     args[2] = -3;
     args[3] = 4;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(2, result.GetI());
 
     args[0] = INT_MAX;
@@ -394,7 +402,8 @@
     args[2] = INT_MAX;
     args[3] = INT_MIN;
     result.SetI(1234);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(-2, result.GetI());
 
     args[0] = INT_MAX;
@@ -402,7 +411,8 @@
     args[2] = INT_MAX;
     args[3] = INT_MAX;
     result.SetI(INT_MIN);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(-4, result.GetI());
   }
 
@@ -427,7 +437,8 @@
     arg_array.Append(0U);
     arg_array.Append(0U);
     result.SetI(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
@@ -436,7 +447,8 @@
     args[3] = 4;
     args[4] = 5;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(15, result.GetI());
 
     args[0] = -1;
@@ -445,7 +457,8 @@
     args[3] = 4;
     args[4] = -5;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(-3, result.GetI());
 
     args[0] = INT_MAX;
@@ -454,7 +467,8 @@
     args[3] = INT_MIN;
     args[4] = INT_MAX;
     result.SetI(1234);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(2147483645, result.GetI());
 
     args[0] = INT_MAX;
@@ -463,7 +477,8 @@
     args[3] = INT_MAX;
     args[4] = INT_MAX;
     result.SetI(INT_MIN);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(2147483643, result.GetI());
   }
 
@@ -489,7 +504,8 @@
     arg_array.AppendWide(value.GetJ());
     arg_array.AppendWide(value2.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
@@ -499,7 +515,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(3.0, result.GetD());
 
     value.SetD(1.0);
@@ -509,7 +526,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(-1.0, result.GetD());
 
     value.SetD(DBL_MAX);
@@ -519,7 +537,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(1.7976931348623157e308, result.GetD());
 
     value.SetD(DBL_MAX);
@@ -529,7 +548,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(INFINITY, result.GetD());
   }
 
@@ -558,7 +578,8 @@
     arg_array.AppendWide(value2.GetJ());
     arg_array.AppendWide(value3.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
@@ -571,7 +592,8 @@
     args[4] = value3.GetJ();
     args[5] = value3.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDD");
     EXPECT_EQ(6.0, result.GetD());
 
     value.SetD(1.0);
@@ -584,7 +606,8 @@
     args[4] = value3.GetJ();
     args[5] = value3.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDD");
     EXPECT_EQ(2.0, result.GetD());
   }
 
@@ -616,7 +639,8 @@
     arg_array.AppendWide(value3.GetJ());
     arg_array.AppendWide(value4.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
@@ -632,7 +656,8 @@
     args[6] = value4.GetJ();
     args[7] = value4.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDD");
     EXPECT_EQ(10.0, result.GetD());
 
     value.SetD(1.0);
@@ -648,7 +673,8 @@
     args[6] = value4.GetJ();
     args[7] = value4.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDD");
     EXPECT_EQ(-2.0, result.GetD());
   }
 
@@ -683,7 +709,8 @@
     arg_array.AppendWide(value4.GetJ());
     arg_array.AppendWide(value5.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDDD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
@@ -702,7 +729,8 @@
     args[8] = value5.GetJ();
     args[9] = value5.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDDD");
     EXPECT_EQ(15.0, result.GetD());
 
     value.SetD(1.0);
@@ -721,7 +749,8 @@
     args[8] = value5.GetJ();
     args[9] = value5.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDDD");
     EXPECT_EQ(3.0, result.GetD());
   }
 
@@ -1764,7 +1793,7 @@
   CHECK(started);
   Thread::Current()->TransitionFromSuspendedToRunnable();
 
-  method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+  method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "VL");
 }
 
 TEST_F(JniInternalTest, StaticNopMethod) {
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index b2725e5..35ea2b3 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -111,9 +111,9 @@
 }
 
 template<class T>
-inline void PrimitiveArray<T>::VisitRoots(RootVisitor* visitor, void* arg) {
+inline void PrimitiveArray<T>::VisitRoots(RootCallback* callback, void* arg) {
   if (array_class_ != nullptr) {
-    array_class_ = down_cast<Class*>(visitor(array_class_, arg));
+    array_class_ = down_cast<Class*>(callback(array_class_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 04f03c3..2e123ef 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_ARRAY_H_
 
 #include "object.h"
+#include "object_callbacks.h"
 #include "gc/heap.h"
 #include "thread.h"
 
@@ -182,7 +183,7 @@
     array_class_ = NULL;
   }
 
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index c3a4efb..29aade9 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -52,10 +52,10 @@
   SetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_), num_bytes.Uint32Value(), false);
 }
 
-void ArtField::VisitRoots(RootVisitor* visitor, void* arg) {
+void ArtField::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_reflect_ArtField_ != nullptr) {
     java_lang_reflect_ArtField_ = down_cast<mirror::Class*>(
-        visitor(java_lang_reflect_ArtField_, arg));
+        callback(java_lang_reflect_ArtField_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index b33fe4b..716b736 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -20,6 +20,7 @@
 #include "class.h"
 #include "modifiers.h"
 #include "object.h"
+#include "object_callbacks.h"
 
 namespace art {
 
@@ -106,7 +107,7 @@
 
   static void SetClass(Class* java_lang_reflect_ArtField);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsVolatile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 575ea03..67e6c7d 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -35,15 +35,16 @@
 namespace mirror {
 
 extern "C" void art_portable_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
-extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
+extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
+                                      const char*);
 
 // TODO: get global references for these
 Class* ArtMethod::java_lang_reflect_ArtMethod_ = NULL;
 
-void ArtMethod::VisitRoots(RootVisitor* visitor, void* arg) {
+void ArtMethod::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_reflect_ArtMethod_ != nullptr) {
     java_lang_reflect_ArtMethod_ = down_cast<mirror::Class*>(
-        visitor(java_lang_reflect_ArtMethod_, arg));
+        callback(java_lang_reflect_ArtMethod_, arg, 0, kRootStickyClass));
   }
 }
 
@@ -245,10 +246,11 @@
 }
 
 void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
-                       char result_type) {
+                       const char* shorty) {
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
     CHECK_EQ(kRunnable, self->GetState());
+    CHECK_STREQ(MethodHelper(this).GetShorty(), shorty);
   }
 
   // Push a transition back into managed code onto the linked list in thread.
@@ -274,9 +276,9 @@
                                                   : GetEntryPointFromPortableCompiledCode());
       }
       if (!IsPortableCompiled()) {
-        (*art_quick_invoke_stub)(this, args, args_size, self, result, result_type);
+        (*art_quick_invoke_stub)(this, args, args_size, self, result, shorty);
       } else {
-        (*art_portable_invoke_stub)(this, args, args_size, self, result, result_type);
+        (*art_portable_invoke_stub)(this, args, args_size, self, result, shorty[0]);
       }
       if (UNLIKELY(reinterpret_cast<intptr_t>(self->GetException(NULL)) == -1)) {
         // Unusual case where we were running LLVM generated code and an
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index bfa7cbe..e678503 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -23,7 +23,7 @@
 #include "locks.h"
 #include "modifiers.h"
 #include "object.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 
 namespace art {
 
@@ -207,8 +207,8 @@
   // Find the method that this method overrides
   ArtMethod* FindOverriddenMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, char result_type)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
+              const char* shorty) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   EntryPointFromInterpreter* GetEntryPointFromInterpreter() {
     return GetFieldPtr<EntryPointFromInterpreter*>(
@@ -404,7 +404,7 @@
 
   static void ResetClass();
 
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 8051c9b..99a35e3 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -50,9 +50,10 @@
   java_lang_Class_ = NULL;
 }
 
-void Class::VisitRoots(RootVisitor* visitor, void* arg) {
+void Class::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_Class_ != nullptr) {
-    java_lang_Class_ = down_cast<Class*>(visitor(java_lang_Class_, arg));
+    java_lang_Class_ = down_cast<Class*>(
+        callback(java_lang_Class_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index cbec476..82c8264 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -795,7 +795,7 @@
   // Can't call this SetClass or else gets called instead of Object::SetClass in places.
   static void SetClassClass(Class* java_lang_Class);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // When class is verified, set the kAccPreverified flag on each method.
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index a7ebe07..2e33198 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -58,9 +58,10 @@
   return trace;
 }
 
-void StackTraceElement::VisitRoots(RootVisitor* visitor, void* arg) {
+void StackTraceElement::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_StackTraceElement_ != nullptr) {
-    java_lang_StackTraceElement_ = down_cast<Class*>(visitor(java_lang_StackTraceElement_, arg));
+    java_lang_StackTraceElement_ = down_cast<Class*>(
+        callback(java_lang_StackTraceElement_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index 73d2673..51817f6 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -57,7 +57,7 @@
 
   static void SetClass(Class* java_lang_StackTraceElement);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 10ae066..6f4ead9 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -280,9 +280,9 @@
   return countDiff;
 }
 
-void String::VisitRoots(RootVisitor* visitor, void* arg) {
+void String::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_String_ != nullptr) {
-    java_lang_String_ = down_cast<Class*>(visitor(java_lang_String_, arg));
+    java_lang_String_ = down_cast<Class*>(callback(java_lang_String_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 406c5a3..57ec314 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -19,7 +19,7 @@
 
 #include "class.h"
 #include "gtest/gtest.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 
 namespace art {
 
@@ -107,7 +107,7 @@
 
   static void SetClass(Class* java_lang_String);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index 2318b74..a57bd43 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -93,9 +93,10 @@
   java_lang_Throwable_ = NULL;
 }
 
-void Throwable::VisitRoots(RootVisitor* visitor, void* arg) {
+void Throwable::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_Throwable_ != nullptr) {
-    java_lang_Throwable_ = down_cast<Class*>(visitor(java_lang_Throwable_, arg));
+    java_lang_Throwable_ = down_cast<Class*>(callback(java_lang_Throwable_, arg, 0,
+                                                      kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index bc9848a..de71957 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -18,7 +18,7 @@
 #define ART_RUNTIME_MIRROR_THROWABLE_H_
 
 #include "object.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "string.h"
 
 namespace art {
@@ -51,7 +51,7 @@
 
   static void SetClass(Class* java_lang_Throwable);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 72220e0..85f3a09 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1067,13 +1067,13 @@
   list_.push_front(m);
 }
 
-void MonitorList::SweepMonitorList(RootVisitor visitor, void* arg) {
+void MonitorList::SweepMonitorList(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
     mirror::Object* obj = m->GetObject();
     // The object of a monitor can be null if we have deflated it.
-    mirror::Object* new_obj = obj != nullptr ? visitor(obj, arg) : nullptr;
+    mirror::Object* new_obj = obj != nullptr ? callback(obj, arg) : nullptr;
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
                     << m->GetObject();
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 85a8c48..ca95e0b 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -26,7 +26,7 @@
 
 #include "atomic.h"
 #include "base/mutex.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "sirt_ref.h"
 #include "thread_state.h"
 
@@ -220,7 +220,8 @@
 
   void Add(Monitor* m);
 
-  void SweepMonitorList(RootVisitor visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SweepMonitorList(IsMarkedCallback* callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DisallowNewMonitors();
   void AllowNewMonitors();
 
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index d9baaaf..6482917 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -234,9 +234,8 @@
                                            jboolean countAssignable) {
   ScopedObjectAccess soa(env);
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
-  // is empty, so the live bitmap is the only place we need to look. Need to do GC before decoding
-  // any jobjects.
+  // We only want reachable instances, so do a GC. Heap::VisitObjects visits all of the heap
+  // objects in all of the spaces and the allocation stack.
   heap->CollectGarbage(false);
   mirror::Class* c = soa.Decode<mirror::Class*>(javaClass);
   if (c == nullptr) {
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index e1b5f97..5267069 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -181,11 +181,12 @@
 
 typedef std::map<std::string, mirror::String*> StringTable;
 
-static mirror::Object* PreloadDexCachesStringsVisitor(mirror::Object* root, void* arg)
+static mirror::Object* PreloadDexCachesStringsCallback(mirror::Object* root, void* arg,
+                                                       uint32_t /*thread_id*/,
+                                                       RootType /*root_type*/)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   StringTable& table = *reinterpret_cast<StringTable*>(arg);
   mirror::String* string = const_cast<mirror::Object*>(root)->AsString();
-  // LOG(INFO) << "VMRuntime.preloadDexCaches interned=" << string->ToModifiedUtf8();
   table[string->ToModifiedUtf8()] = string;
   return root;
 }
@@ -404,7 +405,7 @@
   // We use a std::map to avoid heap allocating StringObjects to lookup in gDvm.literalStrings
   StringTable strings;
   if (kPreloadDexCachesStrings) {
-    runtime->GetInternTable()->VisitRoots(PreloadDexCachesStringsVisitor, &strings, false, false);
+    runtime->GetInternTable()->VisitRoots(PreloadDexCachesStringsCallback, &strings, false, false);
   }
 
   const std::vector<const DexFile*>& boot_class_path = linker->GetBootClassPath();
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 6c22003..6727862 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -16,6 +16,7 @@
 
 #include "gc/accounting/card_table-inl.h"
 #include "jni_internal.h"
+#include "mirror/array.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
 #include "scoped_fast_native_object_access.h"
@@ -153,6 +154,20 @@
   obj->SetFieldObject(MemberOffset(offset), newValue, false);
 }
 
+static jint Unsafe_getArrayBaseOffsetForComponentType(JNIEnv* env, jclass, jobject component_class) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Class* component = soa.Decode<mirror::Class*>(component_class);
+  Primitive::Type primitive_type = component->GetPrimitiveType();
+  return mirror::Array::DataOffset(Primitive::ComponentSize(primitive_type)).Int32Value();
+}
+
+static jint Unsafe_getArrayIndexScaleForComponentType(JNIEnv* env, jclass, jobject component_class) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Class* component = soa.Decode<mirror::Class*>(component_class);
+  Primitive::Type primitive_type = component->GetPrimitiveType();
+  return Primitive::ComponentSize(primitive_type);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Unsafe, compareAndSwapInt, "!(Ljava/lang/Object;JII)Z"),
   NATIVE_METHOD(Unsafe, compareAndSwapLong, "!(Ljava/lang/Object;JJJ)Z"),
@@ -172,6 +187,8 @@
   NATIVE_METHOD(Unsafe, getObject, "!(Ljava/lang/Object;J)Ljava/lang/Object;"),
   NATIVE_METHOD(Unsafe, putObject, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
   NATIVE_METHOD(Unsafe, putOrderedObject, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
+  NATIVE_METHOD(Unsafe, getArrayBaseOffsetForComponentType, "!(Ljava/lang/Class;)I"),
+  NATIVE_METHOD(Unsafe, getArrayIndexScaleForComponentType, "!(Ljava/lang/Class;)I"),
 };
 
 void register_sun_misc_Unsafe(JNIEnv* env) {
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
new file mode 100644
index 0000000..8e3c529
--- /dev/null
+++ b/runtime/object_callbacks.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OBJECT_CALLBACKS_H_
+#define ART_RUNTIME_OBJECT_CALLBACKS_H_
+
+// For uint32_t.
+#include <stdint.h>
+// For size_t.
+#include <stdlib.h>
+
+namespace art {
+namespace mirror {
+class Object;
+}  // namespace mirror
+class StackVisitor;
+
+enum RootType {
+  kRootUnknown = 0,
+  kRootJNIGlobal,
+  kRootJNILocal,
+  kRootJavaFrame,
+  kRootNativeStack,
+  kRootStickyClass,
+  kRootThreadBlock,
+  kRootMonitorUsed,
+  kRootThreadObject,
+  kRootInternedString,
+  kRootDebugger,
+  kRootVMInternal,
+  kRootJNIMonitor,
+};
+
+// Returns the new address of the object, returns root if it has not moved. thread_id and
+// root_type are only used by hprof.
+typedef mirror::Object* (RootCallback)(mirror::Object* root, void* arg, uint32_t thread_id,
+    RootType root_type) __attribute__((warn_unused_result));
+// A callback for visiting an object in the heap.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+// A callback used for marking an object, returns the new address of the object if the object moved.
+typedef mirror::Object* (MarkObjectCallback)(mirror::Object* obj, void* arg)
+    __attribute__((warn_unused_result));
+// A callback for verifying roots.
+typedef void (VerifyRootCallback)(const mirror::Object* root, void* arg, size_t vreg,
+    const StackVisitor* visitor);
+// A callback for testing if an object is marked, returns nullptr if not marked, otherwise the new
+// address of the object (if the object didn't move, returns the object input parameter).
+typedef mirror::Object* (IsMarkedCallback)(mirror::Object* object, void* arg)
+    __attribute__((warn_unused_result));
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_OBJECT_CALLBACKS_H_
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index b5ef735..a9b17e0 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -231,9 +231,10 @@
   DumpSummaryLine(os, sorted_entries.back(), GetElementCount(sorted_entries.back()), identical, equiv);
 }
 
-void ReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
+void ReferenceTable::VisitRoots(RootCallback* visitor, void* arg, uint32_t tid,
+                                RootType root_type) {
   for (auto& ref : entries_) {
-    ref = visitor(ref, arg);
+    ref = visitor(ref, arg, tid, root_type);
   }
 }
 
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 37b3172..c9f5bc5 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -22,8 +22,8 @@
 #include <string>
 #include <vector>
 
+#include "object_callbacks.h"
 #include "locks.h"
-#include "root_visitor.h"
 
 namespace art {
 namespace mirror {
@@ -47,7 +47,7 @@
 
   void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootVisitor* visitor, void* arg);
+  void VisitRoots(RootCallback* visitor, void* arg, uint32_t tid, RootType root_type);
 
  private:
   typedef std::vector<mirror::Object*> Table;
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index ac8f5ef..0bfa70f 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -220,36 +220,46 @@
   }
 
   jmethodID m = NULL;
+  const char* shorty;
   switch (src_class) {
   case Primitive::kPrimBoolean:
     m = WellKnownClasses::java_lang_Boolean_valueOf;
+    shorty = "LZ";
     break;
   case Primitive::kPrimByte:
     m = WellKnownClasses::java_lang_Byte_valueOf;
+    shorty = "LB";
     break;
   case Primitive::kPrimChar:
     m = WellKnownClasses::java_lang_Character_valueOf;
+    shorty = "LC";
     break;
   case Primitive::kPrimDouble:
     m = WellKnownClasses::java_lang_Double_valueOf;
+    shorty = "LD";
     break;
   case Primitive::kPrimFloat:
     m = WellKnownClasses::java_lang_Float_valueOf;
+    shorty = "LF";
     break;
   case Primitive::kPrimInt:
     m = WellKnownClasses::java_lang_Integer_valueOf;
+    shorty = "LI";
     break;
   case Primitive::kPrimLong:
     m = WellKnownClasses::java_lang_Long_valueOf;
+    shorty = "LJ";
     break;
   case Primitive::kPrimShort:
     m = WellKnownClasses::java_lang_Short_valueOf;
+    shorty = "LS";
     break;
   case Primitive::kPrimVoid:
     // There's no such thing as a void field, and void methods invoked via reflection return null.
-    return NULL;
+    return nullptr;
   default:
     LOG(FATAL) << static_cast<int>(src_class);
+    shorty = nullptr;
   }
 
   ScopedObjectAccessUnchecked soa(Thread::Current());
@@ -257,7 +267,7 @@
     CHECK_EQ(soa.Self()->GetState(), kRunnable);
   }
 
-  ArgArray arg_array(NULL, 0);
+  ArgArray arg_array(nullptr, 0);
   JValue result;
   if (src_class == Primitive::kPrimDouble || src_class == Primitive::kPrimLong) {
     arg_array.AppendWide(value.GetJ());
@@ -266,7 +276,7 @@
   }
 
   soa.DecodeMethod(m)->Invoke(soa.Self(), arg_array.GetArray(), arg_array.GetNumBytes(),
-                              &result, 'L');
+                              &result, shorty);
   return result.GetL();
 }
 
diff --git a/runtime/root_visitor.h b/runtime/root_visitor.h
deleted file mode 100644
index 78c30ff..0000000
--- a/runtime/root_visitor.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_ROOT_VISITOR_H_
-#define ART_RUNTIME_ROOT_VISITOR_H_
-
-// For size_t.
-#include <stdlib.h>
-
-namespace art {
-namespace mirror {
-class Object;
-}  // namespace mirror
-class StackVisitor;
-
-// Returns the new address of the object, returns root if it has not moved.
-typedef mirror::Object* (RootVisitor)(mirror::Object* root, void* arg)
-    __attribute__((warn_unused_result));
-typedef void (VerifyRootVisitor)(const mirror::Object* root, void* arg, size_t vreg,
-                                 const StackVisitor* visitor);
-typedef bool (IsMarkedTester)(const mirror::Object* object, void* arg);
-typedef void (ObjectVisitorCallback)(mirror::Object* obj, void* arg);
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_ROOT_VISITOR_H_
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 09d05d1..d8f9ca3 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -375,7 +375,7 @@
   return value;
 }
 
-void Runtime::SweepSystemWeaks(RootVisitor* visitor, void* arg) {
+void Runtime::SweepSystemWeaks(IsMarkedCallback* visitor, void* arg) {
   GetInternTable()->SweepInternTableWeaks(visitor, arg);
   GetMonitorList()->SweepMonitorList(visitor, arg);
   GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
@@ -765,6 +765,12 @@
     parsed->image_ += GetAndroidRoot();
     parsed->image_ += "/framework/boot.art";
   }
+  if (!kIsTargetBuild && parsed->host_prefix_.empty()) {
+    const char* build_top = getenv("ANDROID_BUILD_TOP");  // Unset => prefix stays empty.
+    if (build_top != nullptr) {
+      parsed->host_prefix_ = build_top;
+    }
+  }
   if (parsed->heap_growth_limit_ == 0) {
     parsed->heap_growth_limit_ = parsed->heap_maximum_size_;
   }
@@ -807,7 +813,7 @@
 
   JValue result;
   ArgArray arg_array(nullptr, 0);
-  InvokeWithArgArray(soa, getSystemClassLoader, &arg_array, &result, 'L');
+  InvokeWithArgArray(soa, getSystemClassLoader, &arg_array, &result, "L");
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
                                             down_cast<mirror::ClassLoader*>(result.GetL()));
   CHECK(class_loader.get() != nullptr);
@@ -834,7 +840,7 @@
 bool Runtime::Start() {
   VLOG(startup) << "Runtime::Start entering";
 
-  CHECK(host_prefix_.empty()) << host_prefix_;
+  CHECK(!kIsTargetBuild || host_prefix_.empty()) << host_prefix_;
 
   // Restore main thread state to kNative as expected by native code.
   Thread* self = Thread::Current();
@@ -1301,66 +1307,69 @@
   return pre_allocated_OutOfMemoryError_;
 }
 
-void Runtime::VisitConcurrentRoots(RootVisitor* visitor, void* arg, bool only_dirty,
+void Runtime::VisitConcurrentRoots(RootCallback* callback, void* arg, bool only_dirty,
                                    bool clean_dirty) {
-  intern_table_->VisitRoots(visitor, arg, only_dirty, clean_dirty);
-  class_linker_->VisitRoots(visitor, arg, only_dirty, clean_dirty);
+  intern_table_->VisitRoots(callback, arg, only_dirty, clean_dirty);
+  class_linker_->VisitRoots(callback, arg, only_dirty, clean_dirty);
 }
 
-void Runtime::VisitNonThreadRoots(RootVisitor* visitor, void* arg) {
+void Runtime::VisitNonThreadRoots(RootCallback* callback, void* arg) {
   // Visit the classes held as static in mirror classes.
-  mirror::ArtField::VisitRoots(visitor, arg);
-  mirror::ArtMethod::VisitRoots(visitor, arg);
-  mirror::Class::VisitRoots(visitor, arg);
-  mirror::StackTraceElement::VisitRoots(visitor, arg);
-  mirror::String::VisitRoots(visitor, arg);
-  mirror::Throwable::VisitRoots(visitor, arg);
+  mirror::ArtField::VisitRoots(callback, arg);
+  mirror::ArtMethod::VisitRoots(callback, arg);
+  mirror::Class::VisitRoots(callback, arg);
+  mirror::StackTraceElement::VisitRoots(callback, arg);
+  mirror::String::VisitRoots(callback, arg);
+  mirror::Throwable::VisitRoots(callback, arg);
   // Visit all the primitive array types classes.
-  mirror::PrimitiveArray<uint8_t>::VisitRoots(visitor, arg);   // BooleanArray
-  mirror::PrimitiveArray<int8_t>::VisitRoots(visitor, arg);    // ByteArray
-  mirror::PrimitiveArray<uint16_t>::VisitRoots(visitor, arg);  // CharArray
-  mirror::PrimitiveArray<double>::VisitRoots(visitor, arg);    // DoubleArray
-  mirror::PrimitiveArray<float>::VisitRoots(visitor, arg);     // FloatArray
-  mirror::PrimitiveArray<int32_t>::VisitRoots(visitor, arg);   // IntArray
-  mirror::PrimitiveArray<int64_t>::VisitRoots(visitor, arg);   // LongArray
-  mirror::PrimitiveArray<int16_t>::VisitRoots(visitor, arg);   // ShortArray
-  java_vm_->VisitRoots(visitor, arg);
+  mirror::PrimitiveArray<uint8_t>::VisitRoots(callback, arg);   // BooleanArray
+  mirror::PrimitiveArray<int8_t>::VisitRoots(callback, arg);    // ByteArray
+  mirror::PrimitiveArray<uint16_t>::VisitRoots(callback, arg);  // CharArray
+  mirror::PrimitiveArray<double>::VisitRoots(callback, arg);    // DoubleArray
+  mirror::PrimitiveArray<float>::VisitRoots(callback, arg);     // FloatArray
+  mirror::PrimitiveArray<int32_t>::VisitRoots(callback, arg);   // IntArray
+  mirror::PrimitiveArray<int64_t>::VisitRoots(callback, arg);   // LongArray
+  mirror::PrimitiveArray<int16_t>::VisitRoots(callback, arg);   // ShortArray
+  java_vm_->VisitRoots(callback, arg);
   if (pre_allocated_OutOfMemoryError_ != nullptr) {
     pre_allocated_OutOfMemoryError_ = down_cast<mirror::Throwable*>(
-        visitor(pre_allocated_OutOfMemoryError_, arg));
+        callback(pre_allocated_OutOfMemoryError_, arg, 0, kRootVMInternal));
     DCHECK(pre_allocated_OutOfMemoryError_ != nullptr);
   }
-  resolution_method_ = down_cast<mirror::ArtMethod*>(visitor(resolution_method_, arg));
+  resolution_method_ = down_cast<mirror::ArtMethod*>(callback(resolution_method_, arg, 0,
+                                                              kRootVMInternal));
   DCHECK(resolution_method_ != nullptr);
   if (HasImtConflictMethod()) {
-    imt_conflict_method_ = down_cast<mirror::ArtMethod*>(visitor(imt_conflict_method_, arg));
+    imt_conflict_method_ = down_cast<mirror::ArtMethod*>(callback(imt_conflict_method_, arg, 0,
+                                                                  kRootVMInternal));
   }
   if (HasDefaultImt()) {
-    default_imt_ = down_cast<mirror::ObjectArray<mirror::ArtMethod>*>(visitor(default_imt_, arg));
+    default_imt_ = down_cast<mirror::ObjectArray<mirror::ArtMethod>*>(callback(default_imt_, arg,
+                                                                               0, kRootVMInternal));
   }
 
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
     if (callee_save_methods_[i] != nullptr) {
       callee_save_methods_[i] = down_cast<mirror::ArtMethod*>(
-          visitor(callee_save_methods_[i], arg));
+          callback(callee_save_methods_[i], arg, 0, kRootVMInternal));
     }
   }
   {
     MutexLock mu(Thread::Current(), method_verifiers_lock_);
     for (verifier::MethodVerifier* verifier : method_verifiers_) {
-      verifier->VisitRoots(visitor, arg);
+      verifier->VisitRoots(callback, arg);
     }
   }
 }
 
-void Runtime::VisitNonConcurrentRoots(RootVisitor* visitor, void* arg) {
-  thread_list_->VisitRoots(visitor, arg);
-  VisitNonThreadRoots(visitor, arg);
+void Runtime::VisitNonConcurrentRoots(RootCallback* callback, void* arg) {
+  thread_list_->VisitRoots(callback, arg);
+  VisitNonThreadRoots(callback, arg);
 }
 
-void Runtime::VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty) {
-  VisitConcurrentRoots(visitor, arg, only_dirty, clean_dirty);
-  VisitNonConcurrentRoots(visitor, arg);
+void Runtime::VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty) {
+  VisitConcurrentRoots(callback, arg, only_dirty, clean_dirty);
+  VisitNonConcurrentRoots(callback, arg);
 }
 
 mirror::ObjectArray<mirror::ArtMethod>* Runtime::CreateDefaultImt(ClassLinker* cl) {
@@ -1473,12 +1482,11 @@
     method->SetFpSpillMask(0);
   } else if (instruction_set == kX86_64) {
     uint32_t ref_spills =
-        (1 << art::x86_64::RBP) | (1 << art::x86_64::RSI) | (1 << art::x86_64::RDI) |
-        (1 << art::x86_64::R8)  | (1 << art::x86_64::R9)  | (1 << art::x86_64::R10) |
-        (1 << art::x86_64::R11) | (1 << art::x86_64::R12) | (1 << art::x86_64::R13) |
-        (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
+        (1 << art::x86_64::RBX) | (1 << art::x86_64::RBP) | (1 << art::x86_64::R12) |
+        (1 << art::x86_64::R13) | (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
     uint32_t arg_spills =
-        (1 << art::x86_64::RCX) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RBX);
+        (1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) |
+        (1 << art::x86_64::R8) | (1 << art::x86_64::R9);
     uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
                          (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
     size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 896a18b..07f3d7d 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -34,7 +34,7 @@
 #include "instrumentation.h"
 #include "jobject_comparator.h"
 #include "locks.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
 
@@ -341,24 +341,24 @@
 
   // Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If
   // clean_dirty is true then dirty roots will be marked as non-dirty after visiting.
-  void VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty)
+  void VisitRoots(RootCallback* visitor, void* arg, bool only_dirty, bool clean_dirty)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the roots we can do safely do concurrently.
-  void VisitConcurrentRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty)
+  void VisitConcurrentRoots(RootCallback* visitor, void* arg, bool only_dirty, bool clean_dirty)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the non thread roots, we can do this with mutators unpaused.
-  void VisitNonThreadRoots(RootVisitor* visitor, void* arg)
+  void VisitNonThreadRoots(RootCallback* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all other roots which must be done with mutators suspended.
-  void VisitNonConcurrentRoots(RootVisitor* visitor, void* arg)
+  void VisitNonConcurrentRoots(RootCallback* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sweep system weaks, the system weak is deleted if the visitor return nullptr. Otherwise, the
   // system weak is updated to be the visitor's returned value.
-  void SweepSystemWeaks(RootVisitor* visitor, void* arg)
+  void SweepSystemWeaks(IsMarkedCallback* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns a special method that calls into a trampoline for runtime method resolution
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c649765..6c3e7ee 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -179,7 +179,7 @@
     JValue result;
     ArgArray arg_array(nullptr, 0);
     arg_array.Append(receiver);
-    m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+    m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, "V");
   }
   // Detach and delete self.
   Runtime::Current()->GetThreadList()->Unregister(self);
@@ -992,7 +992,8 @@
   }
 }
 
-static mirror::Object* MonitorExitVisitor(mirror::Object* object, void* arg)
+static mirror::Object* MonitorExitVisitor(mirror::Object* object, void* arg, uint32_t /*thread_id*/,
+                                          RootType /*root_type*/)
     NO_THREAD_SAFETY_ANALYSIS {
   Thread* self = reinterpret_cast<Thread*>(arg);
   mirror::Object* entered_monitor = object;
@@ -1034,7 +1035,7 @@
 
   // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
   if (jni_env_ != nullptr) {
-    jni_env_->monitors.VisitRoots(MonitorExitVisitor, self);
+    jni_env_->monitors.VisitRoots(MonitorExitVisitor, self, 0, kRootVMInternal);
   }
 }
 
@@ -1144,16 +1145,17 @@
   return managed_stack_.ShadowFramesContain(sirt_entry);
 }
 
-void Thread::SirtVisitRoots(RootVisitor* visitor, void* arg) {
+void Thread::SirtVisitRoots(RootCallback* visitor, void* arg) {
+  uint32_t thread_id = GetThreadId();  // Same id Thread::VisitRoots reports for other roots.
   for (StackIndirectReferenceTable* cur = top_sirt_; cur; cur = cur->GetLink()) {
     size_t num_refs = cur->NumberOfReferences();
     for (size_t j = 0; j < num_refs; ++j) {
       mirror::Object* object = cur->GetReference(j);
       if (object != nullptr) {
-        const mirror::Object* new_obj = visitor(object, arg);
+        mirror::Object* new_obj = visitor(object, arg, thread_id, kRootNativeStack);
         DCHECK(new_obj != nullptr);
         if (new_obj != object) {
-          cur->SetReference(j, const_cast<mirror::Object*>(new_obj));
+          cur->SetReference(j, new_obj);
         }
       }
     }
@@ -1530,6 +1532,7 @@
 
   // Choose an appropriate constructor and set up the arguments.
   const char* signature;
+  const char* shorty;
   SirtRef<mirror::String> msg_string(this, nullptr);
   if (msg != nullptr) {
     // Ensure we remember this and the method over the String allocation.
@@ -1539,14 +1542,18 @@
       return;
     }
     if (cause.get() == nullptr) {
+      shorty = "VL";
       signature = "(Ljava/lang/String;)V";
     } else {
+      shorty = "VLL";
       signature = "(Ljava/lang/String;Ljava/lang/Throwable;)V";
     }
   } else {
     if (cause.get() == nullptr) {
+      shorty = "V";
       signature = "()V";
     } else {
+      shorty = "VL";
       signature = "(Ljava/lang/Throwable;)V";
     }
   }
@@ -1570,7 +1577,7 @@
                                          throw_location.GetDexPc());
     SetException(gc_safe_throw_location, exception.get());
   } else {
-    ArgArray args("VLL", 3);
+    ArgArray args(shorty, strlen(shorty));
     args.Append(exception.get());
     if (msg != nullptr) {
       args.Append(msg_string.get());
@@ -1579,7 +1586,7 @@
       args.Append(cause.get());
     }
     JValue result;
-    exception_init_method->Invoke(this, args.GetArray(), args.GetNumBytes(), &result, 'V');
+    exception_init_method->Invoke(this, args.GetArray(), args.GetNumBytes(), &result, shorty);
     if (LIKELY(!IsExceptionPending())) {
       ThrowLocation gc_safe_throw_location(saved_throw_this.get(), saved_throw_method.get(),
                                            throw_location.GetDexPc());
@@ -1954,31 +1961,17 @@
 
 class RootCallbackVisitor {
  public:
-  RootCallbackVisitor(RootVisitor* visitor, void* arg) : visitor_(visitor), arg_(arg) {}
+  RootCallbackVisitor(RootCallback* callback, void* arg, uint32_t tid)
+     : callback_(callback), arg_(arg), tid_(tid) {}
 
   mirror::Object* operator()(mirror::Object* obj, size_t, const StackVisitor*) const {
-    return visitor_(obj, arg_);
+    return callback_(obj, arg_, tid_, kRootJavaFrame);
   }
 
  private:
-  RootVisitor* visitor_;
-  void* arg_;
-};
-
-class VerifyCallbackVisitor {
- public:
-  VerifyCallbackVisitor(VerifyRootVisitor* visitor, void* arg)
-      : visitor_(visitor),
-        arg_(arg) {
-  }
-
-  void operator()(const mirror::Object* obj, size_t vreg, const StackVisitor* visitor) const {
-    visitor_(obj, arg_, vreg, visitor);
-  }
-
- private:
-  VerifyRootVisitor* const visitor_;
+  RootCallback* const callback_;
   void* const arg_;
+  const uint32_t tid_;
 };
 
 void Thread::SetClassLoaderOverride(mirror::ClassLoader* class_loader_override) {
@@ -1988,39 +1981,42 @@
   class_loader_override_ = class_loader_override;
 }
 
-void Thread::VisitRoots(RootVisitor* visitor, void* arg) {
+void Thread::VisitRoots(RootCallback* visitor, void* arg) {
+  uint32_t thread_id = GetThreadId();
   if (opeer_ != nullptr) {
-    opeer_ = visitor(opeer_, arg);
+    opeer_ = visitor(opeer_, arg, thread_id, kRootThreadObject);
   }
   if (exception_ != nullptr) {
-    exception_ = down_cast<mirror::Throwable*>(visitor(exception_, arg));
+    exception_ = down_cast<mirror::Throwable*>(visitor(exception_, arg, thread_id,
+                                                       kRootNativeStack));
   }
   throw_location_.VisitRoots(visitor, arg);
   if (class_loader_override_ != nullptr) {
-    class_loader_override_ = down_cast<mirror::ClassLoader*>(visitor(class_loader_override_, arg));
+    class_loader_override_ =
+        down_cast<mirror::ClassLoader*>(visitor(class_loader_override_, arg, thread_id,
+                                                kRootNativeStack));
   }
-  jni_env_->locals.VisitRoots(visitor, arg);
-  jni_env_->monitors.VisitRoots(visitor, arg);
-
+  jni_env_->locals.VisitRoots(visitor, arg, thread_id, kRootJNILocal);
+  jni_env_->monitors.VisitRoots(visitor, arg, thread_id, kRootJNIMonitor);
   SirtVisitRoots(visitor, arg);
-
   // Visit roots on this thread's stack
   Context* context = GetLongJumpContext();
-  RootCallbackVisitor visitorToCallback(visitor, arg);
+  RootCallbackVisitor visitorToCallback(visitor, arg, thread_id);
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context, visitorToCallback);
   mapper.WalkStack();
   ReleaseLongJumpContext(context);
-
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
     if (frame.this_object_ != nullptr) {
-      frame.this_object_ = visitor(frame.this_object_, arg);
+      frame.this_object_ = visitor(frame.this_object_, arg, thread_id, kRootJavaFrame);
     }
     DCHECK(frame.method_ != nullptr);
-    frame.method_ = down_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
+    frame.method_ = down_cast<mirror::ArtMethod*>(visitor(frame.method_, arg, thread_id,
+                                                          kRootJavaFrame));
   }
 }
 
-static mirror::Object* VerifyRoot(mirror::Object* root, void* arg) {
+static mirror::Object* VerifyRoot(mirror::Object* root, void* arg, uint32_t /*thread_id*/,
+                                  RootType /*root_type*/) {
   DCHECK(root != nullptr);
   DCHECK(arg != nullptr);
   reinterpret_cast<gc::Heap*>(arg)->VerifyObject(root);
@@ -2029,7 +2025,7 @@
 
 void Thread::VerifyStackImpl() {
   UniquePtr<Context> context(Context::Create());
-  RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap());
+  RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap(), GetThreadId());
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitorToCallback);
   mapper.WalkStack();
 }
diff --git a/runtime/thread.h b/runtime/thread.h
index b7f8bb0..daffc92 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -31,8 +31,8 @@
 #include "globals.h"
 #include "jvalue.h"
 #include "locks.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 #include "runtime_stats.h"
 #include "stack.h"
 #include "stack_indirect_reference_table.h"
@@ -389,7 +389,7 @@
   static jobjectArray InternalStackTraceToStackTraceElementArray(JNIEnv* env, jobject internal,
       jobjectArray output_array = NULL, int* stack_depth = NULL);
 
-  void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void VerifyStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -503,7 +503,8 @@
   // Is the given obj in this thread's stack indirect reference table?
   bool SirtContains(jobject obj) const;
 
-  void SirtVisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SirtVisitRoots(RootCallback* visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void PushSirt(StackIndirectReferenceTable* sirt) {
     sirt->SetLink(top_sirt_);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 8bf099b..25f692d 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -756,28 +756,30 @@
   }
 }
 
-void ThreadList::VisitRoots(RootVisitor* visitor, void* arg) const {
+void ThreadList::VisitRoots(RootCallback* callback, void* arg) const {
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
-    thread->VisitRoots(visitor, arg);
+    thread->VisitRoots(callback, arg);
   }
 }
 
-struct VerifyRootWrapperArg {
-  VerifyRootVisitor* visitor;
-  void* arg;
+class VerifyRootWrapperArg {
+ public:
+  VerifyRootWrapperArg(VerifyRootCallback* callback, void* arg) : callback_(callback), arg_(arg) {
+  }
+  VerifyRootCallback* const callback_;
+  void* const arg_;
 };
 
-static mirror::Object* VerifyRootWrapperCallback(mirror::Object* root, void* arg) {
+static mirror::Object* VerifyRootWrapperCallback(mirror::Object* root, void* arg,
+                                                 uint32_t /*thread_id*/, RootType /*root_type*/) {
   VerifyRootWrapperArg* wrapperArg = reinterpret_cast<VerifyRootWrapperArg*>(arg);
-  wrapperArg->visitor(root, wrapperArg->arg, 0, NULL);
+  wrapperArg->callback_(root, wrapperArg->arg_, 0, nullptr);  // Root is returned unchanged below.
   return root;
 }
 
-void ThreadList::VerifyRoots(VerifyRootVisitor* visitor, void* arg) const {
-  VerifyRootWrapperArg wrapper;
-  wrapper.visitor = visitor;
-  wrapper.arg = arg;
+void ThreadList::VerifyRoots(VerifyRootCallback* callback, void* arg) const {
+  VerifyRootWrapperArg wrapper(callback, arg);
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
     thread->VisitRoots(VerifyRootWrapperCallback, &wrapper);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 45994ae..e98aed9 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -19,7 +19,7 @@
 
 #include "base/mutex.h"
 #include "jni.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 
 #include <bitset>
 #include <list>
@@ -113,10 +113,10 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
   void Unregister(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
 
-  void VisitRoots(RootVisitor* visitor, void* arg) const
+  void VisitRoots(RootCallback* callback, void* arg) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VerifyRoots(VerifyRootVisitor* visitor, void* arg) const
+  void VerifyRoots(VerifyRootCallback* callback, void* arg) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Return a copy of the thread list.
diff --git a/runtime/throw_location.cc b/runtime/throw_location.cc
index 1cc3e74..2a1faff 100644
--- a/runtime/throw_location.cc
+++ b/runtime/throw_location.cc
@@ -33,13 +33,13 @@
   }
 }
 
-void ThrowLocation::VisitRoots(RootVisitor* visitor, void* arg) {
+void ThrowLocation::VisitRoots(RootCallback* visitor, void* arg) {
   if (this_object_ != nullptr) {
-    this_object_ = visitor(this_object_, arg);
+    this_object_ = visitor(this_object_, arg, 0, kRootVMInternal);
     DCHECK(this_object_ != nullptr);
   }
   if (method_ != nullptr) {
-    method_ = down_cast<mirror::ArtMethod*>(visitor(method_, arg));
+    method_ = down_cast<mirror::ArtMethod*>(visitor(method_, arg, 0, kRootVMInternal));
     DCHECK(method_ != nullptr);
   }
 }
diff --git a/runtime/throw_location.h b/runtime/throw_location.h
index 5da446e..f30aa4e 100644
--- a/runtime/throw_location.h
+++ b/runtime/throw_location.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_THROW_LOCATION_H_
 #define ART_RUNTIME_THROW_LOCATION_H_
 
+#include "object_callbacks.h"
 #include "base/macros.h"
-#include "root_visitor.h"
 
 #include <stdint.h>
 #include <string>
@@ -62,7 +62,7 @@
 
   std::string Dump() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootVisitor* visitor, void* arg);
+  void VisitRoots(RootCallback* visitor, void* arg);
 
  private:
   // The 'this' reference of the throwing method.
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 30be36c..ab943a6 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3983,8 +3983,8 @@
   verifier::RegTypeCache::ShutDown();
 }
 
-void MethodVerifier::VisitRoots(RootVisitor* visitor, void* arg) {
-  reg_types_.VisitRoots(visitor, arg);
+void MethodVerifier::VisitRoots(RootCallback* callback, void* arg) {
+  reg_types_.VisitRoots(callback, arg);
 }
 
 }  // namespace verifier
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 7c75c9c..031cfec 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -220,7 +220,7 @@
   // Describe VRegs at the given dex pc.
   std::vector<int32_t> DescribeVRegs(uint32_t dex_pc);
 
-  void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Accessors used by the compiler via CompilerCallback
   const DexFile::CodeItem* CodeItem() const;
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index f394bce..e56e670 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -969,9 +969,9 @@
   }
 }
 
-void RegType::VisitRoots(RootVisitor* visitor, void* arg) {
+void RegType::VisitRoots(RootCallback* callback, void* arg) {
   if (klass_ != nullptr) {
-    klass_ = down_cast<mirror::Class*>(visitor(klass_, arg));
+    klass_ = down_cast<mirror::Class*>(callback(klass_, arg, 0, kRootUnknown));
   }
 }
 
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 3818375..a23b8c4 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -19,8 +19,8 @@
 
 #include "base/macros.h"
 #include "globals.h"
+#include "object_callbacks.h"
 #include "primitive.h"
-#include "root_visitor.h"
 
 #include "jni.h"
 
@@ -270,7 +270,7 @@
 
   virtual ~RegType() {}
 
-  void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
   RegType(mirror::Class* klass, const std::string& descriptor, uint16_t cache_id)
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index c8a03d6..5e894ed 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -573,9 +573,9 @@
   }
 }
 
-void RegTypeCache::VisitRoots(RootVisitor* visitor, void* arg) {
+void RegTypeCache::VisitRoots(RootCallback* callback, void* arg) {
   for (RegType* entry : entries_) {
-    entry->VisitRoots(visitor, arg);
+    entry->VisitRoots(callback, arg);
   }
 }
 
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 41bc8c9..4cc7e61 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -20,8 +20,8 @@
 #include "base/casts.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
+#include "object_callbacks.h"
 #include "reg_type.h"
-#include "root_visitor.h"
 #include "runtime.h"
 
 #include <stdint.h>
@@ -146,7 +146,7 @@
   void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& RegTypeFromPrimitiveType(Primitive::Type) const;
 
-  void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   void FillPrimitiveAndSmallConstantTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/test/Android.mk b/test/Android.mk
index d716f9b..4d47651 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -50,7 +50,8 @@
 	ParallelGC \
 	ReferenceMap \
 	StackWalk \
-	ThreadStress
+	ThreadStress \
+	UnsafeTest
 
 # TODO: Enable when the StackWalk2 tests are passing
 #	StackWalk2 \
diff --git a/test/UnsafeTest/UnsafeTest.java b/test/UnsafeTest/UnsafeTest.java
new file mode 100644
index 0000000..f3d52896
--- /dev/null
+++ b/test/UnsafeTest/UnsafeTest.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import sun.misc.Unsafe;
+
+public class UnsafeTest {
+  static {
+    System.loadLibrary("arttest");
+  }
+
+  private static void check(int actual, int expected, String msg) {
+    if (actual != expected) {
+      System.logE(msg + " : " + actual + " != " + expected);
+      System.exit(-1);
+    }
+  }
+
+  private static Unsafe getUnsafe() throws Exception {
+    Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+    Field f = unsafeClass.getDeclaredField("theUnsafe");
+    f.setAccessible(true);
+    return (Unsafe) f.get(null);
+  }
+
+  public static void main(String[] args) throws Exception {
+    Unsafe unsafe = getUnsafe();
+    check(unsafe.arrayBaseOffset(boolean[].class), vmArrayBaseOffset(boolean[].class),
+        "Unsafe.arrayBaseOffset(boolean[])");
+    check(unsafe.arrayBaseOffset(byte[].class), vmArrayBaseOffset(byte[].class),
+        "Unsafe.arrayBaseOffset(byte[])");
+    check(unsafe.arrayBaseOffset(char[].class), vmArrayBaseOffset(char[].class),
+        "Unsafe.arrayBaseOffset(char[])");
+    check(unsafe.arrayBaseOffset(double[].class), vmArrayBaseOffset(double[].class),
+        "Unsafe.arrayBaseOffset(double[])");
+    check(unsafe.arrayBaseOffset(float[].class), vmArrayBaseOffset(float[].class),
+        "Unsafe.arrayBaseOffset(float[])");
+    check(unsafe.arrayBaseOffset(int[].class), vmArrayBaseOffset(int[].class),
+        "Unsafe.arrayBaseOffset(int[])");
+    check(unsafe.arrayBaseOffset(long[].class), vmArrayBaseOffset(long[].class),
+        "Unsafe.arrayBaseOffset(long[])");
+    check(unsafe.arrayBaseOffset(Object[].class), vmArrayBaseOffset(Object[].class),
+        "Unsafe.arrayBaseOffset(Object[])");
+
+    check(unsafe.arrayIndexScale(boolean[].class), vmArrayIndexScale(boolean[].class),
+        "Unsafe.arrayIndexScale(boolean[])");
+    check(unsafe.arrayIndexScale(byte[].class), vmArrayIndexScale(byte[].class),
+        "Unsafe.arrayIndexScale(byte[])");
+    check(unsafe.arrayIndexScale(char[].class), vmArrayIndexScale(char[].class),
+        "Unsafe.arrayIndexScale(char[])");
+    check(unsafe.arrayIndexScale(double[].class), vmArrayIndexScale(double[].class),
+        "Unsafe.arrayIndexScale(double[])");
+    check(unsafe.arrayIndexScale(float[].class), vmArrayIndexScale(float[].class),
+        "Unsafe.arrayIndexScale(float[])");
+    check(unsafe.arrayIndexScale(int[].class), vmArrayIndexScale(int[].class),
+        "Unsafe.arrayIndexScale(int[])");
+    check(unsafe.arrayIndexScale(long[].class), vmArrayIndexScale(long[].class),
+        "Unsafe.arrayIndexScale(long[])");
+    check(unsafe.arrayIndexScale(Object[].class), vmArrayIndexScale(Object[].class),
+        "Unsafe.arrayIndexScale(Object[])");
+  }
+
+  private static native int vmArrayBaseOffset(Class clazz);
+  private static native int vmArrayIndexScale(Class clazz);
+}
diff --git a/test/UnsafeTest/unsafe_test.cc b/test/UnsafeTest/unsafe_test.cc
new file mode 100644
index 0000000..e36ee14
--- /dev/null
+++ b/test/UnsafeTest/unsafe_test.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+#include "mirror/array.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/class.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+// Native implementation of UnsafeTest.vmArrayBaseOffset(): returns the array data offset
+// the runtime computes for the array class classObj. Assumes classObj is an array class;
+// the result of GetComponentType() is dereferenced unchecked.
+extern "C" JNIEXPORT jint JNICALL Java_UnsafeTest_vmArrayBaseOffset(JNIEnv* env, jclass,
+                                                                    jobject classObj) {
+  ScopedObjectAccess soa(env);
+  mirror::Class* array_class = soa.Decode<mirror::Class*>(classObj);
+  const Primitive::Type element_type = array_class->GetComponentType()->GetPrimitiveType();
+  return mirror::Array::DataOffset(Primitive::ComponentSize(element_type)).Int32Value();
+}
+
+// Native implementation of UnsafeTest.vmArrayIndexScale(): returns the per-element size
+// that Primitive::ComponentSize() reports for the array class classObj. Assumes classObj
+// is an array class; the result of GetComponentType() is dereferenced unchecked.
+extern "C" JNIEXPORT jint JNICALL Java_UnsafeTest_vmArrayIndexScale(JNIEnv* env, jclass,
+                                                                    jobject classObj) {
+  ScopedObjectAccess soa(env);
+  mirror::Class* array_class = soa.Decode<mirror::Class*>(classObj);
+  const Primitive::Type element_type = array_class->GetComponentType()->GetPrimitiveType();
+  return Primitive::ComponentSize(element_type);
+}
+
+}  // namespace art
diff --git a/tools/Android.mk b/tools/Android.mk
new file mode 100644
index 0000000..6c385dc
--- /dev/null
+++ b/tools/Android.mk
@@ -0,0 +1,36 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+ifeq ($(WITH_HOST_DALVIK),true)
+# Host module "art": the tools/art shell script, installed as a host
+# executable. Nothing is compiled, so base_rules.mk is included directly and
+# the rule that produces the built module is spelled out by hand below.
+include $(CLEAR_VARS)
+LOCAL_MODULE := art
+LOCAL_MODULE_CLASS := EXECUTABLES
+LOCAL_MODULE_TAGS := optional
+LOCAL_IS_HOST_MODULE := true
+include $(BUILD_SYSTEM)/base_rules.mk
+
+# Copy the script into place and make the copy executable.
+$(LOCAL_BUILT_MODULE): $(LOCAL_PATH)/art $(ACP)
+	@echo "Copy: $(PRIVATE_MODULE) ($@)"
+	$(copy-file-to-new-target)
+	$(hide) chmod 755 $@
+
+endif
diff --git a/tools/art b/tools/art
index 0a6c941..2e3a46e 100755
--- a/tools/art
+++ b/tools/art
@@ -34,8 +34,33 @@
 done
 
 unset ANDROID_PRODUCT_OUT # avoid defaulting dex2oat --host-prefix to target output
+
+# Prints the path reached by following the chain of symbolic links that starts
+# at $1. A relative link target is resolved against the directory that holds
+# the link (not the caller's working directory), matching filesystem semantics.
+function follow_links() {
+  local file="$1"
+  local target
+  while [ -h "$file" ]; do
+    # On Mac OS, readlink -f doesn't work.
+    target="$(readlink "$file")"
+    case "$target" in
+      /*) file="$target" ;;
+      *) file="$(dirname "$file")/$target" ;;
+    esac
+  done
+  echo "$file"
+}
+
+# Derive the build top and host output directory from the resolved location of
+# this script; this assumes the script lives in <host out>/bin, four levels
+# below the build top.
+PROG_NAME="$(follow_links "$BASH_SOURCE")"
+PROG_DIR="$(cd "$(dirname "$PROG_NAME")" ; pwd -P)"
+ANDROID_BUILD_TOP="$(cd "${PROG_DIR}/../../../../" ; pwd -P)/"
+ANDROID_HOST_OUT=$PROG_DIR/..
+
 mkdir -p /tmp/android-data/dalvik-cache
-cd $ANDROID_BUILD_TOP
 ANDROID_DATA=/tmp/android-data \
   ANDROID_ROOT=$ANDROID_HOST_OUT \
   LD_LIBRARY_PATH=$ANDROID_HOST_OUT/lib \