Merge "Fix FindCatchBlock to work in -Xverify:none mode."
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 6f4fa3a..ba4b5c3 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -159,8 +159,10 @@
 };
 
 enum OpSize {
-  kWord,
-  kLong,
+  kWord,            // Natural word size of target (32/64).
+  k32,
+  k64,
+  kReference,       // Object reference; compressed on 64-bit targets.
   kSingle,
   kDouble,
   kUnsignedHalf,
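
For context on the hunks below: the rest of this change replaces size-ambiguous LoadWordDisp/StoreWordDisp call sites with size-explicit helpers (Load32Disp, LoadRefDisp, Store32Disp, StoreRefDisp). Their definitions are not part of this diff; a minimal sketch of how such wrappers could sit on top of the existing LoadBaseDisp/StoreBaseDisp interface (signatures inferred from the call sites below, with INVALID_SREG assumed for the load hint) is:

// Sketch only - thin wrappers over the size-parameterized base utilities,
// mirroring how the call sites introduced in this change use them.
LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest) {
  return LoadBaseDisp(r_base, displacement, r_dest, k32, INVALID_SREG);
}
LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
  // kReference lets a 64-bit backend handle (possibly compressed) object
  // references differently from plain 32-bit values.
  return LoadBaseDisp(r_base, displacement, r_dest, kReference, INVALID_SREG);
}
LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
  return StoreBaseDisp(r_base, displacement, r_src, k32);
}
LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src) {
  return StoreBaseDisp(r_base, displacement, r_src, kReference);
}
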
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index d0d0e6b..b374ed8 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -128,7 +128,7 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord);
+  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, k32);
 
   // ..and go! NOTE: No instruction set switch here - must stay Thumb2
   LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
@@ -180,6 +180,7 @@
  */
 void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
+  // FIXME: need separate LoadValues for object references.
   LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
@@ -193,7 +194,7 @@
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
-    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
     MarkPossibleNullPointerException(opt_flags);
     LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
@@ -219,7 +220,7 @@
   } else {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
-    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
     MarkPossibleNullPointerException(opt_flags);
     OpRegImm(kOpCmp, rs_r1, 0);
@@ -248,7 +249,7 @@
   LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
   LIR* null_check_branch = nullptr;
-  LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+  Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
     if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
@@ -259,11 +260,11 @@
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
-    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
     MarkPossibleNullPointerException(opt_flags);
     LoadConstantNoClobber(rs_r3, 0);
     LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
-    StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+    Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
     LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
 
     LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
@@ -284,14 +285,14 @@
   } else {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
-    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
+    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
     MarkPossibleNullPointerException(opt_flags);
-    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
     LoadConstantNoClobber(rs_r3, 0);
     // Is lock unheld on lock or held by us (==thread_id) on unlock?
     OpRegReg(kOpCmp, rs_r1, rs_r2);
     LIR* it = OpIT(kCondEq, "EE");
-    StoreWordDisp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+    Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
     // Go expensive route - UnlockObjectFromCode(obj);
     LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
                        rs_rARM_LR);
@@ -307,9 +308,9 @@
   int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage reset_reg = AllocTemp();
-  LoadWordDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
+  Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  StoreWordDisp(rs_rARM_SELF, ex_offset, reset_reg);
+  Store32Disp(rs_rARM_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
@@ -354,7 +355,7 @@
   if (!skip_overflow_check) {
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
       /* Load stack limit */
-      LoadWordDisp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+      Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
     }
   }
   /* Spill core callee saves */
@@ -391,6 +392,7 @@
           ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
           // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
           // codegen and target are in thumb2 mode.
+          // NOTE: native pointer.
           m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
         }
 
@@ -421,7 +423,7 @@
       // a sub instruction.  Otherwise we will get a temp allocation and the
       // code size will increase.
       OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
-      LoadWordDisp(rs_r12, 0, rs_r12);
+      Load32Disp(rs_r12, 0, rs_r12);
       MarkPossibleStackOverflowException();
       OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
     }
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index d5b34a5..97ef1e4 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -684,18 +684,18 @@
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (size == kLong) {
+  if (size == k64) {
     // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
     if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
-      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
-      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
+      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
+      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
     } else {
-      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
-      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
+      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
+      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
     }
     StoreValueWide(rl_dest, rl_result);
   } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
     LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
     StoreValue(rl_dest, rl_result);
@@ -708,13 +708,13 @@
   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
   RegLocation rl_src_value = info->args[2];  // [size] value
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
-  if (size == kLong) {
+  if (size == k64) {
     // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
     RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), kWord);
-    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), kWord);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32);
+    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32);
   } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0.
     RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
     StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
@@ -1148,7 +1148,7 @@
   if (needs_range_check) {
     reg_len = AllocTemp();
     /* Get len */
-    LoadWordDisp(rl_array.reg, len_offset, reg_len);
+    Load32Disp(rl_array.reg, len_offset, reg_len);
     MarkPossibleNullPointerException(opt_flags);
   } else {
     ForceImplicitNullCheck(rl_array.reg, opt_flags);
@@ -1217,7 +1217,7 @@
   bool constant_index = rl_index.is_const;
 
   int data_offset;
-  if (size == kLong || size == kDouble) {
+  if (size == k64 || size == kDouble) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
@@ -1254,7 +1254,7 @@
     reg_len = AllocTemp();
     // NOTE: max live temps(4) here.
     /* Get len */
-    LoadWordDisp(rl_array.reg, len_offset, reg_len);
+    Load32Disp(rl_array.reg, len_offset, reg_len);
     MarkPossibleNullPointerException(opt_flags);
   } else {
     ForceImplicitNullCheck(rl_array.reg, opt_flags);
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 1053a8f..305e89b 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -656,7 +656,7 @@
   if (info->live && info->dirty) {
     info->dirty = false;
     int v_reg = mir_graph_->SRegToVReg(info->s_reg);
-    StoreBaseDisp(rs_rARM_SP, VRegOffset(v_reg), reg, kWord);
+    StoreBaseDisp(rs_rARM_SP, VRegOffset(v_reg), reg, k32);
   }
 }
 
@@ -738,8 +738,8 @@
 
 LIR* ArmMir2Lir::CheckSuspendUsingLoad() {
   RegStorage tmp = rs_r0;
-  LoadWordDisp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
-  LIR* load2 = LoadWordDisp(tmp, 0, tmp);
+  Load32Disp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
+  LIR* load2 = Load32Disp(tmp, 0, tmp);
   return load2;
 }
 
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 70cbdd2..6879ffc 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -699,23 +699,24 @@
 
   if (ARM_FPREG(r_dest.GetReg())) {
     if (ARM_SINGLEREG(r_dest.GetReg())) {
-      DCHECK((size == kWord) || (size == kSingle));
+      DCHECK((size == k32) || (size == kSingle));
       opcode = kThumb2Vldrs;
       size = kSingle;
     } else {
       DCHECK(ARM_DOUBLEREG(r_dest.GetReg()));
-      DCHECK((size == kLong) || (size == kDouble));
+      DCHECK((size == k64) || (size == kDouble));
       DCHECK_EQ((r_dest.GetReg() & 0x1), 0);
       opcode = kThumb2Vldrd;
       size = kDouble;
     }
   } else {
     if (size == kSingle)
-      size = kWord;
+      size = k32;
   }
 
   switch (size) {
     case kDouble:  // fall-through
+    // Intentional fall-through.
     case kSingle:
       reg_ptr = AllocTemp();
       if (scale) {
@@ -727,7 +728,9 @@
       load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0);
       FreeTemp(reg_ptr);
       return load;
-    case kWord:
+    case k32:
+    // Intentional fall-through.
+    case kReference:
       opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR;
       break;
     case kUnsignedHalf:
@@ -764,23 +767,24 @@
 
   if (ARM_FPREG(r_src.GetReg())) {
     if (ARM_SINGLEREG(r_src.GetReg())) {
-      DCHECK((size == kWord) || (size == kSingle));
+      DCHECK((size == k32) || (size == kSingle));
       opcode = kThumb2Vstrs;
       size = kSingle;
     } else {
       DCHECK(ARM_DOUBLEREG(r_src.GetReg()));
-      DCHECK((size == kLong) || (size == kDouble));
+      DCHECK((size == k64) || (size == kDouble));
       DCHECK_EQ((r_src.GetReg() & 0x1), 0);
       opcode = kThumb2Vstrd;
       size = kDouble;
     }
   } else {
     if (size == kSingle)
-      size = kWord;
+      size = k32;
   }
 
   switch (size) {
     case kDouble:  // fall-through
+    // Intentional fall-through.
     case kSingle:
       reg_ptr = AllocTemp();
       if (scale) {
@@ -792,14 +796,18 @@
       store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0);
       FreeTemp(reg_ptr);
       return store;
-    case kWord:
+    case k32:
+    // Intentional fall-through.
+    case kReference:
       opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR;
       break;
     case kUnsignedHalf:
+    // Intentional fall-through.
     case kSignedHalf:
       opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR;
       break;
     case kUnsignedByte:
+    // Intentional fall-through.
     case kSignedByte:
       opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR;
       break;
@@ -832,7 +840,8 @@
   bool null_pointer_safepoint = false;
   switch (size) {
     case kDouble:
-    case kLong:
+    // Intentional fall-through.
+    case k64:
       if (ARM_FPREG(dest_low_reg)) {
         // Note: following change to avoid using pairs for doubles, replace conversion w/ DCHECK.
         if (r_dest.IsPair()) {
@@ -849,15 +858,18 @@
           load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(),
                          displacement >> 2);
         } else {
-          load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), kWord, s_reg);
+          load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32, s_reg);
           null_pointer_safepoint = true;
-          LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), kWord, INVALID_SREG);
+          LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32, INVALID_SREG);
         }
         already_generated = true;
       }
       break;
     case kSingle:
-    case kWord:
+    // Intentional fall-through.
+    case k32:
+    // Intentional fall-through.
+    case kReference:
       if (ARM_FPREG(r_dest.GetReg())) {
         opcode = kThumb2Vldrs;
         if (displacement <= 1020) {
@@ -953,13 +965,17 @@
 
 LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
                               int s_reg) {
-  DCHECK(!((size == kLong) || (size == kDouble)));
+  DCHECK(!((size == k64) || (size == kDouble)));
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
   return LoadBaseDispBody(r_base, displacement, r_dest, size, s_reg);
 }
 
 LIR* ArmMir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
                                   int s_reg) {
-  return LoadBaseDispBody(r_base, displacement, r_dest, kLong, s_reg);
+  return LoadBaseDispBody(r_base, displacement, r_dest, k64, s_reg);
 }
 
 
@@ -975,16 +991,16 @@
   int src_low_reg = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg();
   bool null_pointer_safepoint = false;
   switch (size) {
-    case kLong:
+    case k64:
     case kDouble:
       if (!ARM_FPREG(src_low_reg)) {
         if (displacement <= 1020) {
           store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(),
                           displacement >> 2);
         } else {
-          store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), kWord);
+          store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32);
           null_pointer_safepoint = true;
-          StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), kWord);
+          StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32);
         }
         already_generated = true;
       } else {
@@ -1001,7 +1017,8 @@
       }
       break;
     case kSingle:
-    case kWord:
+    case k32:
+    case kReference:
       if (ARM_FPREG(r_src.GetReg())) {
         DCHECK(ARM_SINGLEREG(r_src.GetReg()));
         opcode = kThumb2Vstrs;
@@ -1082,12 +1099,16 @@
 
 LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                OpSize size) {
-  DCHECK(!((size == kLong) || (size == kDouble)));
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
+  DCHECK(!((size == k64) || (size == kDouble)));
   return StoreBaseDispBody(r_base, displacement, r_src, size);
 }
 
 LIR* ArmMir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseDispBody(r_base, displacement, r_src, kLong);
+  return StoreBaseDispBody(r_base, displacement, r_src, k64);
 }
 
 LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
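
The repeated "TODO: base this on target" normalizations above pin kWord to k32 for now; per the new enum comment, kWord is meant to resolve to the target's natural word size. A hedged sketch of what that per-target resolution might eventually look like (the helper and its 64-bit predicate are illustrative, not part of this change):

// Hypothetical sketch: resolve target-neutral kWord to a concrete size.
// This change hard-codes kWord -> k32 in the 32-bit backends; a 64-bit
// backend would presumably map it to k64 instead.
static OpSize ResolveWordSize(OpSize size, bool target_is_64bit) {
  if (size == kWord) {
    return target_is_64bit ? k64 : k32;
  }
  return size;
}
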
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 6e6b8f0..9d85477 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1118,7 +1118,7 @@
 LIR *Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
                                 int offset, int check_value, LIR* target) {
   // Handle this for architectures that can't compare to memory.
-  LoadWordDisp(base_reg, offset, temp_reg);
+  Load32Disp(base_reg, offset, temp_reg);
   LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
   return branch;
 }
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 8806e68..3ec31ba 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -212,8 +212,8 @@
     INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
     INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
 
-    INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
-    INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, k32),
+    INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, k64),
     INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
 
     INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
@@ -241,12 +241,12 @@
     INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
 
     INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
-    INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
-    INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, k32),
+    INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, k64),
     INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
     INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
-    INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
-    INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, k32),
+    INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, k64),
     INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
 
     INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas,
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 6781a9b..1c6c124 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -204,7 +204,7 @@
     // register with offset 0.  This will cause a signal if the register contains 0 (null).
     RegStorage tmp = AllocTemp();
     // TODO: for Mips, would be best to use rZERO as the bogus register target.
-    LIR* load = LoadWordDisp(reg, 0, tmp);
+    LIR* load = Load32Disp(reg, 0, tmp);
     FreeTemp(tmp);
     MarkSafepointPC(load);
   }
@@ -426,7 +426,7 @@
     for (int i = 0; i < elems; i++) {
       RegLocation loc = UpdateLoc(info->args[i]);
       if (loc.location == kLocPhysReg) {
-        StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kWord);
+        Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
       }
     }
     /*
@@ -463,8 +463,8 @@
     // Generate the copy loop.  Going backwards for convenience
     LIR* target = NewLIR0(kPseudoTargetLabel);
     // Copy next element
-    LoadBaseIndexed(r_src, r_idx, r_val, 2, kWord);
-    StoreBaseIndexed(r_dst, r_idx, r_val, 2, kWord);
+    LoadBaseIndexed(r_src, r_idx, r_val, 2, k32);
+    StoreBaseIndexed(r_dst, r_idx, r_val, 2, k32);
     FreeTemp(r_val);
     OpDecAndBranch(kCondGe, r_idx, target);
     if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
@@ -476,9 +476,8 @@
     // TUNING: interleave
     for (int i = 0; i < elems; i++) {
       RegLocation rl_arg = LoadValue(info->args[i], kCoreReg);
-      StoreBaseDisp(TargetReg(kRet0),
-                    mirror::Array::DataOffset(component_size).Int32Value() + i * 4,
-                    rl_arg.reg, kWord);
+      Store32Disp(TargetReg(kRet0),
+                    mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg);
       // If the LoadValue caused a temp to be allocated, free it
       if (IsTemp(rl_arg.reg)) {
         FreeTemp(rl_arg.reg);
@@ -529,7 +528,7 @@
       // Fast path, static storage base is this method's class
       RegLocation rl_method  = LoadCurrMethod();
       r_base = AllocTemp();
-      LoadWordDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
+      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
       if (IsTemp(rl_method.reg)) {
         FreeTemp(rl_method.reg);
       }
@@ -546,8 +545,8 @@
       LoadCurrMethodDirect(r_method);
       r_base = TargetReg(kArg0);
       LockTemp(r_base);
-      LoadWordDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base);
-      LoadWordDisp(r_base, mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+      LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base);
+      LoadRefDisp(r_base, mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
                    sizeof(int32_t*) * field_info.StorageIndex(), r_base);
       // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
       if (!field_info.IsInitialized() &&
@@ -588,8 +587,10 @@
     }
     if (is_long_or_double) {
       StoreBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
+    } else if (rl_src.ref) {
+      StoreRefDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
     } else {
-      StoreWordDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
+      Store32Disp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
     }
     if (field_info.IsVolatile()) {
       // A load might follow the volatile store so insert a StoreLoad barrier.
@@ -620,7 +621,7 @@
       // Fast path, static storage base is this method's class
       RegLocation rl_method  = LoadCurrMethod();
       r_base = AllocTemp();
-      LoadWordDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
+      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
     } else {
       // Medium path, static storage base in a different class which requires checks that the other
       // class is initialized
@@ -633,9 +634,9 @@
       LoadCurrMethodDirect(r_method);
       r_base = TargetReg(kArg0);
       LockTemp(r_base);
-      LoadWordDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base);
-      LoadWordDisp(r_base, mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
-                   sizeof(int32_t*) * field_info.StorageIndex(), r_base);
+      LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base);
+      LoadRefDisp(r_base, mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+                  sizeof(int32_t*) * field_info.StorageIndex(), r_base);
       // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
       if (!field_info.IsInitialized() &&
           (mir->optimization_flags & MIR_IGNORE_CLINIT_CHECK) == 0) {
@@ -668,8 +669,10 @@
 
     if (is_long_or_double) {
       LoadBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg, INVALID_SREG);
+    } else if (rl_result.ref) {
+      LoadRefDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg);
     } else {
-      LoadWordDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg);
+      Load32Disp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg);
     }
     FreeTemp(r_base);
 
@@ -800,7 +803,7 @@
     } else {
       rl_result = EvalLoc(rl_dest, reg_class, true);
       GenNullCheck(rl_obj.reg, opt_flags);
-      LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg, kWord,
+      LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg, k32,
                    rl_obj.s_reg_low);
       MarkPossibleNullPointerException(opt_flags);
       if (field_info.IsVolatile()) {
@@ -864,7 +867,7 @@
         // There might have been a store before this volatile one so insert StoreStore barrier.
         GenMemBarrier(kStoreStore);
       }
-      StoreBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_src.reg, kWord);
+      Store32Disp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_src.reg);
       MarkPossibleNullPointerException(opt_flags);
       if (field_info.IsVolatile()) {
         // A load might follow the volatile store so insert a StoreLoad barrier.
@@ -913,11 +916,11 @@
     // We're don't need access checks, load type from dex cache
     int32_t dex_cache_offset =
         mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value();
-    LoadWordDisp(rl_method.reg, dex_cache_offset, res_reg);
+    Load32Disp(rl_method.reg, dex_cache_offset, res_reg);
     int32_t offset_of_type =
         mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
                           * type_idx);
-    LoadWordDisp(res_reg, offset_of_type, rl_result.reg);
+    Load32Disp(res_reg, offset_of_type, rl_result.reg);
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file,
         type_idx) || SLOW_TYPE_PATH) {
       // Slow path, at runtime test if type is null and if so initialize
@@ -982,11 +985,11 @@
       r_method = TargetReg(kArg2);
       LoadCurrMethodDirect(r_method);
     }
-    LoadWordDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(),
-                 TargetReg(kArg0));
+    LoadRefDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(),
+                TargetReg(kArg0));
 
     // Might call out to helper, which will return resolved string in kRet0
-    LoadWordDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
+    Load32Disp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
     if (cu_->instruction_set == kThumb2 ||
         cu_->instruction_set == kMips) {
       //  OpRegImm(kOpCmp, TargetReg(kRet0), 0);  // Is resolved?
@@ -1036,8 +1039,8 @@
     RegLocation rl_method = LoadCurrMethod();
     RegStorage res_reg = AllocTemp();
     RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    LoadWordDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg);
-    LoadWordDisp(res_reg, offset_of_string, rl_result.reg);
+    LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg);
+    Load32Disp(res_reg, offset_of_string, rl_result.reg);
     StoreValue(rl_dest, rl_result);
   }
 }
@@ -1122,19 +1125,20 @@
 
   LoadCurrMethodDirect(check_class);
   if (use_declaring_class) {
-    LoadWordDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class);
-    LoadWordDisp(object.reg,  mirror::Object::ClassOffset().Int32Value(), object_class);
+    LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class);
+    LoadRefDisp(object.reg,  mirror::Object::ClassOffset().Int32Value(), object_class);
   } else {
-    LoadWordDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
-                 check_class);
-    LoadWordDisp(object.reg,  mirror::Object::ClassOffset().Int32Value(), object_class);
+    LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                check_class);
+    LoadRefDisp(object.reg,  mirror::Object::ClassOffset().Int32Value(), object_class);
     int32_t offset_of_type =
       mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
       (sizeof(mirror::Class*) * type_idx);
-    LoadWordDisp(check_class, offset_of_type, check_class);
+    LoadRefDisp(check_class, offset_of_type, check_class);
   }
 
   LIR* ne_branchover = NULL;
+  // FIXME: what should we be comparing here? compressed or decompressed references?
   if (cu_->instruction_set == kThumb2) {
     OpRegReg(kOpCmp, check_class, object_class);  // Same?
     LIR* it = OpIT(kCondEq, "");   // if-convert the test
@@ -1180,17 +1184,17 @@
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
   } else if (use_declaring_class) {
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
-    LoadWordDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                  class_reg);
   } else {
     // Load dex cache entry into class_reg (kArg2)
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
-    LoadWordDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
-                 class_reg);
+    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                class_reg);
     int32_t offset_of_type =
         mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
         * type_idx);
-    LoadWordDisp(class_reg, offset_of_type, class_reg);
+    LoadRefDisp(class_reg, offset_of_type, class_reg);
     if (!can_assume_type_is_in_dex_cache) {
       // Need to test presence of type in dex cache at runtime
       LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
@@ -1214,7 +1218,7 @@
 
   /* load object->klass_ */
   DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadWordDisp(TargetReg(kArg0),  mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
+  LoadRefDisp(TargetReg(kArg0),  mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
   /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class */
   LIR* branchover = NULL;
   if (type_known_final) {
@@ -1317,16 +1321,16 @@
                             type_idx, TargetReg(kArg1), true);
     OpRegCopy(class_reg, TargetReg(kRet0));  // Align usage with fast path
   } else if (use_declaring_class) {
-    LoadWordDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
-                 class_reg);
+    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+                class_reg);
   } else {
     // Load dex cache entry into class_reg (kArg2)
-    LoadWordDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
-                 class_reg);
+    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                class_reg);
     int32_t offset_of_type =
         mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
         (sizeof(mirror::Class*) * type_idx);
-    LoadWordDisp(class_reg, offset_of_type, class_reg);
+    LoadRefDisp(class_reg, offset_of_type, class_reg);
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) {
       // Need to test presence of type in dex cache at runtime
       LIR* hop_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL);
@@ -1374,8 +1378,8 @@
       GenerateTargetLabel();
 
       if (load_) {
-        m2l_->LoadWordDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
-                           m2l_->TargetReg(kArg1));
+        m2l_->LoadRefDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
+                          m2l_->TargetReg(kArg1));
       }
       m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pCheckCast), m2l_->TargetReg(kArg2),
                                     m2l_->TargetReg(kArg1), true);
@@ -1401,7 +1405,7 @@
     LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
     /* load object->klass_ */
     DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-    LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
+    LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
 
     LIR* branch2 = OpCmpBranch(kCondNe, TargetReg(kArg1), class_reg, NULL);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 758096b..4d8941e 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -360,7 +360,11 @@
   rl_src.reg = TargetReg(kArg0);
   rl_src.home = false;
   MarkLive(rl_src.reg, rl_src.s_reg_low);
-  StoreValue(rl_method, rl_src);
+  if (rl_method.wide) {
+    StoreValueWide(rl_method, rl_src);
+  } else {
+    StoreValue(rl_method, rl_src);
+  }
   // If Method* has been promoted, explicitly flush
   if (rl_method.location == kLocPhysReg) {
     StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
@@ -425,16 +429,15 @@
         }
       }
       if (need_flush) {
-        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, kWord);
+        Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg);
       }
     } else {
       // If arriving in frame & promoted
       if (v_map->core_location == kLocPhysReg) {
-        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
-                     RegStorage::Solo32(v_map->core_reg));
+        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
       }
       if (v_map->fp_location == kLocPhysReg) {
-        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
       }
     }
   }
@@ -476,9 +479,9 @@
       cg->LoadCurrMethodDirect(cg->TargetReg(kArg0));
       break;
     case 1:  // Get method->dex_cache_resolved_methods_
-      cg->LoadWordDisp(cg->TargetReg(kArg0),
-                       mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
-                       cg->TargetReg(kArg0));
+      cg->LoadRefDisp(cg->TargetReg(kArg0),
+                      mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+                      cg->TargetReg(kArg0));
       // Set up direct code if known.
       if (direct_code != 0) {
         if (direct_code != static_cast<unsigned int>(-1)) {
@@ -491,9 +494,9 @@
       break;
     case 2:  // Grab target method*
       CHECK_EQ(cu->dex_file, target_method.dex_file);
-      cg->LoadWordDisp(cg->TargetReg(kArg0),
-                       mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
-                       (target_method.dex_method_index * 4), cg->TargetReg(kArg0));
+      cg->LoadRefDisp(cg->TargetReg(kArg0),
+                      mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+                      (target_method.dex_method_index * 4), cg->TargetReg(kArg0));
       break;
     case 3:  // Grab the code from the method*
       if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
@@ -537,18 +540,18 @@
     case 1:  // Is "this" null? [use kArg1]
       cg->GenNullCheck(cg->TargetReg(kArg1), info->opt_flags);
       // get this->klass_ [use kArg1, set kInvokeTgt]
-      cg->LoadWordDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
-                       cg->TargetReg(kInvokeTgt));
+      cg->LoadRefDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
+                      cg->TargetReg(kInvokeTgt));
       cg->MarkPossibleNullPointerException(info->opt_flags);
       break;
     case 2:  // Get this->klass_->vtable [usr kInvokeTgt, set kInvokeTgt]
-      cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), mirror::Class::VTableOffset().Int32Value(),
-                       cg->TargetReg(kInvokeTgt));
+      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), mirror::Class::VTableOffset().Int32Value(),
+                      cg->TargetReg(kInvokeTgt));
       break;
     case 3:  // Get target method [use kInvokeTgt, set kArg0]
-      cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), (method_idx * 4) +
-                       mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(),
-                       cg->TargetReg(kArg0));
+      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), (method_idx * 4) +
+                      mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(),
+                      cg->TargetReg(kArg0));
       break;
     case 4:  // Get the compiled code address [uses kArg0, sets kInvokeTgt]
       if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
@@ -592,15 +595,17 @@
     case 2:  // Is "this" null? [use kArg1]
       cg->GenNullCheck(cg->TargetReg(kArg1), info->opt_flags);
       // Get this->klass_ [use kArg1, set kInvokeTgt]
-      cg->LoadWordDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
-                       cg->TargetReg(kInvokeTgt));
+      cg->LoadRefDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
+                      cg->TargetReg(kInvokeTgt));
       cg->MarkPossibleNullPointerException(info->opt_flags);
       break;
     case 3:  // Get this->klass_->imtable [use kInvokeTgt, set kInvokeTgt]
-      cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), mirror::Class::ImTableOffset().Int32Value(),
-                       cg->TargetReg(kInvokeTgt));
+      // NOTE: native pointer.
+      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), mirror::Class::ImTableOffset().Int32Value(),
+                      cg->TargetReg(kInvokeTgt));
       break;
     case 4:  // Get target method [use kInvokeTgt, set kArg0]
+      // NOTE: native pointer.
       cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), ((method_idx % ClassLinker::kImtSize) * 4) +
                        mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(),
                        cg->TargetReg(kArg0));
@@ -753,11 +758,11 @@
       } else {
         // kArg2 & rArg3 can safely be used here
         reg = TargetReg(kArg3);
-        LoadWordDisp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg);
+        Load32Disp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg);
         call_state = next_call_insn(cu_, info, call_state, target_method,
                                     vtable_idx, direct_code, direct_method, type);
       }
-      StoreBaseDisp(TargetReg(kSp), (next_use + 1) * 4, reg, kWord);
+      Store32Disp(TargetReg(kSp), (next_use + 1) * 4, reg);
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                   direct_code, direct_method, type);
       next_use++;
@@ -791,7 +796,7 @@
         StoreBaseDispWide(TargetReg(kSp), outs_offset, RegStorage::MakeRegPair(low_reg, high_reg));
         next_use += 2;
       } else {
-        StoreWordDisp(TargetReg(kSp), outs_offset, low_reg);
+        Store32Disp(TargetReg(kSp), outs_offset, low_reg);
         next_use++;
       }
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
@@ -811,7 +816,7 @@
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
       RegStorage tmp = AllocTemp();
-      LoadWordDisp(TargetReg(kArg1), 0, tmp);
+      Load32Disp(TargetReg(kArg1), 0, tmp);
       MarkPossibleNullPointerException(info->opt_flags);
       FreeTemp(tmp);
     }
@@ -862,7 +867,7 @@
     } else {
       loc = UpdateLoc(loc);
       if ((next_arg >= 3) && (loc.location == kLocPhysReg)) {
-        StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kWord);
+        Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
       }
       next_arg++;
     }
@@ -997,8 +1002,8 @@
         RegStorage temp = TargetReg(kArg3);
 
         // Now load the argument VR and store to the outs.
-        LoadWordDisp(TargetReg(kSp), current_src_offset, temp);
-        StoreWordDisp(TargetReg(kSp), current_dest_offset, temp);
+        Load32Disp(TargetReg(kSp), current_src_offset, temp);
+        Store32Disp(TargetReg(kSp), current_dest_offset, temp);
       }
 
       current_src_offset += bytes_to_move;
@@ -1027,7 +1032,7 @@
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
       RegStorage tmp = AllocTemp();
-      LoadWordDisp(TargetReg(kArg1), 0, tmp);
+      Load32Disp(TargetReg(kArg1), 0, tmp);
       MarkPossibleNullPointerException(info->opt_flags);
       FreeTemp(tmp);
     }
@@ -1087,12 +1092,12 @@
     reg_ptr = AllocTemp();
     if (range_check) {
       reg_max = AllocTemp();
-      LoadWordDisp(rl_obj.reg, count_offset, reg_max);
+      Load32Disp(rl_obj.reg, count_offset, reg_max);
       MarkPossibleNullPointerException(info->opt_flags);
     }
-    LoadWordDisp(rl_obj.reg, offset_offset, reg_off);
+    Load32Disp(rl_obj.reg, offset_offset, reg_off);
     MarkPossibleNullPointerException(info->opt_flags);
-    LoadWordDisp(rl_obj.reg, value_offset, reg_ptr);
+    Load32Disp(rl_obj.reg, value_offset, reg_ptr);
     if (range_check) {
       // Set up a launch pad to allow retry in case of bounds violation */
       OpRegReg(kOpCmp, rl_idx.reg, reg_max);
@@ -1115,8 +1120,8 @@
     }
     reg_off = AllocTemp();
     reg_ptr = AllocTemp();
-    LoadWordDisp(rl_obj.reg, offset_offset, reg_off);
-    LoadWordDisp(rl_obj.reg, value_offset, reg_ptr);
+    Load32Disp(rl_obj.reg, offset_offset, reg_off);
+    Load32Disp(rl_obj.reg, value_offset, reg_ptr);
   }
   if (rl_idx.is_const) {
     OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
@@ -1158,7 +1163,7 @@
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   GenNullCheck(rl_obj.reg, info->opt_flags);
-  LoadWordDisp(rl_obj.reg, mirror::String::CountOffset().Int32Value(), rl_result.reg);
+  Load32Disp(rl_obj.reg, mirror::String::CountOffset().Int32Value(), rl_result.reg);
   MarkPossibleNullPointerException(info->opt_flags);
   if (is_empty) {
     // dst = (dst == 0);
@@ -1182,9 +1187,9 @@
     return false;
   }
   RegLocation rl_src_i = info->args[0];
-  RegLocation rl_dest = (size == kLong) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
+  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (size == kLong) {
+  if (size == k64) {
     RegLocation rl_i = LoadValueWide(rl_src_i, kCoreReg);
     RegStorage r_i_low = rl_i.reg.GetLow();
     if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
@@ -1199,8 +1204,8 @@
     }
     StoreValueWide(rl_dest, rl_result);
   } else {
-    DCHECK(size == kWord || size == kSignedHalf);
-    OpKind op = (size == kWord) ? kOpRev : kOpRevsh;
+    DCHECK(size == k32 || size == kSignedHalf);
+    OpKind op = (size == k32) ? kOpRev : kOpRevsh;
     RegLocation rl_i = LoadValue(rl_src_i, kCoreReg);
     OpRegReg(op, rl_result.reg, rl_i.reg);
     StoreValue(rl_dest, rl_result);
@@ -1402,7 +1407,7 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   ThreadOffset<4> offset = Thread::PeerOffset<4>();
   if (cu_->instruction_set == kThumb2 || cu_->instruction_set == kMips) {
-    LoadWordDisp(TargetReg(kSelf), offset.Int32Value(), rl_result.reg);
+    Load32Disp(TargetReg(kSelf), offset.Int32Value(), rl_result.reg);
   } else {
     CHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
     reinterpret_cast<X86Mir2Lir*>(this)->OpRegThreadMem(kOpMov, rl_result.reg.GetReg(), offset);
@@ -1429,7 +1434,7 @@
   if (is_long) {
     if (cu_->instruction_set == kX86) {
       LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg.GetLow(),
-                          rl_result.reg.GetHigh(), kLong, INVALID_SREG);
+                          rl_result.reg.GetHigh(), k64, INVALID_SREG);
     } else {
       RegStorage rl_temp_offset = AllocTemp();
       OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
@@ -1437,7 +1442,7 @@
       FreeTemp(rl_temp_offset.GetReg());
     }
   } else {
-    LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, kWord);
+    LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k32);
   }
 
   if (is_volatile) {
@@ -1477,7 +1482,7 @@
     rl_value = LoadValueWide(rl_src_value, kCoreReg);
     if (cu_->instruction_set == kX86) {
       StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg.GetLow(),
-                           rl_value.reg.GetHigh(), kLong, INVALID_SREG);
+                           rl_value.reg.GetHigh(), k64, INVALID_SREG);
     } else {
       RegStorage rl_temp_offset = AllocTemp();
       OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
@@ -1486,7 +1491,7 @@
     }
   } else {
     rl_value = LoadValue(rl_src_value, kCoreReg);
-    StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, kWord);
+    StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32);
   }
 
   // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 208eadd..9808f7f 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -65,7 +65,7 @@
         OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg);
       } else {
         // Lives in the frame, need to store.
-        StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, kWord);
+        StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32);
       }
       if (!zero_reg.Valid()) {
         FreeTemp(temp_reg);
@@ -74,15 +74,6 @@
   }
 }
 
-/* Load a word at base + displacement.  Displacement must be word multiple */
-LIR* Mir2Lir::LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
-  return LoadBaseDisp(r_base, displacement, r_dest, kWord, INVALID_SREG);
-}
-
-LIR* Mir2Lir::StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseDisp(r_base, displacement, r_src, kWord);
-}
-
 /*
  * Load a Dalvik register into a physical register.  Take care when
  * using this routine, as it doesn't perform any bookkeeping regarding
@@ -93,11 +84,17 @@
   if (rl_src.location == kLocPhysReg) {
     OpRegCopy(r_dest, rl_src.reg);
   } else if (IsInexpensiveConstant(rl_src)) {
+    // On 64-bit targets, will sign extend.  Make sure constant reference is always NULL.
+    DCHECK(!rl_src.ref || (mir_graph_->ConstantValue(rl_src) == 0));
     LoadConstantNoClobber(r_dest, mir_graph_->ConstantValue(rl_src));
   } else {
     DCHECK((rl_src.location == kLocDalvikFrame) ||
            (rl_src.location == kLocCompilerTemp));
-    LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest);
+    if (rl_src.ref) {
+      LoadRefDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest);
+    } else {
+      Load32Disp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest);
+    }
   }
 }
 
@@ -194,7 +191,7 @@
   ResetDefLoc(rl_dest);
   if (IsDirty(rl_dest.reg) && oat_live_out(rl_dest.s_reg_low)) {
     def_start = last_lir_insn_;
-    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, kWord);
+    Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
     MarkClean(rl_dest);
     def_end = last_lir_insn_;
     if (!rl_dest.ref) {
@@ -306,7 +303,7 @@
   if (IsDirty(rl_dest.reg) &&
       oat_live_out(rl_dest.s_reg_low)) {
     LIR *def_start = last_lir_insn_;
-    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, kWord);
+    Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
     MarkClean(rl_dest);
     LIR *def_end = last_lir_insn_;
     if (!rl_dest.ref) {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index a938478..a237ac7 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -112,11 +112,11 @@
   RegStorage r_key = AllocTemp();
   LIR* loop_label = NewLIR0(kPseudoTargetLabel);
   LIR* exit_branch = OpCmpBranch(kCondEq, r_base, r_end, NULL);
-  LoadWordDisp(r_base, 0, r_key);
+  Load32Disp(r_base, 0, r_key);
   OpRegImm(kOpAdd, r_base, 8);
   OpCmpBranch(kCondNe, rl_src.reg, r_key, loop_label);
   RegStorage r_disp = AllocTemp();
-  LoadWordDisp(r_base, -4, r_disp);
+  Load32Disp(r_base, -4, r_disp);
   OpRegRegReg(kOpAdd, rs_rRA, rs_rRA, r_disp);
   OpReg(kOpBx, rs_rRA);
 
@@ -200,7 +200,7 @@
 
   // Load the displacement from the switch table
   RegStorage r_disp = AllocTemp();
-  LoadBaseIndexed(r_base, r_key, r_disp, 2, kWord);
+  LoadBaseIndexed(r_base, r_key, r_disp, 2, k32);
 
   // Add to rAP and go
   OpRegRegReg(kOpAdd, rs_rRA, rs_rRA, r_disp);
@@ -263,9 +263,9 @@
   int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage reset_reg = AllocTemp();
-  LoadWordDisp(rs_rMIPS_SELF, ex_offset, rl_result.reg);
+  Load32Disp(rs_rMIPS_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  StoreWordDisp(rs_rMIPS_SELF, ex_offset, reset_reg);
+  Store32Disp(rs_rMIPS_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
@@ -277,6 +277,7 @@
   RegStorage reg_card_base = AllocTemp();
   RegStorage reg_card_no = AllocTemp();
   LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
+  // NOTE: native pointer.
   LoadWordDisp(rs_rMIPS_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
@@ -310,7 +311,7 @@
   RegStorage new_sp = AllocTemp();
   if (!skip_overflow_check) {
     /* Load stack limit */
-    LoadWordDisp(rs_rMIPS_SELF, Thread::StackEndOffset<4>().Int32Value(), check_reg);
+    Load32Disp(rs_rMIPS_SELF, Thread::StackEndOffset<4>().Int32Value(), check_reg);
   }
   /* Spill core callee saves */
   SpillCoreRegs();
@@ -328,7 +329,7 @@
         m2l_->ResetDefTracking();
         GenerateTargetLabel();
         // LR is offset 0 since we push in reverse order.
-        m2l_->LoadWordDisp(rs_rMIPS_SP, 0, rs_rRA);
+        m2l_->Load32Disp(rs_rMIPS_SP, 0, rs_rRA);
         m2l_->OpRegImm(kOpAdd, rs_rMIPS_SP, sp_displace_);
         m2l_->ClobberCallerSave();
         ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 2375720..b38a8d2 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -480,7 +480,7 @@
   rl_array = LoadValue(rl_array, kCoreReg);
   rl_index = LoadValue(rl_index, kCoreReg);
 
-  if (size == kLong || size == kDouble) {
+  if (size == k64 || size == kDouble) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
@@ -495,12 +495,12 @@
   if (needs_range_check) {
     reg_len = AllocTemp();
     /* Get len */
-    LoadWordDisp(rl_array.reg, len_offset, reg_len);
+    Load32Disp(rl_array.reg, len_offset, reg_len);
   }
   /* reg_ptr -> array data */
   OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
   FreeTemp(rl_array.reg.GetReg());
-  if ((size == kLong) || (size == kDouble)) {
+  if ((size == k64) || (size == kDouble)) {
     if (scale) {
       RegStorage r_new_index = AllocTemp();
       OpRegRegImm(kOpLsl, r_new_index, rl_index.reg, scale);
@@ -544,7 +544,7 @@
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
 
-  if (size == kLong || size == kDouble) {
+  if (size == k64 || size == kDouble) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
@@ -572,12 +572,12 @@
     reg_len = AllocTemp();
     // NOTE: max live temps(4) here.
     /* Get len */
-    LoadWordDisp(rl_array.reg, len_offset, reg_len);
+    Load32Disp(rl_array.reg, len_offset, reg_len);
   }
   /* reg_ptr -> array data */
   OpRegImm(kOpAdd, reg_ptr, data_offset);
   /* at this point, reg_ptr points to array, 2 live temps */
-  if ((size == kLong) || (size == kDouble)) {
+  if ((size == k64) || (size == kDouble)) {
     // TUNING: specific wide routine that can handle fp regs
     if (scale) {
       RegStorage r_new_index = AllocTemp();
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 3e02fae..7f4cd5e 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -369,7 +369,7 @@
   if (info->live && info->dirty) {
     info->dirty = false;
     int v_reg = mir_graph_->SRegToVReg(info->s_reg);
-    StoreBaseDisp(rs_rMIPS_SP, VRegOffset(v_reg), reg, kWord);
+    Store32Disp(rs_rMIPS_SP, VRegOffset(v_reg), reg);
   }
 }
 
@@ -531,12 +531,14 @@
  * there is a trap in the shadow.  Allocate a temp register.
  */
 RegStorage MipsMir2Lir::LoadHelper(ThreadOffset<4> offset) {
+  // NOTE: native pointer.
   LoadWordDisp(rs_rMIPS_SELF, offset.Int32Value(), rs_rT9);
   return rs_rT9;
 }
 
 LIR* MipsMir2Lir::CheckSuspendUsingLoad() {
   RegStorage tmp = AllocTemp();
+  // NOTE: native pointer.
   LoadWordDisp(rs_rMIPS_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
   LIR *inst = LoadWordDisp(tmp, 0, tmp);
   FreeTemp(tmp);
@@ -553,7 +555,7 @@
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
       offset -= 4;
-      StoreWordDisp(rs_rMIPS_SP, offset, RegStorage::Solo32(reg));
+      Store32Disp(rs_rMIPS_SP, offset, RegStorage::Solo32(reg));
     }
   }
 }
@@ -567,7 +569,7 @@
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
       offset -= 4;
-      LoadWordDisp(rs_rMIPS_SP, offset, RegStorage::Solo32(reg));
+      Load32Disp(rs_rMIPS_SP, offset, RegStorage::Solo32(reg));
     }
   }
   OpRegImm(kOpAdd, rs_rSP, frame_size_);
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index c959510..12775e1 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -357,11 +357,11 @@
 
   if (MIPS_FPREG(r_dest.GetReg())) {
     DCHECK(MIPS_SINGLEREG(r_dest.GetReg()));
-    DCHECK((size == kWord) || (size == kSingle));
+    DCHECK((size == k32) || (size == kSingle));
     size = kSingle;
   } else {
     if (size == kSingle)
-      size = kWord;
+      size = k32;
   }
 
   if (!scale) {
@@ -375,7 +375,8 @@
     case kSingle:
       opcode = kMipsFlwc1;
       break;
-    case kWord:
+    case k32:
+    case kReference:
       opcode = kMipsLw;
       break;
     case kUnsignedHalf:
@@ -408,11 +409,11 @@
 
   if (MIPS_FPREG(r_src.GetReg())) {
     DCHECK(MIPS_SINGLEREG(r_src.GetReg()));
-    DCHECK((size == kWord) || (size == kSingle));
+    DCHECK((size == k32) || (size == kSingle));
     size = kSingle;
   } else {
     if (size == kSingle)
-      size = kWord;
+      size = k32;
   }
 
   if (!scale) {
@@ -426,7 +427,8 @@
     case kSingle:
       opcode = kMipsFswc1;
       break;
-    case kWord:
+    case k32:
+    case kReference:
       opcode = kMipsSw;
       break;
     case kUnsignedHalf:
@@ -463,7 +465,7 @@
   bool pair = false;
 
   switch (size) {
-    case kLong:
+    case k64:
     case kDouble:
       pair = true;
       opcode = kMipsLw;
@@ -481,8 +483,9 @@
       short_form = IS_SIMM16_2WORD(displacement);
       DCHECK_EQ((displacement & 0x3), 0);
       break;
-    case kWord:
+    case k32:
     case kSingle:
+    case kReference:
       opcode = kMipsLw;
       if (MIPS_FPREG(r_dest.GetReg())) {
         opcode = kMipsFlwc1;
@@ -544,13 +547,17 @@
 
 LIR* MipsMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                                OpSize size, int s_reg) {
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
   return LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size,
                           s_reg);
 }
 
 LIR* MipsMir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
                                    int s_reg) {
-  return LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), kLong, s_reg);
+  return LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), k64, s_reg);
 }
 
 LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement,
@@ -563,7 +570,7 @@
   bool pair = false;
 
   switch (size) {
-    case kLong:
+    case k64:
     case kDouble:
       pair = true;
       opcode = kMipsSw;
@@ -580,8 +587,9 @@
       short_form = IS_SIMM16_2WORD(displacement);
       DCHECK_EQ((displacement & 0x3), 0);
       break;
-    case kWord:
+    case k32:
     case kSingle:
+    case kReference:
       opcode = kMipsSw;
       if (MIPS_FPREG(r_src.GetReg())) {
         opcode = kMipsFswc1;
@@ -635,11 +643,15 @@
 
 LIR* MipsMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                 OpSize size) {
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
   return StoreBaseDispBody(r_base, displacement, r_src, RegStorage::InvalidReg(), size);
 }
 
 LIR* MipsMir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseDispBody(r_base, displacement, r_src.GetLow(), r_src.GetHigh(), kLong);
+  return StoreBaseDispBody(r_base, displacement, r_src.GetLow(), r_src.GetHigh(), k64);
 }
 
 LIR* MipsMir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
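The kWord-to-k32 mapping in LoadBaseDisp and StoreBaseDisp above is what the "TODO: base this on target" comments refer to: on this 32-bit MIPS backend the natural word size is simply 32 bits, but a 64-bit port would need to resolve kWord by pointer width. A hypothetical sketch of that resolution, not part of this change and with an illustrative name:

    // Hypothetical helper: resolve the target-dependent kWord to a concrete size.
    static OpSize ResolveWordSize(OpSize size, bool target_is_64bit) {
      if (size == kWord) {
        return target_is_64bit ? k64 : k32;
      }
      return size;
    }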
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 6fcdf70..ca51872 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -63,14 +63,14 @@
     } else {
       reg_arg_high = AllocTemp();
       int offset_high = offset + sizeof(uint32_t);
-      LoadWordDisp(TargetReg(kSp), offset_high, reg_arg_high);
+      Load32Disp(TargetReg(kSp), offset_high, reg_arg_high);
     }
   }
 
   // If the low part is not in a register yet, we need to load it.
   if (!reg_arg_low.Valid()) {
     reg_arg_low = AllocTemp();
-    LoadWordDisp(TargetReg(kSp), offset, reg_arg_low);
+    Load32Disp(TargetReg(kSp), offset, reg_arg_low);
   }
 
   if (wide) {
@@ -96,7 +96,7 @@
     if (reg.Valid()) {
       OpRegCopy(rl_dest.reg, reg);
     } else {
-      LoadWordDisp(TargetReg(kSp), offset, rl_dest.reg);
+      Load32Disp(TargetReg(kSp), offset, rl_dest.reg);
     }
   } else {
     RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
@@ -107,10 +107,10 @@
     } else if (reg_arg_low.Valid() && !reg_arg_high.Valid()) {
       OpRegCopy(rl_dest.reg, reg_arg_low);
       int offset_high = offset + sizeof(uint32_t);
-      LoadWordDisp(TargetReg(kSp), offset_high, rl_dest.reg.GetHigh());
+      Load32Disp(TargetReg(kSp), offset_high, rl_dest.reg.GetHigh());
     } else if (!reg_arg_low.Valid() && reg_arg_high.Valid()) {
       OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high);
-      LoadWordDisp(TargetReg(kSp), offset, rl_dest.reg.GetLow());
+      Load32Disp(TargetReg(kSp), offset, rl_dest.reg.GetLow());
     } else {
       LoadBaseDispWide(TargetReg(kSp), offset, rl_dest.reg, INVALID_SREG);
     }
@@ -137,7 +137,7 @@
   if (wide) {
     LoadBaseDispWide(reg_obj, data.field_offset, rl_dest.reg, INVALID_SREG);
   } else {
-    LoadWordDisp(reg_obj, data.field_offset, rl_dest.reg);
+    Load32Disp(reg_obj, data.field_offset, rl_dest.reg);
   }
   if (data.is_volatile) {
     // Without context sensitive analysis, we must issue the most conservative barriers.
@@ -175,7 +175,7 @@
   if (wide) {
     StoreBaseDispWide(reg_obj, data.field_offset, reg_src);
   } else {
-    StoreBaseDisp(reg_obj, data.field_offset, reg_src, kWord);
+    Store32Disp(reg_obj, data.field_offset, reg_src);
   }
   if (data.is_volatile) {
     // A load might follow the volatile store so insert a StoreLoad barrier.
@@ -449,7 +449,7 @@
       rl_src[0] = LoadValue(rl_src[0], kCoreReg);
       GenNullCheck(rl_src[0].reg, opt_flags);
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      LoadWordDisp(rl_src[0].reg, len_offset, rl_result.reg);
+      Load32Disp(rl_src[0].reg, len_offset, rl_result.reg);
       MarkPossibleNullPointerException(opt_flags);
       StoreValue(rl_dest, rl_result);
       break;
@@ -562,11 +562,13 @@
       }
 
     case Instruction::AGET_WIDE:
-      GenArrayGet(opt_flags, kLong, rl_src[0], rl_src[1], rl_dest, 3);
+      GenArrayGet(opt_flags, k64, rl_src[0], rl_src[1], rl_dest, 3);
+      break;
+    case Instruction::AGET_OBJECT:
+      GenArrayGet(opt_flags, kReference, rl_src[0], rl_src[1], rl_dest, 2);
       break;
     case Instruction::AGET:
-    case Instruction::AGET_OBJECT:
-      GenArrayGet(opt_flags, kWord, rl_src[0], rl_src[1], rl_dest, 2);
+      GenArrayGet(opt_flags, k32, rl_src[0], rl_src[1], rl_dest, 2);
       break;
     case Instruction::AGET_BOOLEAN:
       GenArrayGet(opt_flags, kUnsignedByte, rl_src[0], rl_src[1], rl_dest, 0);
@@ -581,10 +583,10 @@
       GenArrayGet(opt_flags, kSignedHalf, rl_src[0], rl_src[1], rl_dest, 1);
       break;
     case Instruction::APUT_WIDE:
-      GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3, false);
+      GenArrayPut(opt_flags, k64, rl_src[1], rl_src[2], rl_src[0], 3, false);
       break;
     case Instruction::APUT:
-      GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, false);
+      GenArrayPut(opt_flags, k32, rl_src[1], rl_src[2], rl_src[0], 2, false);
       break;
     case Instruction::APUT_OBJECT: {
       bool is_null = mir_graph_->IsConstantNullRef(rl_src[0]);
@@ -597,7 +599,7 @@
       if (is_null || is_safe) {
         // Store of constant null doesn't require an assignability test and can be generated inline
         // without fixed register usage or a card mark.
-        GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, !is_null);
+        GenArrayPut(opt_flags, kReference, rl_src[1], rl_src[2], rl_src[0], 2, !is_null);
       } else {
         GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0]);
       }
@@ -613,15 +615,15 @@
       break;
 
     case Instruction::IGET_OBJECT:
-      GenIGet(mir, opt_flags, kWord, rl_dest, rl_src[0], false, true);
+      GenIGet(mir, opt_flags, kReference, rl_dest, rl_src[0], false, true);
       break;
 
     case Instruction::IGET_WIDE:
-      GenIGet(mir, opt_flags, kLong, rl_dest, rl_src[0], true, false);
+      GenIGet(mir, opt_flags, k64, rl_dest, rl_src[0], true, false);
       break;
 
     case Instruction::IGET:
-      GenIGet(mir, opt_flags, kWord, rl_dest, rl_src[0], false, false);
+      GenIGet(mir, opt_flags, k32, rl_dest, rl_src[0], false, false);
       break;
 
     case Instruction::IGET_CHAR:
@@ -638,15 +640,15 @@
       break;
 
     case Instruction::IPUT_WIDE:
-      GenIPut(mir, opt_flags, kLong, rl_src[0], rl_src[1], true, false);
+      GenIPut(mir, opt_flags, k64, rl_src[0], rl_src[1], true, false);
       break;
 
     case Instruction::IPUT_OBJECT:
-      GenIPut(mir, opt_flags, kWord, rl_src[0], rl_src[1], false, true);
+      GenIPut(mir, opt_flags, kReference, rl_src[0], rl_src[1], false, true);
       break;
 
     case Instruction::IPUT:
-      GenIPut(mir, opt_flags, kWord, rl_src[0], rl_src[1], false, false);
+      GenIPut(mir, opt_flags, k32, rl_src[0], rl_src[1], false, false);
       break;
 
     case Instruction::IPUT_BOOLEAN:
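In the GenArrayGet/GenArrayPut calls above, the scale argument passed alongside the OpSize is log2 of the element width, so element i of the payload lives at data_offset + (i << scale). A quick restatement of the pairings used in the cases above (illustrative only, no new behavior):

    //   k64         -> scale 3   // 8-byte elements (AGET_WIDE / APUT_WIDE)
    //   k32         -> scale 2   // 4-byte elements (AGET / APUT)
    //   kReference  -> scale 2   // 4-byte reference slots (AGET_OBJECT / APUT_OBJECT)
    //   kSignedHalf -> scale 1,  kUnsignedByte -> scale 0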
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 1f69eb5..d0762eb 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -109,6 +109,11 @@
 #define REG_USE23            (REG_USE2 | REG_USE3)
 #define REG_USE123           (REG_USE1 | REG_USE2 | REG_USE3)
 
+// TODO: #includes need a cleanup
+#ifndef INVALID_SREG
+#define INVALID_SREG (-1)
+#endif
+
 struct BasicBlock;
 struct CallInfo;
 struct CompilationUnit;
@@ -725,14 +730,42 @@
     RegLocation LoadCurrMethod();
     void LoadCurrMethodDirect(RegStorage r_tgt);
     LIR* LoadConstant(RegStorage r_dest, int value);
-    LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest);
+    // Natural word size.
+    LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
+      return LoadBaseDisp(r_base, displacement, r_dest, kWord, INVALID_SREG);
+    }
+    // Load 32 bits, regardless of target.
+    LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest) {
+      return LoadBaseDisp(r_base, displacement, r_dest, k32, INVALID_SREG);
+    }
+    // Load a reference at base + displacement and decompress into register.
+    LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
+      return LoadBaseDisp(r_base, displacement, r_dest, kReference, INVALID_SREG);
+    }
+    // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
     RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
+    // Load Dalvik value with 64-bit memory storage.
     RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
+    // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
     void LoadValueDirect(RegLocation rl_src, RegStorage r_dest);
+    // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
     void LoadValueDirectFixed(RegLocation rl_src, RegStorage r_dest);
+    // Load Dalvik value with 64-bit memory storage.
     void LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest);
+    // Load Dalvik value with 64-bit memory storage.
     void LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest);
-    LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src);
+    // Store an item of natural word size.
+    LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) {
+      return StoreBaseDisp(r_base, displacement, r_src, kWord);
+    }
+    // Store an uncompressed reference into a compressed 32-bit container.
+    LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src) {
+      return StoreBaseDisp(r_base, displacement, r_src, kReference);
+    }
+    // Store 32 bits, regardless of target.
+    LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
+      return StoreBaseDisp(r_base, displacement, r_src, k32);
+    }
 
     /**
      * @brief Used to do the final store in the destination as per bytecode semantics.
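The new load/store wrappers above let call sites state what a memory slot holds instead of hard-coding kWord. A minimal sketch of the intended selection, reusing names from the surrounding hunks (reg_arg_high, class_reg and the MIPS registers stand in for whatever the caller actually allocated):

    // 32-bit Dalvik value: always a 32-bit access, even on a 64-bit target.
    Load32Disp(TargetReg(kSp), offset_high, reg_arg_high);
    // Heap reference: 32-bit slot that may be compressed on 64-bit targets; decompressed on load.
    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg);
    // Native pointer (e.g. a Thread field): natural word size of the target.
    LoadWordDisp(rs_rMIPS_SELF, offset.Int32Value(), rs_rT9);
    // Stores mirror the loads: Store32Disp / StoreRefDisp / StoreWordDisp.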
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 729b30d..746ae9e 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -251,7 +251,8 @@
     // We have been asked to save the address of the method start for later use.
     setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rX86_ARG0);
     int displacement = SRegOffset(base_of_code_->s_reg_low);
-    setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, rs_rX86_ARG0, kWord);
+    // Native pointer - must be natural word size.
+    setup_method_address_[1] = StoreWordDisp(rs_rX86_SP, displacement, rs_rX86_ARG0);
   }
 
   FreeTemp(rX86_ARG0);
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index ee5387f..f7b0c9d 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -193,7 +193,7 @@
     } else {
       rl_result = EvalLoc(rl_dest, kFPReg, true);
 
-      LoadWordDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg);
+      Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg);
 
       StoreFinalValue(rl_dest, rl_result);
     }
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index a23a3bf..4dffa43 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -704,15 +704,15 @@
 bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
   RegLocation rl_src_address = info->args[0];  // long address
   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
-  RegLocation rl_dest = size == kLong ? InlineTargetWide(info) : InlineTarget(info);
+  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (size == kLong) {
+  if (size == k64) {
     // Unaligned access is allowed on x86.
     LoadBaseDispWide(rl_address.reg, 0, rl_result.reg, INVALID_SREG);
     StoreValueWide(rl_dest, rl_result);
   } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned access is allowed on x86.
     LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
     StoreValue(rl_dest, rl_result);
@@ -725,12 +725,12 @@
   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
   RegLocation rl_src_value = info->args[2];  // [size] value
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
-  if (size == kLong) {
+  if (size == k64) {
     // Unaligned access is allowed on x86.
     RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
     StoreBaseDispWide(rl_address.reg, 0, rl_value.reg);
   } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned access is allowed on x86.
     RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
     StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
@@ -780,6 +780,7 @@
     int srcObjSp = IsInReg(this, rl_src_obj, rs_rSI) ? 0
                 : (IsInReg(this, rl_src_obj, rs_rDI) ? 4
                 : (SRegOffset(rl_src_obj.s_reg_low) + push_offset));
+    // FIXME: needs 64-bit update.
     LoadWordDisp(TargetReg(kSp), srcObjSp, rs_rDI);
     int srcOffsetSp = IsInReg(this, rl_src_offset, rs_rSI) ? 0
                    : (IsInReg(this, rl_src_offset, rs_rDI) ? 4
@@ -1024,7 +1025,7 @@
       NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
       break;
     case 1:
-      LoadBaseDisp(rs_rX86_SP, displacement, dest, kWord, sreg);
+      LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, sreg);
       break;
     default:
       m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(), rX86_SP,
@@ -1130,7 +1131,7 @@
     NewLIR2(kX86Mov32RR, r1, rl_src1.reg.GetHighReg());
   } else {
     LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1,
-                 kWord, GetSRegHi(rl_src1.s_reg_low));
+                 k32, GetSRegHi(rl_src1.s_reg_low));
   }
 
   if (is_square) {
@@ -1153,7 +1154,7 @@
       NewLIR2(kX86Mov32RR, r0, rl_src2.reg.GetHighReg());
     } else {
       LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0,
-                   kWord, GetSRegHi(rl_src2.s_reg_low));
+                   k32, GetSRegHi(rl_src2.s_reg_low));
     }
 
     // EAX <- EAX * 1L  (2H * 1L)
@@ -1185,7 +1186,7 @@
     NewLIR2(kX86Mov32RR, r0, rl_src2.reg.GetLowReg());
   } else {
     LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0,
-                 kWord, rl_src2.s_reg_low);
+                 k32, rl_src2.s_reg_low);
   }
 
   // EDX:EAX <- 2L * 1L (double precision)
@@ -1405,7 +1406,7 @@
   rl_array = LoadValue(rl_array, kCoreReg);
 
   int data_offset;
-  if (size == kLong || size == kDouble) {
+  if (size == k64 || size == kDouble) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
@@ -1434,7 +1435,7 @@
     }
   }
   rl_result = EvalLoc(rl_dest, reg_class, true);
-  if ((size == kLong) || (size == kDouble)) {
+  if ((size == k64) || (size == kDouble)) {
     LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg.GetLow(),
                         rl_result.reg.GetHigh(), size, INVALID_SREG);
     StoreValueWide(rl_dest, rl_result);
@@ -1455,7 +1456,7 @@
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
 
-  if (size == kLong || size == kDouble) {
+  if (size == k64 || size == kDouble) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
@@ -1484,7 +1485,7 @@
       GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
     }
   }
-  if ((size == kLong) || (size == kDouble)) {
+  if ((size == k64) || (size == kDouble)) {
     rl_src = LoadValueWide(rl_src, reg_class);
   } else {
     rl_src = LoadValue(rl_src, reg_class);
@@ -1871,22 +1872,22 @@
 
   if (rl_method.location == kLocPhysReg) {
     if (use_declaring_class) {
-      LoadWordDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                    check_class);
     } else {
-      LoadWordDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                    check_class);
-      LoadWordDisp(check_class, offset_of_type, check_class);
+      LoadRefDisp(check_class, offset_of_type, check_class);
     }
   } else {
     LoadCurrMethodDirect(check_class);
     if (use_declaring_class) {
-      LoadWordDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+      LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                    check_class);
     } else {
-      LoadWordDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+      LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                    check_class);
-      LoadWordDisp(check_class, offset_of_type, check_class);
+      LoadRefDisp(check_class, offset_of_type, check_class);
     }
   }
 
@@ -1927,17 +1928,17 @@
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));
   } else if (use_declaring_class) {
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));
-    LoadWordDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                  class_reg);
   } else {
     // Load dex cache entry into class_reg (kArg2).
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));
-    LoadWordDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  class_reg);
     int32_t offset_of_type =
         mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
         * type_idx);
-    LoadWordDisp(class_reg, offset_of_type, class_reg);
+    LoadRefDisp(class_reg, offset_of_type, class_reg);
     if (!can_assume_type_is_in_dex_cache) {
       // Need to test presence of type in dex cache at runtime.
       LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
@@ -1961,7 +1962,7 @@
 
   /* Load object->klass_. */
   DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadWordDisp(TargetReg(kArg0),  mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
+  LoadRefDisp(TargetReg(kArg0),  mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
   /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */
   LIR* branchover = nullptr;
   if (type_known_final) {
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 5a8ad7a..209285a 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -369,12 +369,13 @@
 }
 
 void X86Mir2Lir::FlushReg(RegStorage reg) {
+  // FIXME: need to handle 32 bits in 64-bit register as well as wide values held in single reg.
   DCHECK(!reg.IsPair());
   RegisterInfo* info = GetRegInfo(reg.GetReg());
   if (info->live && info->dirty) {
     info->dirty = false;
     int v_reg = mir_graph_->SRegToVReg(info->s_reg);
-    StoreBaseDisp(rs_rX86_SP, VRegOffset(v_reg), reg, kWord);
+    StoreBaseDisp(rs_rX86_SP, VRegOffset(v_reg), reg, k32);
   }
 }
 
@@ -1061,7 +1062,7 @@
   NewLIR1(kX86Push32R, rDI);
 
   // Compute the number of words to search in to rCX.
-  LoadWordDisp(rs_rDX, count_offset, rs_rCX);
+  Load32Disp(rs_rDX, count_offset, rs_rCX);
   LIR *length_compare = nullptr;
   int start_value = 0;
   bool is_index_on_stack = false;
@@ -1101,7 +1102,7 @@
       } else {
         // Load the start index from stack, remembering that we pushed EDI.
         int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
-        LoadWordDisp(rs_rX86_SP, displacement, rs_rBX);
+        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
         OpRegReg(kOpXor, rs_rDI, rs_rDI);
         OpRegReg(kOpCmp, rs_rBX, rs_rDI);
         OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);
@@ -1120,8 +1121,8 @@
 
   // Load the address of the string into EBX.
   // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
-  LoadWordDisp(rs_rDX, value_offset, rs_rDI);
-  LoadWordDisp(rs_rDX, offset_offset, rs_rBX);
+  Load32Disp(rs_rDX, value_offset, rs_rDI);
+  Load32Disp(rs_rDX, offset_offset, rs_rBX);
   OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);
 
   // Now compute into EDI where the search will start.
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index e9faa7f..00bebd2 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -554,7 +554,7 @@
   bool is64bit = false;
   X86OpCode opcode = kX86Nop;
   switch (size) {
-    case kLong:
+    case k64:
     case kDouble:
       // TODO: use regstorage attributes here.
       is64bit = true;
@@ -567,8 +567,9 @@
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
       break;
-    case kWord:
+    case k32:
     case kSingle:
+    case kReference:  // TODO: update for reference decompression on 64-bit targets.
       opcode = is_array ? kX86Mov32RA : kX86Mov32RM;
       if (X86_FPREG(r_dest.GetReg())) {
         opcode = is_array ? kX86MovssRA : kX86MovssRM;
@@ -669,6 +670,10 @@
 
 LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement,
                   RegStorage r_dest, OpSize size, int s_reg) {
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
   return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement,
                              r_dest, RegStorage::InvalidReg(), size, s_reg);
 }
@@ -676,7 +681,7 @@
 LIR* X86Mir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
                                   int s_reg) {
   return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement,
-                             r_dest.GetLow(), r_dest.GetHigh(), kLong, s_reg);
+                             r_dest.GetLow(), r_dest.GetHigh(), k64, s_reg);
 }
 
 LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
@@ -690,7 +695,7 @@
   bool is64bit = false;
   X86OpCode opcode = kX86Nop;
   switch (size) {
-    case kLong:
+    case k64:
     case kDouble:
       is64bit = true;
       if (X86_FPREG(r_src.GetReg())) {
@@ -702,8 +707,9 @@
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
       break;
-    case kWord:
+    case k32:
     case kSingle:
+    case kReference:
       opcode = is_array ? kX86Mov32AR : kX86Mov32MR;
       if (X86_FPREG(r_src.GetReg())) {
         opcode = is_array ? kX86MovssAR : kX86MovssMR;
@@ -763,13 +769,17 @@
 
 LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement,
                                RegStorage r_src, OpSize size) {
-    return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src,
-                                RegStorage::InvalidReg(), size, INVALID_SREG);
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
+  return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src,
+                              RegStorage::InvalidReg(), size, INVALID_SREG);
 }
 
 LIR* X86Mir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
   return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement,
-                              r_src.GetLow(), r_src.GetHigh(), kLong, INVALID_SREG);
+                              r_src.GetLow(), r_src.GetHigh(), k64, INVALID_SREG);
 }
 
 /*
diff --git a/runtime/arch/arm/jni_entrypoints_arm.S b/runtime/arch/arm/jni_entrypoints_arm.S
index 4a69644..1be34ba 100644
--- a/runtime/arch/arm/jni_entrypoints_arm.S
+++ b/runtime/arch/arm/jni_entrypoints_arm.S
@@ -41,27 +41,3 @@
     pop    {r0, r1, r2, r3, pc}           @ restore regs and return to caller to handle exception
     .cfi_adjust_cfa_offset -20
 END art_jni_dlsym_lookup_stub
-
-    /*
-     * Entry point of native methods when JNI bug compatibility is enabled.
-     */
-    .extern artWorkAroundAppJniBugs
-ENTRY art_work_around_app_jni_bugs
-    @ save registers that may contain arguments and LR that will be crushed by a call
-    push {r0-r3, lr}
-    .save {r0-r3, lr}
-    .cfi_adjust_cfa_offset 16
-    .cfi_rel_offset r0, 0
-    .cfi_rel_offset r1, 4
-    .cfi_rel_offset r2, 8
-    .cfi_rel_offset r3, 12
-    sub sp, #12      @ 3 words of space for alignment
-    mov r0, r9       @ pass Thread::Current
-    mov r1, sp       @ pass SP
-    bl  artWorkAroundAppJniBugs  @ (Thread*, SP)
-    add sp, #12      @ rewind stack
-    mov r12, r0      @ save target address
-    pop {r0-r3, lr}  @ restore possibly modified argument registers
-    .cfi_adjust_cfa_offset -16
-    bx  r12          @ tail call into JNI routine
-END art_work_around_app_jni_bugs
diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S
index ba783ab..c59a304 100644
--- a/runtime/arch/arm64/jni_entrypoints_arm64.S
+++ b/runtime/arch/arm64/jni_entrypoints_arm64.S
@@ -55,41 +55,3 @@
 1:
   ret             // restore regs and return to caller to handle exception.
 END art_jni_dlsym_lookup_stub
-
-    /*
-     * Entry point of native methods when JNI bug compatibility is enabled.
-     */
-    .extern artWorkAroundAppJniBugs
-ENTRY art_work_around_app_jni_bugs
-  // spill regs.
-  stp   x29, x30, [sp, #-16]!
-  mov   x29, sp
-  stp   d6, d7,   [sp, #-16]!
-  stp   d4, d5,   [sp, #-16]!
-  stp   d2, d3,   [sp, #-16]!
-  stp   d0, d1,   [sp, #-16]!
-  stp   x6, x7,   [sp, #-16]!
-  stp   x4, x5,   [sp, #-16]!
-  stp   x2, x3,   [sp, #-16]!
-  stp   x0, x1,   [sp, #-16]!
-
-  mov   x0, x19   // Thread::Current.
-  mov   x1, sp    // SP.
-  bl    artWorkAroundAppJniBugs   // (Thread*, SP).
-  mov   x17, x0   // save target return.
-
-  // load spill regs.
-  ldp   x0, x1,   [sp], #16
-  ldp   x2, x3,   [sp], #16
-  ldp   x4, x5,   [sp], #16
-  ldp   x6, x7,   [sp], #16
-  ldp   d0, d1,   [sp], #16
-  ldp   d2, d3,   [sp], #16
-  ldp   d4, d5,   [sp], #16
-  ldp   d6, d7,   [sp], #16
-  ldp   x29, x30, [sp], #16
-
-  //tail call into JNI routine.
-  br    x17
-END art_work_around_app_jni_bugs
-
diff --git a/runtime/arch/mips/jni_entrypoints_mips.S b/runtime/arch/mips/jni_entrypoints_mips.S
index f9ca7df..e5f4a79 100644
--- a/runtime/arch/mips/jni_entrypoints_mips.S
+++ b/runtime/arch/mips/jni_entrypoints_mips.S
@@ -54,36 +54,3 @@
     jr    $ra
     nop
 END art_jni_dlsym_lookup_stub
-
-    /*
-     * Entry point of native methods when JNI bug compatibility is enabled.
-     */
-    .extern artWorkAroundAppJniBugs
-ENTRY art_work_around_app_jni_bugs
-    GENERATE_GLOBAL_POINTER
-    # save registers that may contain arguments and LR that will be crushed by a call
-    addiu    $sp, $sp, -32
-    .cfi_adjust_cfa_offset 32
-    sw       $ra, 28($sp)
-    .cfi_rel_offset 31, 28
-    sw       $a3, 24($sp)
-    .cfi_rel_offset 7, 28
-    sw       $a2, 20($sp)
-    .cfi_rel_offset 6, 28
-    sw       $a1, 16($sp)
-    .cfi_rel_offset 5, 28
-    sw       $a0, 12($sp)
-    .cfi_rel_offset 4, 28
-    move     $a0, rSELF       # pass Thread::Current
-    jal      artWorkAroundAppJniBugs  # (Thread*, $sp)
-    move     $a1, $sp         # pass $sp
-    move     $t9, $v0         # save target address
-    lw       $a0, 12($sp)
-    lw       $a1, 16($sp)
-    lw       $a2, 20($sp)
-    lw       $a3, 24($sp)
-    lw       $ra, 28($sp)
-    jr       $t9              # tail call into JNI routine
-    addiu    $sp, $sp, 32
-    .cfi_adjust_cfa_offset -32
-END art_work_around_app_jni_bugs
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 960c26d..b52941b 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -765,14 +765,9 @@
     // Verify that the current thread is (a) attached and (b) associated with
     // this particular instance of JNIEnv.
     if (soa_.Env() != threadEnv) {
-      if (soa_.Vm()->work_around_app_jni_bugs) {
-        // If we're keeping broken code limping along, we need to suppress the abort...
-        LOG(ERROR) << "APP BUG DETECTED: thread " << *self << " using JNIEnv* from thread " << *soa_.Self();
-      } else {
-        JniAbortF(function_name_, "thread %s using JNIEnv* from thread %s",
-                  ToStr<Thread>(*self).c_str(), ToStr<Thread>(*soa_.Self()).c_str());
-        return;
-      }
+      JniAbortF(function_name_, "thread %s using JNIEnv* from thread %s",
+                ToStr<Thread>(*self).c_str(), ToStr<Thread>(*soa_.Self()).c_str());
+      return;
     }
 
     // Verify that, if this thread previously made a critical "get" call, we
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index cbefa6a..0f2d6a9 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -279,7 +279,7 @@
   return nullptr;
 }
 
-void RosAlloc::FreePages(Thread* self, void* ptr) {
+size_t RosAlloc::FreePages(Thread* self, void* ptr) {
   lock_.AssertHeld(self);
   size_t pm_idx = ToPageMapIndex(ptr);
   DCHECK_LT(pm_idx, page_map_size_);
@@ -298,7 +298,7 @@
     LOG(FATAL) << "Unreachable - RosAlloc::FreePages() : " << "pm_idx=" << pm_idx << ", pm_type="
                << static_cast<int>(pm_type) << ", ptr=" << std::hex
                << reinterpret_cast<intptr_t>(ptr);
-    return;
+    return 0;
   }
   // Update the page map and count the number of pages.
   size_t num_pages = 1;
@@ -422,6 +422,7 @@
     LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
               << " into free_page_runs_";
   }
+  return num_pages;
 }
 
 void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) {
@@ -460,12 +461,11 @@
   return r;
 }
 
-void RosAlloc::FreeInternal(Thread* self, void* ptr) {
+size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
   DCHECK_LE(base_, ptr);
   DCHECK_LT(ptr, base_ + footprint_);
   size_t pm_idx = RoundDownToPageMapIndex(ptr);
-  bool free_from_run = false;
-  Run* run = NULL;
+  Run* run = nullptr;
   {
     MutexLock mu(self, lock_);
     DCHECK_LT(pm_idx, page_map_size_);
@@ -477,16 +477,14 @@
     switch (page_map_[pm_idx]) {
       case kPageMapEmpty:
         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
-        return;
+        return 0;
       case kPageMapLargeObject:
-        FreePages(self, ptr);
-        return;
+        return FreePages(self, ptr) * kPageSize;
       case kPageMapLargeObjectPart:
         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
-        return;
+        return 0;
       case kPageMapRun:
       case kPageMapRunPart: {
-        free_from_run = true;
         size_t pi = pm_idx;
         DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
         // Find the beginning of the run.
@@ -501,18 +499,18 @@
       }
       default:
         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
-        return;
+        return 0;
     }
   }
-  if (LIKELY(free_from_run)) {
-    DCHECK(run != NULL);
-    FreeFromRun(self, ptr, run);
-  }
+  DCHECK(run != nullptr);
+  const size_t size = IndexToBracketSize(run->size_bracket_idx_);
+  FreeFromRun(self, ptr, run);
+  return size;
 }
 
-void RosAlloc::Free(Thread* self, void* ptr) {
+size_t RosAlloc::Free(Thread* self, void* ptr) {
   ReaderMutexLock rmu(self, bulk_free_lock_);
-  FreeInternal(self, ptr);
+  return FreeInternal(self, ptr);
 }
 
 RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
@@ -1077,13 +1075,14 @@
 // the page map entry won't change. Disabled for now.
 static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = false;
 
-void RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
+size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
+  size_t freed_bytes = 0;
   if (false) {
     // Used only to test Free() as GC uses only BulkFree().
     for (size_t i = 0; i < num_ptrs; ++i) {
-      FreeInternal(self, ptrs[i]);
+      freed_bytes += FreeInternal(self, ptrs[i]);
     }
-    return;
+    return freed_bytes;
   }
 
   WriterMutexLock wmu(self, bulk_free_lock_);
@@ -1126,14 +1125,15 @@
         DCHECK_EQ(run->magic_num_, kMagicNum);
       } else if (page_map_entry == kPageMapLargeObject) {
         MutexLock mu(self, lock_);
-        FreePages(self, ptr);
+        freed_bytes += FreePages(self, ptr) * kPageSize;
         continue;
       } else {
         LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
       }
-      DCHECK(run != NULL);
+      DCHECK(run != nullptr);
       // Set the bit in the bulk free bit map.
       run->MarkBulkFreeBitMap(ptr);
+      freed_bytes += IndexToBracketSize(run->size_bracket_idx_);
 #ifdef HAVE_ANDROID_OS
       if (!run->to_be_bulk_freed_) {
         run->to_be_bulk_freed_ = true;
@@ -1171,7 +1171,7 @@
           run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
           DCHECK_EQ(run->magic_num_, kMagicNum);
         } else if (page_map_entry == kPageMapLargeObject) {
-          FreePages(self, ptr);
+          freed_bytes += FreePages(self, ptr) * kPageSize;
         } else {
           LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
         }
@@ -1180,6 +1180,7 @@
         DCHECK(run != NULL);
         // Set the bit in the bulk free bit map.
         run->MarkBulkFreeBitMap(ptr);
+        freed_bytes += IndexToBracketSize(run->size_bracket_idx_);
 #ifdef HAVE_ANDROID_OS
         if (!run->to_be_bulk_freed_) {
           run->to_be_bulk_freed_ = true;
@@ -1306,6 +1307,7 @@
       }
     }
   }
+  return freed_bytes;
 }
 
 std::string RosAlloc::DumpPageMap() {
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 5d9d75c..0c508b7 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -493,7 +493,8 @@
   // Page-granularity alloc/free
   void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  void FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  // Returns how many pages were freed.
+  size_t FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
   // Allocate/free a run slot.
   void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
@@ -506,7 +507,7 @@
   Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
 
   // The internal of non-bulk Free().
-  void FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);
+  size_t FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);
 
   // Allocates large objects.
   void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
@@ -518,9 +519,9 @@
   ~RosAlloc();
   void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
       LOCKS_EXCLUDED(lock_);
-  void Free(Thread* self, void* ptr)
+  size_t Free(Thread* self, void* ptr)
       LOCKS_EXCLUDED(bulk_free_lock_);
-  void BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
+  size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
       LOCKS_EXCLUDED(bulk_free_lock_);
   // Returns the size of the allocated slot for a given allocated memory chunk.
   size_t UsableSize(void* ptr);
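With Free(), BulkFree() and FreePages() now reporting how much memory they released, callers no longer have to pre-compute per-object sizes before freeing (the rosalloc_space.cc hunk below drops exactly that AllocationSizeNonvirtual loop). A minimal usage sketch, assuming an existing RosAlloc* rosalloc_ and Thread* self as in the space code:

    size_t freed_bytes = 0;
    freed_bytes += rosalloc_->Free(self, ptr);                                           // one object
    freed_bytes += rosalloc_->BulkFree(self, reinterpret_cast<void**>(ptrs), num_ptrs);  // GC sweep batch
    // freed_bytes can feed heap accounting directly; with kVerifyFreedBytes it can still be
    // cross-checked against AllocationSizeNonvirtual() sums for debugging.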
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 5a7d941..a5a6da0 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -33,6 +33,10 @@
 namespace space {
 
 static constexpr bool kPrefetchDuringRosAllocFreeList = true;
+static constexpr size_t kPrefetchLookAhead = 8;
+// Use this only for verification; it is not safe to use otherwise, since the class of the object
+// may have been freed.
+static constexpr bool kVerifyFreedBytes = false;
 
 // TODO: Fix
 // template class ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>;
@@ -172,27 +176,24 @@
     CHECK(ptr != NULL);
     CHECK(Contains(ptr)) << "Free (" << ptr << ") not in bounds of heap " << *this;
   }
-  const size_t bytes_freed = AllocationSizeNonvirtual(ptr, nullptr);
   if (kRecentFreeCount > 0) {
     MutexLock mu(self, lock_);
     RegisterRecentFree(ptr);
   }
-  rosalloc_->Free(self, ptr);
-  return bytes_freed;
+  return rosalloc_->Free(self, ptr);
 }
 
 size_t RosAllocSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
-  DCHECK(ptrs != NULL);
+  DCHECK(ptrs != nullptr);
 
-  // Don't need the lock to calculate the size of the freed pointers.
-  size_t bytes_freed = 0;
+  size_t verify_bytes = 0;
   for (size_t i = 0; i < num_ptrs; i++) {
-    mirror::Object* ptr = ptrs[i];
-    const size_t look_ahead = 8;
-    if (kPrefetchDuringRosAllocFreeList && i + look_ahead < num_ptrs) {
-      __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + look_ahead]));
+    if (kPrefetchDuringRosAllocFreeList && i + kPrefetchLookAhead < num_ptrs) {
+      __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + kPrefetchLookAhead]));
     }
-    bytes_freed += AllocationSizeNonvirtual(ptr, nullptr);
+    if (kVerifyFreedBytes) {
+      verify_bytes += AllocationSizeNonvirtual(ptrs[i], nullptr);
+    }
   }
 
   if (kRecentFreeCount > 0) {
@@ -216,7 +217,10 @@
     CHECK_EQ(num_broken_ptrs, 0u);
   }
 
-  rosalloc_->BulkFree(self, reinterpret_cast<void**>(ptrs), num_ptrs);
+  const size_t bytes_freed = rosalloc_->BulkFree(self, reinterpret_cast<void**>(ptrs), num_ptrs);
+  if (kVerifyFreedBytes) {
+    CHECK_EQ(verify_bytes, bytes_freed);
+  }
   return bytes_freed;
 }
 
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index bbad884..987df91 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -254,20 +254,11 @@
 
   int idx = ExtractIndex(iref);
 
-  JavaVMExt* vm = Runtime::Current()->GetJavaVM();
   if (GetIndirectRefKind(iref) == kSirtOrInvalid &&
       Thread::Current()->SirtContains(reinterpret_cast<jobject>(iref))) {
     LOG(WARNING) << "Attempt to remove local SIRT entry from IRT, ignoring";
     return true;
   }
-  if (GetIndirectRefKind(iref) == kSirtOrInvalid && vm->work_around_app_jni_bugs) {
-    mirror::Object* direct_pointer = reinterpret_cast<mirror::Object*>(iref);
-    idx = Find(direct_pointer, bottomIndex, topIndex, table_);
-    if (idx == -1) {
-      LOG(WARNING) << "Trying to work around app JNI bugs, but didn't find " << iref << " in table!";
-      return false;
-    }
-  }
 
   if (idx < bottomIndex) {
     // Wrong segment.
@@ -285,7 +276,7 @@
   if (idx == topIndex-1) {
     // Top-most entry.  Scan up and consume holes.
 
-    if (!vm->work_around_app_jni_bugs && !CheckEntry("remove", iref, idx)) {
+    if (!CheckEntry("remove", iref, idx)) {
       return false;
     }
 
@@ -321,7 +312,7 @@
       LOG(INFO) << "--- WEIRD: removing null entry " << idx;
       return false;
     }
-    if (!vm->work_around_app_jni_bugs && !CheckEntry("remove", iref, idx)) {
+    if (!CheckEntry("remove", iref, idx)) {
       return false;
     }
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index f7aeffd..38aeaee 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2444,23 +2444,6 @@
       if (static_cast<JNIEnvExt*>(env)->self->SirtContains(java_object)) {
         return JNILocalRefType;
       }
-
-      if (!static_cast<JNIEnvExt*>(env)->vm->work_around_app_jni_bugs) {
-        return JNIInvalidRefType;
-      }
-
-      // If we're handing out direct pointers, check whether it's a direct pointer to a local
-      // reference.
-      {
-        ScopedObjectAccess soa(env);
-        if (soa.Decode<mirror::Object*>(java_object) ==
-            reinterpret_cast<mirror::Object*>(java_object)) {
-          mirror::Object* object = reinterpret_cast<mirror::Object*>(java_object);
-          if (soa.Env()->locals.ContainsDirectPointer(object)) {
-            return JNILocalRefType;
-          }
-        }
-      }
       return JNIInvalidRefType;
     }
     LOG(FATAL) << "IndirectRefKind[" << kind << "]";
@@ -2993,7 +2976,6 @@
       check_jni(false),
       force_copy(false),  // TODO: add a way to enable this
       trace(options->jni_trace_),
-      work_around_app_jni_bugs(false),
       pins_lock("JNI pin table lock", kPinTableLock),
       pin_table("pin table", kPinTableInitial, kPinTableMax),
       globals_lock("JNI global reference table lock"),
@@ -3044,7 +3026,6 @@
   if (force_copy) {
     os << " (with forcecopy)";
   }
-  os << "; workarounds are " << (work_around_app_jni_bugs ? "on" : "off");
   Thread* self = Thread::Current();
   {
     MutexLock mu(self, pins_lock);
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 42796db..ec911b2 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -110,9 +110,6 @@
   // Extra diagnostics.
   std::string trace;
 
-  // Used to provide compatibility for apps that assumed direct references.
-  bool work_around_app_jni_bugs;
-
   // Used to hold references to pinned primitive arrays.
   Mutex pins_lock DEFAULT_MUTEX_ACQUIRED_AFTER;
   ReferenceTable pin_table GUARDED_BY(pins_lock);
@@ -149,7 +146,7 @@
   void PopFrame();
 
   template<typename T>
-  T AddLocalReference(mirror::Object* obj, bool jni_work_arounds)
+  T AddLocalReference(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Offset SegmentStateOffset();
@@ -216,7 +213,7 @@
 };
 
 template<typename T>
-inline T JNIEnvExt::AddLocalReference(mirror::Object* obj, bool jni_work_arounds) {
+inline T JNIEnvExt::AddLocalReference(mirror::Object* obj) {
   IndirectRef ref = locals.Add(local_ref_cookie, obj);
 
   // TODO: fix this to understand PushLocalFrame, so we can turn it on.
@@ -231,9 +228,6 @@
     }
   }
 
-  if (jni_work_arounds) {
-    return reinterpret_cast<T>(obj);
-  }
   return reinterpret_cast<T>(ref);
 }
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 2151fc7..f3303a8 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -351,30 +351,15 @@
   return native_method != jni_stub;
 }
 
-extern "C" void art_work_around_app_jni_bugs(JNIEnv*, jobject);
 void ArtMethod::RegisterNative(Thread* self, const void* native_method, bool is_fast) {
   DCHECK(Thread::Current() == self);
   CHECK(IsNative()) << PrettyMethod(this);
   CHECK(!IsFastNative()) << PrettyMethod(this);
   CHECK(native_method != NULL) << PrettyMethod(this);
-  if (!self->GetJniEnv()->vm->work_around_app_jni_bugs) {
-    if (is_fast) {
-      SetAccessFlags(GetAccessFlags() | kAccFastNative);
-    }
-    SetNativeMethod(native_method);
-  } else {
-    // We've been asked to associate this method with the given native method but are working
-    // around JNI bugs, that include not giving Object** SIRT references to native methods. Direct
-    // the native method to runtime support and store the target somewhere runtime support will
-    // find it.
-#if defined(__i386__) || defined(__x86_64__)
-    UNIMPLEMENTED(FATAL);
-#else
-    SetNativeMethod(reinterpret_cast<void*>(art_work_around_app_jni_bugs));
-#endif
-    SetFieldPtr<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, gc_map_),
-                       reinterpret_cast<const uint8_t*>(native_method), false);
+  if (is_fast) {
+    SetAccessFlags(GetAccessFlags() | kAccFastNative);
   }
+  SetNativeMethod(native_method);
 }
 
 void ArtMethod::UnregisterNative(Thread* self) {
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 5c5eaa1..76c5866 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -164,23 +164,12 @@
 }
 
 static void VMRuntime_setTargetSdkVersionNative(JNIEnv* env, jobject, jint targetSdkVersion) {
-  // This is the target SDK version of the app we're about to run.
+  // This is the target SDK version of the app we're about to run. It is intended that this be a
+  // place where workarounds can be enabled.
   // Note that targetSdkVersion may be CUR_DEVELOPMENT (10000).
   // Note that targetSdkVersion may be 0, meaning "current".
-  if (targetSdkVersion > 0 && targetSdkVersion <= 13 /* honeycomb-mr2 */) {
-    Runtime* runtime = Runtime::Current();
-    JavaVMExt* vm = runtime->GetJavaVM();
-    if (vm->check_jni) {
-      LOG(INFO) << "CheckJNI enabled: not enabling JNI app bug workarounds.";
-    } else {
-      LOG(INFO) << "Turning on JNI app bug workarounds for target SDK version "
-          << targetSdkVersion << "...";
-
-      vm->work_around_app_jni_bugs = true;
-      LOG(WARNING) << "Permenantly disabling heap compaction due to jni workarounds";
-      Runtime::Current()->GetHeap()->DisableCompaction();
-    }
-  }
+  UNUSED(env);
+  UNUSED(targetSdkVersion);
 }
 
 static void VMRuntime_registerNativeAllocation(JNIEnv* env, jobject, jint bytes) {
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 0b84005..86db893 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -88,6 +88,7 @@
     case kSuspended:                      return kJavaRunnable;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
   }
+  LOG(ERROR) << "Unexpected thread state: " << internal_thread_state;
   return -1;  // Unreachable.
 }
 
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index ebc5452..404c616 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -171,7 +171,7 @@
 
     DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
 
-    return Env()->AddLocalReference<T>(obj, Vm()->work_around_app_jni_bugs);
+    return Env()->AddLocalReference<T>(obj);
   }
 
   template<typename T>
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3408dd3..998579d 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1248,10 +1248,6 @@
       // Read from SIRT.
       result = reinterpret_cast<StackReference<mirror::Object>*>(obj)->AsMirrorPtr();
       VerifyObject(result);
-    } else if (Runtime::Current()->GetJavaVM()->work_around_app_jni_bugs) {
-      // Assume an invalid local reference is actually a direct pointer.
-      result = reinterpret_cast<mirror::Object*>(obj);
-      VerifyObject(result);
     } else {
       result = kInvalidIndirectRefObject;
     }