Merge "Use more compact encoding for DWARF flags."
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 372fe2b..4d6c058 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -28,6 +28,8 @@
 
 namespace art {
 
+static constexpr size_t kMips64DoublewordSize = 8;
+
 /* This file contains codegen for the Mips ISA */
 LIR* MipsMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
   int opcode;
@@ -760,7 +762,25 @@
 
   if (cu_->target64) {
     if (short_form) {
-      load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
+      if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMips64Ld) {
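+        // Doubleword GPR load at a word-aligned (but not doubleword-aligned)
+        // displacement: load both halves with LWU and merge them below.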
+        RegStorage r_tmp = AllocTemp();
+        load = res = NewLIR3(kMips64Lwu, r_dest.GetReg(), displacement + LOWORD_OFFSET,
+                             r_base.GetReg());
+        load2 = NewLIR3(kMips64Lwu, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        NewLIR3(kMips64Dsll32, r_tmp.GetReg(), r_tmp.GetReg(), 0x0);
+        NewLIR3(kMipsOr, r_dest.GetReg(), r_dest.GetReg(), r_tmp.GetReg());
+        FreeTemp(r_tmp);
+      } else if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMipsFldc1) {
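+        // Same split for a 64-bit FPU load: LWC1 fills the low word and
+        // MTHC1 moves the high word into bits 32..63 of the FPR.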
+        RegStorage r_tmp = AllocTemp();
+        r_dest = Fp64ToSolo32(r_dest);
+        load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), displacement + LOWORD_OFFSET,
+                             r_base.GetReg());
+        load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg());
+        FreeTemp(r_tmp);
+      } else {
+        load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
+      }
     } else {
       RegStorage r_tmp = (r_base == r_dest) ? AllocTemp() : r_dest;
       res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
@@ -771,7 +791,12 @@
 
     if (mem_ref_type_ == ResourceMask::kDalvikReg) {
       DCHECK_EQ(r_base, TargetPtrReg(kSp));
-      AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+      AnnotateDalvikRegAccess(load, (displacement + LOWORD_OFFSET) >> 2,
+                              true /* is_load */, r_dest.Is64Bit() /* is64bit */);
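+      // A split (unaligned doubleword) access emits a second load for the
+      // high word; annotate it against the Dalvik register map as well.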
+      if (load2 != nullptr) {
+        AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
+                                true /* is_load */, r_dest.Is64Bit() /* is64bit */);
+      }
     }
     return res;
   }
@@ -932,7 +957,24 @@
 
   if (cu_->target64) {
     if (short_form) {
-      store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
+      if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMips64Sd) {
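+        // Doubleword GPR store at a word-aligned displacement: copy the value,
+        // store the low word, shift the high word down and store it too.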
+        RegStorage r_tmp = AllocTemp();
+        res = NewLIR2(kMipsMove, r_tmp.GetReg(), r_src.GetReg());
+        store = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        NewLIR3(kMips64Dsrl32, r_tmp.GetReg(), r_tmp.GetReg(), 0x0);
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        FreeTemp(r_tmp);
+      } else if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMipsFsdc1) {
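+        // 64-bit FPU store split: SWC1 stores the low word; MFHC1 extracts
+        // the high word so it can be stored with SW.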
+        RegStorage r_tmp = AllocTemp();
+        r_src = Fp64ToSolo32(r_src);
+        store = res = NewLIR3(kMipsFswc1, r_src.GetReg(), displacement + LOWORD_OFFSET,
+                              r_base.GetReg());
+        NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg());
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        FreeTemp(r_tmp);
+      } else {
+        store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
+      }
     } else {
       RegStorage r_scratch = AllocTemp();
       res = OpRegRegImm(kOpAdd, r_scratch, r_base, displacement);
@@ -942,7 +984,12 @@
 
     if (mem_ref_type_ == ResourceMask::kDalvikReg) {
       DCHECK_EQ(r_base, TargetPtrReg(kSp));
-      AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+      AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, r_src.Is64Bit() /* is64bit */);
+      if (store2 != nullptr) {
+        AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
+                                false /* is_load */, r_src.Is64Bit() /* is64bit */);
+      }
     }
     return res;
   }
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f1b7458..6bc2a13 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -921,7 +921,7 @@
       std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
      : exceptions_to_resolve_(exceptions_to_resolve) {}
 
-  virtual bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  virtual bool operator()(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
     for (auto& m : c->GetMethods(pointer_size)) {
       ResolveExceptionsForMethod(&m, pointer_size);
@@ -975,7 +975,7 @@
   explicit RecordImageClassesVisitor(std::unordered_set<std::string>* image_classes)
       : image_classes_(image_classes) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     std::string temp;
     image_classes_->insert(klass->GetDescriptor(&temp));
     return true;
@@ -1142,7 +1142,7 @@
    public:
     explicit FindImageClassesVisitor(ClinitImageUpdate* data) : data_(data) {}
 
-    bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
       std::string temp;
       const char* name = klass->GetDescriptor(&temp);
       if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) {
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 60dfcfb..d63fed5 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -714,7 +714,7 @@
 
 class ComputeLazyFieldsForClassesVisitor : public ClassVisitor {
  public:
-  bool Visit(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(Thread::Current());
     mirror::Class::ComputeName(hs.NewHandle(c));
     return true;
@@ -852,14 +852,14 @@
  public:
   explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
-  bool Visit(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!image_writer_->KeepClass(klass)) {
       classes_to_prune_.insert(klass);
     }
     return true;
   }
 
-  std::unordered_set<mirror::Class*> classes_to_prune_;
+  mutable std::unordered_set<mirror::Class*> classes_to_prune_;
   ImageWriter* const image_writer_;
 };
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a59024e..4179fab 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1556,21 +1556,13 @@
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  if (use_acquire_release) {
-    // TODO(vixl): Let the MacroAssembler handle MemOperand.
-    __ Add(temp, class_reg, status_offset);
-    __ Ldar(temp, HeapOperand(temp));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-  } else {
-    __ Ldr(temp, HeapOperand(class_reg, status_offset));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-    __ Dmb(InnerShareable, BarrierReads);
-  }
+  // TODO(vixl): Let the MacroAssembler handle MemOperand.
+  __ Add(temp, class_reg, status_offset);
+  __ Ldar(temp, HeapOperand(temp));
+  __ Cmp(temp, mirror::Class::kStatusInitialized);
+  __ B(lt, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1716,9 +1708,7 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   Primitive::Type field_type = field_info.GetFieldType();
   BlockPoolsScope block_pools(GetVIXLAssembler());
-
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object FieldGet with Baker's read barrier case.
@@ -1736,26 +1726,15 @@
         offset,
         temp,
         /* needs_null_check */ true,
-        field_info.IsVolatile() && use_acquire_release);
-    if (field_info.IsVolatile() && !use_acquire_release) {
-      // For IRIW sequential consistency kLoadAny is not sufficient.
-      codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-    }
+        field_info.IsVolatile());
   } else {
     // General case.
     if (field_info.IsVolatile()) {
-      if (use_acquire_release) {
-        // Note that a potential implicit null check is handled in this
-        // CodeGeneratorARM64::LoadAcquire call.
-        // NB: LoadAcquire will record the pc info if needed.
-        codegen_->LoadAcquire(
-            instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
-      } else {
-        codegen_->Load(field_type, OutputCPURegister(instruction), field);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        // For IRIW sequential consistency kLoadAny is not sufficient.
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-      }
+      // Note that a potential implicit null check is handled in this
+      // CodeGeneratorARM64::LoadAcquire call.
+      // NB: LoadAcquire will record the pc info if needed.
+      codegen_->LoadAcquire(
+          instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
     } else {
       codegen_->Load(field_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -1791,7 +1770,6 @@
   CPURegister source = value;
   Offset offset = field_info.GetFieldOffset();
   Primitive::Type field_type = field_info.GetFieldType();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   {
     // We use a block to end the scratch scope before the write barrier, thus
@@ -1807,15 +1785,8 @@
     }
 
     if (field_info.IsVolatile()) {
-      if (use_acquire_release) {
-        codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-      } else {
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
-        codegen_->Store(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-      }
+      codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
     } else {
       codegen_->Store(field_type, source, HeapOperand(obj, offset));
       codegen_->MaybeRecordImplicitNullCheck(instruction);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3c928de..cefcb95 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -106,7 +106,7 @@
 }
 
 #define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
@@ -437,7 +437,7 @@
 
 #undef __
 #define __ down_cast<Mips64Assembler*>(GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
   // Ensure that we fix up branches.
@@ -486,12 +486,12 @@
 void ParallelMoveResolverMIPS64::RestoreScratch(int reg) {
   // Pop reg
   __ Ld(GpuRegister(reg), SP, 0);
-  __ DecreaseFrameSize(kMips64WordSize);
+  __ DecreaseFrameSize(kMips64DoublewordSize);
 }
 
 void ParallelMoveResolverMIPS64::SpillScratch(int reg) {
   // Push reg
-  __ IncreaseFrameSize(kMips64WordSize);
+  __ IncreaseFrameSize(kMips64DoublewordSize);
   __ Sd(GpuRegister(reg), SP, 0);
 }
 
@@ -503,7 +503,7 @@
   // automatically unspilled when the scratch scope object is destroyed).
   ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters());
   // If V0 spills onto the stack, SP-relative offsets need to be adjusted.
-  int stack_offset = ensure_scratch.IsSpilled() ? kMips64WordSize : 0;
+  int stack_offset = ensure_scratch.IsSpilled() ? kMips64DoublewordSize : 0;
   __ LoadFromOffset(load_type,
                     GpuRegister(ensure_scratch.GetRegister()),
                     SP,
@@ -562,7 +562,7 @@
   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
     GpuRegister reg = kCoreCalleeSaves[i];
     if (allocated_registers_.ContainsCoreRegister(reg)) {
-      ofs -= kMips64WordSize;
+      ofs -= kMips64DoublewordSize;
       __ Sd(reg, SP, ofs);
       __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
@@ -571,7 +571,7 @@
   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
     FpuRegister reg = kFpuCalleeSaves[i];
     if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-      ofs -= kMips64WordSize;
+      ofs -= kMips64DoublewordSize;
       __ Sdc1(reg, SP, ofs);
       // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
@@ -609,7 +609,7 @@
       FpuRegister reg = kFpuCalleeSaves[i];
       if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
         __ Ldc1(reg, SP, ofs);
-        ofs += kMips64WordSize;
+        ofs += kMips64DoublewordSize;
         // TODO: __ cfi().Restore(DWARFReg(reg));
       }
     }
@@ -618,7 +618,7 @@
       GpuRegister reg = kCoreCalleeSaves[i];
       if (allocated_registers_.ContainsCoreRegister(reg)) {
         __ Ld(reg, SP, ofs);
-        ofs += kMips64WordSize;
+        ofs += kMips64DoublewordSize;
         __ cfi().Restore(DWARFReg(reg));
       }
     }
@@ -976,7 +976,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     card,
                     TR,
-                    Thread::CardTableOffset<kMips64WordSize>().Int32Value());
+                    Thread::CardTableOffset<kMips64DoublewordSize>().Int32Value());
   __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift);
   __ Daddu(temp, card, temp);
   __ Sb(card, temp, 0);
@@ -994,10 +994,11 @@
   blocked_core_registers_[SP] = true;
   blocked_core_registers_[RA] = true;
 
-  // AT and TMP(T8) are used as temporary/scratch registers
-  // (similar to how AT is used by MIPS assemblers).
+  // AT, TMP(T8) and TMP2(T3) are used as temporary/scratch
+  // registers (similar to how AT is used by MIPS assemblers).
   blocked_core_registers_[AT] = true;
   blocked_core_registers_[TMP] = true;
+  blocked_core_registers_[TMP2] = true;
   blocked_fpu_registers_[FTMP] = true;
 
   // Reserve suspend and thread registers.
@@ -1021,22 +1022,22 @@
 
 size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ LoadFromOffset(kLoadDoubleword, GpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -1051,7 +1052,7 @@
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
                                      SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kMips64WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kMips64DoublewordSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -1091,7 +1092,7 @@
   __ LoadFromOffset(kLoadUnsignedHalfword,
                     TMP,
                     TR,
-                    Thread::ThreadFlagsOffset<kMips64WordSize>().Int32Value());
+                    Thread::ThreadFlagsOffset<kMips64DoublewordSize>().Int32Value());
   if (successor == nullptr) {
     __ Bnezc(TMP, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -3014,7 +3015,7 @@
       invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
 
   // Set the hidden argument.
   __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<GpuRegister>(),
@@ -3190,7 +3191,7 @@
                         T9,
                         callee_method.AsRegister<GpuRegister>(),
                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            kMips64WordSize).Int32Value());
+                            kMips64DoublewordSize).Int32Value());
       // T9()
       __ Jalr(T9);
       __ Nop();
@@ -3228,7 +3229,7 @@
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
 
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
@@ -3306,7 +3307,7 @@
 }
 
 static int32_t GetExceptionTlsOffset() {
-  return Thread::ExceptionOffset<kMips64WordSize>().Int32Value();
+  return Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value();
 }
 
 void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) {
@@ -3546,7 +3547,8 @@
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
-    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+    MemberOffset code_offset =
+        ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
     __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
     __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
     __ Jalr(T9);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 08e5615..c836f83 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -27,10 +27,6 @@
 namespace art {
 namespace mips64 {
 
-// Use a local definition to prevent copying mistakes.
-static constexpr size_t kMips64WordSize = kMips64PointerSize;
-
-
 // InvokeDexCallingConvention registers
 
 static constexpr GpuRegister kParameterCoreRegisters[] =
@@ -274,9 +270,9 @@
 
   void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
 
-  size_t GetWordSize() const OVERRIDE { return kMips64WordSize; }
+  size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; }
 
-  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; }
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; }
 
   uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
     return assembler_.GetLabelLocation(GetLabelOf(block));
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 98d041a..5dce83a 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -780,7 +780,6 @@
   Register offset = XRegisterFrom(offset_loc);  // Long offset.
   Location trg_loc = locations->Out();
   Register trg = RegisterFrom(trg_loc, type);
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
@@ -788,19 +787,11 @@
     Register temp = temps.AcquireW();
     codegen->GenerateArrayLoadWithBakerReadBarrier(
         invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
-    if (is_volatile && !use_acquire_release) {
-      __ Dmb(InnerShareable, BarrierReads);
-    }
   } else {
     // Other cases.
     MemOperand mem_op(base.X(), offset);
     if (is_volatile) {
-      if (use_acquire_release) {
-        codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
-      } else {
-        codegen->Load(type, trg, mem_op);
-        __ Dmb(InnerShareable, BarrierReads);
-      }
+      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
     } else {
       codegen->Load(type, trg, mem_op);
     }
@@ -914,8 +905,6 @@
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register value = RegisterFrom(locations->InAt(3), type);
   Register source = value;
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
-
   MemOperand mem_op(base.X(), offset);
 
   {
@@ -932,15 +921,7 @@
     }
 
     if (is_volatile || is_ordered) {
-      if (use_acquire_release) {
-        codegen->StoreRelease(type, source, mem_op);
-      } else {
-        __ Dmb(InnerShareable, BarrierAll);
-        codegen->Store(type, source, mem_op);
-        if (is_volatile) {
-          __ Dmb(InnerShareable, BarrierReads);
-        }
-      }
+      codegen->StoreRelease(type, source, mem_op);
     } else {
       codegen->Store(type, source, mem_op);
     }
@@ -1037,7 +1018,6 @@
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
 
   Register out = WRegisterFrom(locations->Out());                  // Boolean result.
@@ -1078,43 +1058,20 @@
   // result = tmp_value != 0;
 
   vixl::Label loop_head, exit_loop;
-  if (use_acquire_release) {
-    __ Bind(&loop_head);
-    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-    // the reference stored in the object before attempting the CAS,
-    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-    // implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
-    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
-    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-  } else {
-    // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
-    // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
-    // one, as the latter allows a preceding load to be delayed past
-    // the STXR instruction below.
-    __ Dmb(InnerShareable, BarrierAll);
-    __ Bind(&loop_head);
-    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-    // the reference stored in the object before attempting the CAS,
-    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-    // implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
-    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
-    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-    __ Dmb(InnerShareable, BarrierAll);
-  }
+  __ Bind(&loop_head);
+  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+  // the reference stored in the object before attempting the CAS,
+  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+  // implementation.
+  //
+  // Note that this code is not (yet) used when read barriers are
+  // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+  __ Cmp(tmp_value, expected);
+  __ B(&exit_loop, ne);
+  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+  __ Cbnz(tmp_32, &loop_head);
   __ Bind(&exit_loop);
   __ Cset(out, eq);
 
@@ -1499,6 +1456,209 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+static void GenSignum(LocationSummary* locations, bool is_long, vixl::MacroAssembler* masm) {
+  Location op1 = locations->InAt(0);
+  Location out = locations->Out();
+
+  Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
+  Register out_reg = WRegisterFrom(out);
+
+  __ Cmp(op1_reg, 0);
+  __ Cset(out_reg, gt);           // out == +1 if GT or 0 otherwise
+  __ Cinv(out_reg, out_reg, lt);  // out == -1 if LT or unchanged otherwise
+}
+
+void IntrinsicLocationsBuilderARM64::VisitIntegerSignum(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerSignum(HInvoke* invoke) {
+  GenSignum(invoke->GetLocations(), /* is_long */ false, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongSignum(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongSignum(HInvoke* invoke) {
+  GenSignum(invoke->GetLocations(), /* is_long */ true, GetVIXLAssembler());
+}
+
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCall,
+                                                                 kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+
+  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(1)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCall,
+                                                                 kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+
+  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
+}
+
+static void GenFPToFPCall(HInvoke* invoke,
+                          vixl::MacroAssembler* masm,
+                          CodeGeneratorARM64* codegen,
+                          QuickEntrypointEnum entry) {
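+  // Load the entrypoint address from the thread register and branch-and-link.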
+  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64WordSize>(entry).Int32Value()));
+  __ Blr(lr);
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTanh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickNextAfter);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -1514,24 +1674,6 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
-UNIMPLEMENTED_INTRINSIC(MathCos)
-UNIMPLEMENTED_INTRINSIC(MathSin)
-UNIMPLEMENTED_INTRINSIC(MathAcos)
-UNIMPLEMENTED_INTRINSIC(MathAsin)
-UNIMPLEMENTED_INTRINSIC(MathAtan)
-UNIMPLEMENTED_INTRINSIC(MathAtan2)
-UNIMPLEMENTED_INTRINSIC(MathCbrt)
-UNIMPLEMENTED_INTRINSIC(MathCosh)
-UNIMPLEMENTED_INTRINSIC(MathExp)
-UNIMPLEMENTED_INTRINSIC(MathExpm1)
-UNIMPLEMENTED_INTRINSIC(MathHypot)
-UNIMPLEMENTED_INTRINSIC(MathLog)
-UNIMPLEMENTED_INTRINSIC(MathLog10)
-UNIMPLEMENTED_INTRINSIC(MathNextAfter)
-UNIMPLEMENTED_INTRINSIC(MathSinh)
-UNIMPLEMENTED_INTRINSIC(MathTan)
-UNIMPLEMENTED_INTRINSIC(MathTanh)
-
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
@@ -1541,8 +1683,6 @@
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(IntegerSignum)
-UNIMPLEMENTED_INTRINSIC(LongSignum)
 
 // Rotate operations are handled as HRor instructions.
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index cba84fa..f681d1f 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1429,8 +1429,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize,
-                                            pStringCompareTo).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pStringCompareTo).Int32Value());
   __ Jalr(TMP);
   __ Nop();
   __ Bind(slow_path->GetExitLabel());
@@ -1583,7 +1582,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pIndexOf).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pIndexOf).Int32Value());
   __ Jalr(TMP);
   __ Nop();
 
@@ -1659,7 +1658,8 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromBytes).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                                            pAllocStringFromBytes).Int32Value());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Jalr(TMP);
   __ Nop();
@@ -1685,7 +1685,8 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromChars).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                                            pAllocStringFromChars).Int32Value());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Jalr(TMP);
   __ Nop();
@@ -1716,7 +1717,8 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromString).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                                            pAllocStringFromString).Int32Value());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Jalr(TMP);
   __ Nop();
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index f9ff2df..ab480ca 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -300,10 +300,17 @@
   EmitRtd(0x1f, rt, rd, 0x5, 0x24);
 }
 
-void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size_less_one) {
-  DCHECK(0 <= pos && pos < 32) << pos;
-  DCHECK(0 <= size_less_one && size_less_one < 32) << size_less_one;
-  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size_less_one), pos, 3);
+void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size) {
+  CHECK(IsUint<5>(pos)) << pos;
+  CHECK(IsUint<5>(size - 1)) << size;
+  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size - 1), pos, 0x3);
+}
+
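+// DINSU inserts the low `size` bits of rs into rt starting at bit `pos`, with
+// 32 <= pos < 64 and pos + size <= 64; the instruction encodes pos - 32 and
+// pos + size - 33, hence the checks below.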
+void Mips64Assembler::Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size) {
+  CHECK(IsUint<5>(pos - 32)) << pos;
+  CHECK(IsUint<5>(size - 1)) << size;
+  CHECK(IsUint<5>(pos + size - 33)) << pos << " + " << size;
+  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(pos + size - 33), pos - 32, 0x6);
 }
 
 void Mips64Assembler::Wsbh(GpuRegister rd, GpuRegister rt) {
@@ -311,22 +318,22 @@
 }
 
 void Mips64Assembler::Sc(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x26);
 }
 
 void Mips64Assembler::Scd(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x27);
 }
 
 void Mips64Assembler::Ll(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x36);
 }
 
 void Mips64Assembler::Lld(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x37);
 }
 
@@ -967,10 +974,18 @@
   EmitFR(0x11, 0x00, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
+void Mips64Assembler::Mfhc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x03, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
 void Mips64Assembler::Mtc1(GpuRegister rt, FpuRegister fs) {
   EmitFR(0x11, 0x04, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
+void Mips64Assembler::Mthc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x07, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
 void Mips64Assembler::Dmfc1(GpuRegister rt, FpuRegister fs) {
   EmitFR(0x11, 0x01, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
@@ -1787,11 +1802,13 @@
 
 void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
                                      int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
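+    // Keep the doubleword-aligned part of the offset in AT and its low bits in
+    // the immediate, so that a split access can still address offset + 4.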
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
@@ -1808,32 +1825,51 @@
       Lhu(reg, base, offset);
       break;
     case kLoadWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lw(reg, base, offset);
       break;
     case kLoadUnsignedWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lwu(reg, base, offset);
       break;
     case kLoadDoubleword:
-      Ld(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
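+        // Word-aligned only: assemble the doubleword from two LWU loads,
+        // merging the high word into bits 32..63 with DINSU.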
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwu(reg, base, offset);
+        Lwu(TMP2, base, offset + kMips64WordSize);
+        Dinsu(reg, TMP2, 32, 32);
+      } else {
+        Ld(reg, base, offset);
+      }
       break;
   }
 }
 
 void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base,
                                         int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
     case kLoadWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lwc1(reg, base, offset);
       break;
     case kLoadDoubleword:
-      Ldc1(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
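+        // Load the low word with LWC1, then the high word with LW + MTHC1.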
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwc1(reg, base, offset);
+        Lw(TMP2, base, offset + kMips64WordSize);
+        Mthc1(TMP2, reg);
+      } else {
+        Ldc1(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -1869,11 +1905,13 @@
 
 void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base,
                                     int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
@@ -1884,10 +1922,18 @@
       Sh(reg, base, offset);
       break;
     case kStoreWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Sw(reg, base, offset);
       break;
     case kStoreDoubleword:
-      Sd(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
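+        // Store the low word, then move the high word down with DSRL32 and
+        // store it as the second word.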
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Sw(reg, base, offset);
+        Dsrl32(TMP2, reg, 0);
+        Sw(TMP2, base, offset + kMips64WordSize);
+      } else {
+        Sd(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -1896,19 +1942,29 @@
 
 void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base,
                                        int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
     case kStoreWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Swc1(reg, base, offset);
       break;
     case kStoreDoubleword:
-      Sdc1(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
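+        // Extract the high word with MFHC1, then store both halves with
+        // SWC1 and SW.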
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Mfhc1(TMP2, reg);
+        Swc1(reg, base, offset);
+        Sw(TMP2, base, offset + kMips64WordSize);
+      } else {
+        Sdc1(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -2053,7 +2109,7 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
+void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs,
                                                  FrameOffset fr_offs,
                                                  ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -2062,7 +2118,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) {
   StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
 }
 
@@ -2080,7 +2136,7 @@
 }
 
 void Mips64Assembler::LoadFromThread64(ManagedRegister mdest,
-                                       ThreadOffset<kMipsDoublewordSize> src,
+                                       ThreadOffset<kMips64DoublewordSize> src,
                                        size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
@@ -2102,7 +2158,7 @@
     // Negate the 32-bit ref
     Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
     // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64
-    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 31);
+    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32);
   }
 }
 
@@ -2115,7 +2171,7 @@
 }
 
 void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
-                                             ThreadOffset<kMipsDoublewordSize> offs) {
+                                             ThreadOffset<kMips64DoublewordSize> offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
@@ -2160,7 +2216,7 @@
 }
 
 void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset<kMipsDoublewordSize> thr_offs,
+                                             ThreadOffset<kMips64DoublewordSize> thr_offs,
                                              ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
@@ -2168,7 +2224,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
+void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs,
                                            FrameOffset fr_offs,
                                            ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -2372,7 +2428,7 @@
   // TODO: place reference map on call
 }
 
-void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED,
+void Mips64Assembler::CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset ATTRIBUTE_UNUSED,
                                        ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
@@ -2392,7 +2448,7 @@
   LoadFromOffset(kLoadDoubleword,
                  scratch.AsGpuRegister(),
                  S1,
-                 Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value());
+                 Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value());
   Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry());
 }
 
@@ -2409,7 +2465,7 @@
   LoadFromOffset(kLoadDoubleword,
                  T9,
                  S1,
-                 QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value());
+                 QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pDeliverException).Int32Value());
   Jr(T9);
   Nop();
 
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 3262640..71f5e00 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -31,7 +31,8 @@
 namespace art {
 namespace mips64 {
 
-static constexpr size_t kMipsDoublewordSize = 8;
+static constexpr size_t kMips64WordSize = 4;
+static constexpr size_t kMips64DoublewordSize = 8;
 
 enum LoadOperandType {
   kLoadSignedByte,
@@ -151,7 +152,8 @@
   void Seh(GpuRegister rd, GpuRegister rt);
   void Dsbh(GpuRegister rd, GpuRegister rt);
   void Dshd(GpuRegister rd, GpuRegister rt);
-  void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one);  // MIPS64
+  void Dext(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
+  void Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
   void Wsbh(GpuRegister rd, GpuRegister rt);
   void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
   void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
@@ -301,7 +303,9 @@
   void Cvtdl(FpuRegister fd, FpuRegister fs);
 
   void Mfc1(GpuRegister rt, FpuRegister fs);
+  void Mfhc1(GpuRegister rt, FpuRegister fs);
   void Mtc1(GpuRegister rt, FpuRegister fs);
+  void Mthc1(GpuRegister rt, FpuRegister fs);
   void Dmfc1(GpuRegister rt, FpuRegister fs);  // MIPS64
   void Dmtc1(GpuRegister rt, FpuRegister fs);  // MIPS64
   void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
@@ -378,10 +382,10 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
+  void StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
                                   ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
@@ -390,7 +394,7 @@
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
   void LoadFromThread64(ManagedRegister mdest,
-                        ThreadOffset<kMipsDoublewordSize> src,
+                        ThreadOffset<kMips64DoublewordSize> src,
                         size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
@@ -401,15 +405,15 @@
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
   void LoadRawPtrFromThread64(ManagedRegister mdest,
-                              ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE;
+                              ThreadOffset<kMips64DoublewordSize> offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs,
+  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMips64DoublewordSize> thr_offs,
                               ManagedRegister mscratch) OVERRIDE;
 
-  void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
@@ -466,7 +470,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset,
+  void CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset,
                         ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 7d79be2..b758d64 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -543,6 +543,30 @@
   DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Mfc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mfhc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mfhc1, "mfhc1 ${reg1}, ${reg2}"), "Mfhc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mtc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mtc1, "mtc1 ${reg1}, ${reg2}"), "Mtc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mthc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mthc1, "mthc1 ${reg1}, ${reg2}"), "Mthc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmfc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmfc1, "dmfc1 ${reg1}, ${reg2}"), "Dmfc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmtc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmtc1, "dmtc1 ${reg1}, ${reg2}"), "Dmtc1");
+}
+
 ////////////////
 // CALL / JMP //
 ////////////////
@@ -827,6 +851,44 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dshd, "dshd ${reg1}, ${reg2}"), "dshd");
 }
 
+TEST_F(AssemblerMIPS64Test, Dext) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * 32 * 32);
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int32_t pos = 0; pos < 32; pos++) {
+        for (int32_t size = 1; size <= 32; size++) {
+          __ Dext(*reg1, *reg2, pos, size);
+          expected << "dext $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+        }
+      }
+    }
+  }
+
+  DriverStr(expected.str(), "Dext");
+}
+
+TEST_F(AssemblerMIPS64Test, Dinsu) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * 33 * 16);
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int32_t pos = 32; pos < 64; pos++) {
+        for (int32_t size = 1; pos + size <= 64; size++) {
+          __ Dinsu(*reg1, *reg2, pos, size);
+          expected << "dinsu $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+        }
+      }
+    }
+  }
+
+  DriverStr(expected.str(), "Dinsu");
+}
+
 TEST_F(AssemblerMIPS64Test, Wsbh) {
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh");
 }
@@ -942,4 +1004,638 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dclo, "dclo ${reg1}, ${reg2}"), "dclo");
 }
 
+TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 1);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x8001);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 1);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x8001);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 2);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 2);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  const char* expected =
+      "lb $a0, 0($a0)\n"
+      "lb $a0, 0($a1)\n"
+      "lb $a0, 1($a1)\n"
+      "lb $a0, 256($a1)\n"
+      "lb $a0, 1000($a1)\n"
+      "lb $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lb $a0, -256($a1)\n"
+      "lb $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+
+      "lbu $a0, 0($a0)\n"
+      "lbu $a0, 0($a1)\n"
+      "lbu $a0, 1($a1)\n"
+      "lbu $a0, 256($a1)\n"
+      "lbu $a0, 1000($a1)\n"
+      "lbu $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lbu $a0, -256($a1)\n"
+      "lbu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+
+      "lh $a0, 0($a0)\n"
+      "lh $a0, 0($a1)\n"
+      "lh $a0, 2($a1)\n"
+      "lh $a0, 256($a1)\n"
+      "lh $a0, 1000($a1)\n"
+      "lh $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lh $a0, -256($a1)\n"
+      "lh $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+
+      "lhu $a0, 0($a0)\n"
+      "lhu $a0, 0($a1)\n"
+      "lhu $a0, 2($a1)\n"
+      "lhu $a0, 256($a1)\n"
+      "lhu $a0, 1000($a1)\n"
+      "lhu $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lhu $a0, -256($a1)\n"
+      "lhu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+
+      "lw $a0, 0($a0)\n"
+      "lw $a0, 0($a1)\n"
+      "lw $a0, 4($a1)\n"
+      "lw $a0, 256($a1)\n"
+      "lw $a0, 1000($a1)\n"
+      "lw $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lw $a0, -256($a1)\n"
+      "lw $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+
+      "lwu $a0, 0($a0)\n"
+      "lwu $a0, 0($a1)\n"
+      "lwu $a0, 4($a1)\n"
+      "lwu $a0, 256($a1)\n"
+      "lwu $a0, 1000($a1)\n"
+      "lwu $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "lwu $a0, -256($a1)\n"
+      "lwu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+
+      "ld $a0, 0($a0)\n"
+      "ld $a0, 0($a1)\n"
+      "lwu $a0, 4($a1)\n"
+      "lwu $t3, 8($a1)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "ld $a0, 256($a1)\n"
+      "ld $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lwu $t3, 8($at)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lwu $t3, 8($at)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "ld $a0, -256($a1)\n"
+      "ld $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n";
+  DriverStr(expected, "LoadFromOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) {
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 4);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 256);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x7FFC);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x8000);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x8004);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x10000);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x12345678);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, -256);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, -32768);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 4);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 256);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x7FFC);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x8000);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x8004);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x10000);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x12345678);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -256);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -32768);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  const char* expected =
+      "lwc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lwc1 $f0, 256($a0)\n"
+      "lwc1 $f0, 0x7FFC($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lwc1 $f0, -256($a0)\n"
+      "lwc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+
+      "ldc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lw $t3, 8($a0)\n"
+      "mthc1 $t3, $f0\n"
+      "ldc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t3, 8($at)\n"
+      "mthc1 $t3, $f0\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t3, 8($at)\n"
+      "mthc1 $t3, $f0\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ldc1 $f0, -256($a0)\n"
+      "ldc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n";
+  DriverStr(expected, "LoadFpuFromOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, StoreToOffset) {
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 1);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x8001);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 2);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 4);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x8004);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 4);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFC);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x8004);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  const char* expected =
+      "sb $a0, 0($a0)\n"
+      "sb $a0, 0($a1)\n"
+      "sb $a0, 1($a1)\n"
+      "sb $a0, 256($a1)\n"
+      "sb $a0, 1000($a1)\n"
+      "sb $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "sb $a0, -256($a1)\n"
+      "sb $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+
+      "sh $a0, 0($a0)\n"
+      "sh $a0, 0($a1)\n"
+      "sh $a0, 2($a1)\n"
+      "sh $a0, 256($a1)\n"
+      "sh $a0, 1000($a1)\n"
+      "sh $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "sh $a0, -256($a1)\n"
+      "sh $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+
+      "sw $a0, 0($a0)\n"
+      "sw $a0, 0($a1)\n"
+      "sw $a0, 4($a1)\n"
+      "sw $a0, 256($a1)\n"
+      "sw $a0, 1000($a1)\n"
+      "sw $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "sw $a0, -256($a1)\n"
+      "sw $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+
+      "sd $a0, 0($a0)\n"
+      "sd $a0, 0($a1)\n"
+      "sw $a0, 4($a1)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($a1)\n"
+      "sd $a0, 256($a1)\n"
+      "sd $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "sd $a0, -256($a1)\n"
+      "sd $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n";
+  DriverStr(expected, "StoreToOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) {
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 4);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 256);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x7FFC);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x8000);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x8004);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x10000);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x12345678);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, -256);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, -32768);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 4);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 256);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x7FFC);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x8000);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x8004);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x10000);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x12345678);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -256);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -32768);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  const char* expected =
+      "swc1 $f0, 0($a0)\n"
+      "swc1 $f0, 4($a0)\n"
+      "swc1 $f0, 256($a0)\n"
+      "swc1 $f0, 0x7FFC($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "swc1 $f0, -256($a0)\n"
+      "swc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+
+      "sdc1 $f0, 0($a0)\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($a0)\n"
+      "sw $t3, 8($a0)\n"
+      "sdc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a0\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t3, 8($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t3, 8($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "sdc1 $f0, -256($a0)\n"
+      "sdc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n";
+  DriverStr(expected, "StoreFpuToOffset");
+}
+
+#undef __
+
 }  // namespace art
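
Taken together, the LoadFromOffset/StoreToOffset expectations above encode two independent decisions: whether a doubleword offset is 8-byte aligned (if not, the access is split into two 32-bit halves, low word at offset and high word at offset + 4, combined via dins/dsrl32 for GPRs or mthc1/mfhc1 for FPRs), and whether every offset used still fits a signed 16-bit immediate (if not, an adjusted base is first materialized in $at). A compilable sketch of that classification, inferred from the expected strings rather than lifted from the assembler:

    #include <cstdint>

    // Split a doubleword access when the offset is not 8-byte aligned.
    bool NeedsSplit(int32_t offset) {
      return (offset & 0x7) != 0;
    }

    // Route the base through $at when any word offset used by the access falls
    // outside the signed 16-bit immediate range. Note the 0x7FFC case above:
    // the first word fits, but offset + 4 == 0x8000 does not, so $at is used.
    bool NeedsAtBase(int32_t offset, bool split) {
      const int32_t last = split ? offset + 4 : offset;
      return offset < -0x8000 || last > 0x7FFF;
    }
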
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 5c8ff8f..4db9411 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -113,6 +113,25 @@
   qpoints->pShrLong = nullptr;
   qpoints->pUshrLong = nullptr;
 
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
+
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
   qpoints->pStringCompareTo = art_quick_string_compareto;
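
The new arm64 block follows the existing entrypoint-wiring pattern: each quick entrypoint slot is a plain function pointer, so the math intrinsics can route straight to libm with no trampoline. A reduced stand-alone model of the pattern (the struct and field names are stand-ins, not ART's QuickEntryPoints layout):

    #include <math.h>

    // Two representative slots; the real table has one per routed function.
    struct MathEntryPoints {
      double (*pCos)(double);
      double (*pAtan2)(double, double);
    };

    void InitMathEntryPoints(MathEntryPoints* qpoints) {
      qpoints->pCos = cos;      // unary libm functions bind directly
      qpoints->pAtan2 = atan2;  // as do binary ones such as atan2 and hypot
    }
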
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.h b/runtime/arch/arm64/instruction_set_features_arm64.h
index 805131f..abd7e83 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.h
+++ b/runtime/arch/arm64/instruction_set_features_arm64.h
@@ -66,14 +66,6 @@
       return fix_cortex_a53_843419_;
   }
 
-  // NOTE: This flag can be tunned on a CPU basis. In general all ARMv8 CPUs
-  // should prefer the Acquire-Release semantics over the explicit DMBs when
-  // handling load/store-volatile. For a specific use case see the ARM64
-  // Optimizing backend.
-  bool PreferAcquireRelease() const {
-    return true;
-  }
-
   virtual ~Arm64InstructionSetFeatures() {}
 
  protected:
diff --git a/runtime/arch/arm64/instruction_set_features_arm64_test.cc b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
index 599f24e..027e59c 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64_test.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
@@ -30,8 +30,6 @@
   EXPECT_TRUE(arm64_features->Equals(arm64_features.get()));
   EXPECT_STREQ("smp,a53", arm64_features->GetFeatureString().c_str());
   EXPECT_EQ(arm64_features->AsBitmap(), 3U);
-  // See the comments in instruction_set_features_arm64.h.
-  EXPECT_TRUE(arm64_features->AsArm64InstructionSetFeatures()->PreferAcquireRelease());
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index 1d07d47..b027c95 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -61,6 +61,7 @@
   RA   = 31,  // Return address.
   TR   = S1,  // ART Thread Register
   TMP  = T8,  // scratch register (in addition to AT)
+  TMP2 = T3,  // scratch register (in addition to AT, reserved for assembler)
   kNumberOfGpuRegisters = 32,
   kNoGpuRegister = -1  // Signals an illegal register.
 };
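
Reserving TMP2 is what licenses the hard-coded $t3 in the assembler test expectations earlier in this change: T3 is set aside for the assembler itself, so the split load/store sequences can scribble on it without clobbering allocatable state. A minimal re-declaration of the aliasing (not ART code; T3 is GPR 15 in the numbering used here):

    enum GpuRegisterSketch {
      T3 = 15,
      TMP2 = T3,  // assembler-private scratch, hence "$t3" in expected output
    };

    static_assert(TMP2 == T3, "the test expectations above hard-code $t3");
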
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 74eb722..28540c8 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -41,17 +41,15 @@
 
 namespace art {
 
+template <ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtMethod::GetDeclaringClassUnchecked() {
   GcRootSource gc_root_source(this);
-  return declaring_class_.Read(&gc_root_source);
+  return declaring_class_.Read<kReadBarrierOption>(&gc_root_source);
 }
 
-inline mirror::Class* ArtMethod::GetDeclaringClassNoBarrier() {
-  return declaring_class_.Read<kWithoutReadBarrier>();
-}
-
+template <ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtMethod::GetDeclaringClass() {
-  mirror::Class* result = GetDeclaringClassUnchecked();
+  mirror::Class* result = GetDeclaringClassUnchecked<kReadBarrierOption>();
   if (kIsDebugBuild) {
     if (!IsRuntimeMethod()) {
       CHECK(result != nullptr) << this;
@@ -79,24 +77,28 @@
 
 // AssertSharedHeld doesn't work in GetAccessFlags, so use a NO_THREAD_SAFETY_ANALYSIS helper.
 // TODO: Figure out why ASSERT_SHARED_CAPABILITY doesn't work.
-ALWAYS_INLINE
-static inline void DoGetAccessFlagsHelper(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
-  CHECK(method->IsRuntimeMethod() || method->GetDeclaringClass()->IsIdxLoaded() ||
-        method->GetDeclaringClass()->IsErroneous());
+template <ReadBarrierOption kReadBarrierOption>
+ALWAYS_INLINE static inline void DoGetAccessFlagsHelper(ArtMethod* method)
+    NO_THREAD_SAFETY_ANALYSIS {
+  CHECK(method->IsRuntimeMethod() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
 }
 
+template <ReadBarrierOption kReadBarrierOption>
 inline uint32_t ArtMethod::GetAccessFlags() {
   if (kIsDebugBuild) {
     Thread* self = Thread::Current();
     if (!Locks::mutator_lock_->IsSharedHeld(self)) {
       ScopedObjectAccess soa(self);
-      CHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
-            GetDeclaringClass()->IsErroneous());
+      CHECK(IsRuntimeMethod() ||
+            GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+            GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
     } else {
       // We cannot use SOA in this case. We might be holding the lock, but may not be in the
       // runnable state (e.g., during GC).
       Locks::mutator_lock_->AssertSharedHeld(self);
-      DoGetAccessFlagsHelper(this);
+      DoGetAccessFlagsHelper<kReadBarrierOption>(this);
     }
   }
   return access_flags_;
@@ -469,7 +471,7 @@
 
 template <typename Visitor>
 inline void ArtMethod::UpdateObjectsForImageRelocation(const Visitor& visitor) {
-  mirror::Class* old_class = GetDeclaringClassNoBarrier();
+  mirror::Class* old_class = GetDeclaringClassUnchecked<kWithoutReadBarrier>();
   mirror::Class* new_class = visitor(old_class);
   if (old_class != new_class) {
     SetDeclaringClass(new_class);
@@ -486,9 +488,9 @@
   }
 }
 
-template <typename Visitor>
+template <ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void ArtMethod::UpdateEntrypoints(const Visitor& visitor) {
-  if (IsNative()) {
+  if (IsNative<kReadBarrierOption>()) {
     const void* old_native_code = GetEntryPointFromJni();
     const void* new_native_code = visitor(old_native_code);
     if (old_native_code != new_native_code) {
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 440e796..ce23c2a 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -57,11 +57,10 @@
                                         jobject jlr_method)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE mirror::Class* GetDeclaringClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE mirror::Class* GetDeclaringClassNoBarrier()
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE mirror::Class* GetDeclaringClassUnchecked()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -77,6 +76,7 @@
 
   // Note: GetAccessFlags acquires the mutator lock in debug mode to check that it is not called for
   // a proxy method.
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE uint32_t GetAccessFlags();
 
   void SetAccessFlags(uint32_t new_access_flags) {
@@ -154,8 +154,9 @@
     return (GetAccessFlags() & kAccDefault) != 0;
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsNative() {
-    return (GetAccessFlags() & kAccNative) != 0;
+    return (GetAccessFlags<kReadBarrierOption>() & kAccNative) != 0;
   }
 
   bool IsFastNative() {
@@ -485,7 +486,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Update entry points by passing them through the visitor.
-  template <typename Visitor>
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
   ALWAYS_INLINE void UpdateEntrypoints(const Visitor& visitor);
 
  protected:
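
With GetDeclaringClassNoBarrier() folded into a template parameter, the barrier choice is spelled at each call site instead of being baked into the method name; the fault_handler.cc and thread.cc hunks later in this change convert their callers accordingly. The call-site pattern, as a sketch that assumes ART's headers, a valid ArtMethod* named method, and the mutator lock held:

    // The default template argument keeps existing callers unchanged:
    mirror::Class* klass = method->GetDeclaringClass();  // kWithReadBarrier

    // Callers that must not trigger a read barrier now say so explicitly:
    mirror::Class* raw =
        method->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
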
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 5ef199c..c739490 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1182,11 +1182,15 @@
   ClassTable* const table_;
 };
 
-void ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
+bool ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
     Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
-    bool added_class_table) {
+    bool added_class_table,
+    bool* out_forward_dex_cache_array,
+    std::string* out_error_msg) {
+  DCHECK(out_forward_dex_cache_array != nullptr);
+  DCHECK(out_error_msg != nullptr);
   Thread* const self = Thread::Current();
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   const ImageHeader& header = space->GetImageHeader();
@@ -1194,8 +1198,11 @@
   // class loader fields.
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
   ClassTable* table = InsertClassTableForClassLoader(class_loader.Get());
-  // TODO: Store class table in the image to avoid manually adding the classes.
-  for (int32_t i = 0, num_dex_caches = dex_caches->GetLength(); i < num_dex_caches; i++) {
+  // Dex cache array fixup is all or nothing: we must reject app images that mix the two cases,
+  // since we rely on clobbering the dex cache arrays in the image to forward to the .bss arrays.
+  size_t num_dex_caches_with_bss_arrays = 0;
+  const size_t num_dex_caches = dex_caches->GetLength();
+  for (size_t i = 0; i < num_dex_caches; i++) {
     mirror::DexCache* const dex_cache = dex_caches->Get(i);
     const DexFile* const dex_file = dex_cache->GetDexFile();
     // If the oat file expects the dex cache arrays to be in the BSS, then allocate there and
@@ -1209,22 +1216,23 @@
     CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
     CHECK_EQ(num_methods, dex_cache->NumResolvedMethods());
     CHECK_EQ(num_fields, dex_cache->NumResolvedFields());
-    if (dex_file->GetOatDexFile() != nullptr &&
-        dex_file->GetOatDexFile()->GetDexCacheArrays() != nullptr) {
+    const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
+    if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
+      ++num_dex_caches_with_bss_arrays;
       DexCacheArraysLayout layout(image_pointer_size_, dex_file);
-      uint8_t* const raw_arrays = dex_file->GetOatDexFile()->GetDexCacheArrays();
-      // The space is not yet visible to the GC, we can avoid the read barriers and use
-      // std::copy_n.
+      uint8_t* const raw_arrays = oat_dex_file->GetDexCacheArrays();
+      // The space is not yet visible to the GC; we can avoid the read barriers and use std::copy_n.
       if (num_strings != 0u) {
+        GcRoot<mirror::String>* const image_resolved_strings = dex_cache->GetStrings();
         GcRoot<mirror::String>* const strings =
             reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
         for (size_t j = 0; kIsDebugBuild && j < num_strings; ++j) {
           DCHECK(strings[j].IsNull());
         }
-        std::copy_n(dex_cache->GetStrings(), num_strings, strings);
+        std::copy_n(image_resolved_strings, num_strings, strings);
+        *reinterpret_cast<GcRoot<mirror::String>**>(image_resolved_strings) = strings;
         dex_cache->SetStrings(strings);
       }
-
       if (num_types != 0u) {
         GcRoot<mirror::Class>* const image_resolved_types = dex_cache->GetResolvedTypes();
         GcRoot<mirror::Class>* const types =
@@ -1282,6 +1290,9 @@
           // Update the class loader from the one in the image class loader to the one that loaded
           // the app image.
           klass->SetClassLoader(class_loader.Get());
+          // The resolved type could be from another dex cache; go through the dex cache just in
+          // case.
+          klass->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
           // If there are multiple dex caches, there may be the same class multiple times
           // in different dex caches. Check for this since inserting will add duplicates
           // otherwise.
@@ -1326,7 +1337,6 @@
               CHECK_EQ(table->LookupByDescriptor(super_class), super_class);
             }
           }
-          DCHECK_EQ(klass->GetClassLoader(), class_loader.Get());
           if (kIsDebugBuild) {
             for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
               const void* code = m.GetEntryPointFromQuickCompiledCode();
@@ -1354,20 +1364,66 @@
       }
     }
   }
-  {
+  *out_forward_dex_cache_array = num_dex_caches_with_bss_arrays != 0;
+  if (*out_forward_dex_cache_array) {
+    if (num_dex_caches_with_bss_arrays != num_dex_caches) {
+      // Reject application image since we cannot forward only some of the dex cache arrays.
+      // TODO: We could get around this by having a dedicated forwarding slot. It should be an
+      // uncommon case.
+      *out_error_msg = StringPrintf("Dex caches in bss do not match total: %zu vs %zu",
+                                    num_dex_caches_with_bss_arrays,
+                                    num_dex_caches);
+      return false;
+    }
     FixupArtMethodArrayVisitor visitor(header);
     header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-        &visitor, space->Begin(), sizeof(void*));
+        &visitor,
+        space->Begin(),
+        sizeof(void*));
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
   }
   if (kIsDebugBuild) {
     ClassTable* const class_table = class_loader.Get()->GetClassTable();
     VerifyClassInTableArtMethodVisitor visitor2(class_table);
     header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-        &visitor2, space->Begin(), sizeof(void*));
+        &visitor2,
+        space->Begin(),
+        sizeof(void*));
   }
+  return true;
 }
 
+class UpdateClassLoaderAndResolvedStringsVisitor {
+ public:
+  UpdateClassLoaderAndResolvedStringsVisitor(gc::space::ImageSpace* space,
+                                             mirror::ClassLoader* class_loader,
+                                             bool forward_strings)
+      : space_(space),
+        class_loader_(class_loader),
+        forward_strings_(forward_strings) {}
+
+  bool operator()(mirror::Class* klass) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (forward_strings_) {
+      GcRoot<mirror::String>* strings = klass->GetDexCacheStrings();
+      if (strings != nullptr) {
+        DCHECK(space_->GetImageHeader().GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
+            reinterpret_cast<uint8_t*>(strings) - space_->Begin()))
+            << "String dex cache array for " << PrettyClass(klass) << " is not in app image";
+        GcRoot<mirror::String>* new_strings = *reinterpret_cast<GcRoot<mirror::String>**>(strings);
+        DCHECK_NE(strings, new_strings);
+        klass->SetDexCacheStrings(new_strings);
+      }
+    }
+    // Finally, update class loader.
+    klass->SetClassLoader(class_loader_);
+    return true;
+  }
+
+  gc::space::ImageSpace* const space_;
+  mirror::ClassLoader* const class_loader_;
+  const bool forward_strings_;
+};
+
 bool ClassLinker::AddImageSpace(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
@@ -1576,21 +1632,39 @@
   if (app_image) {
     GetOrCreateAllocatorForClassLoader(class_loader.Get());  // Make sure we have a linear alloc.
   }
-  if (class_table_section.Size() > 0u) {
-    const uint64_t start_time2 = NanoTime();
+  ClassTable* class_table = nullptr;
+  {
     WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    ClassTable* const class_table = InsertClassTableForClassLoader(class_loader.Get());
-    class_table->ReadFromMemory(space->Begin() + class_table_section.Offset());
-    if (app_image) {
-      class_table->SetClassLoader(class_loader.Get());
-    } else {
-      dex_cache_boot_image_class_lookup_required_ = false;
+    class_table = InsertClassTableForClassLoader(class_loader.Get());
+    if (class_table_section.Size() > 0u) {
+      const uint64_t start_time2 = NanoTime();
+      class_table->ReadFromMemory(space->Begin() + class_table_section.Offset());
+      if (!app_image) {
+        dex_cache_boot_image_class_lookup_required_ = false;
+      }
+      VLOG(image) << "Adding class table classes took " << PrettyDuration(NanoTime() - start_time2);
+      added_class_table = true;
     }
-    VLOG(image) << "Adding class table classes took " << PrettyDuration(NanoTime() - start_time2);
-    added_class_table = true;
   }
   if (app_image) {
-    UpdateAppImageClassLoadersAndDexCaches(space, class_loader, dex_caches, added_class_table);
+    bool forward_dex_cache_arrays = false;
+    if (!UpdateAppImageClassLoadersAndDexCaches(space,
+                                                class_loader,
+                                                dex_caches,
+                                                added_class_table,
+                                                /*out*/&forward_dex_cache_arrays,
+                                                /*out*/error_msg)) {
+      return false;
+    }
+    if (added_class_table) {
+      WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+      // Update class loader and resolved strings. If added_class_table is false, the resolved
+      // strings were already updated in UpdateAppImageClassLoadersAndDexCaches.
+      UpdateClassLoaderAndResolvedStringsVisitor visitor(space,
+                                                         class_loader.Get(),
+                                                         forward_dex_cache_arrays);
+      class_table->Visit(visitor);
+    }
   }
   VLOG(class_linker) << "Adding image space took " << PrettyDuration(NanoTime() - start_time);
   return true;
@@ -1677,7 +1751,7 @@
   void Visit(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_) OVERRIDE {
     ClassTable* const class_table = class_loader->GetClassTable();
-    if (!done_ && class_table != nullptr && !class_table->Visit(visitor_)) {
+    if (!done_ && class_table != nullptr && !class_table->Visit(*visitor_)) {
       // If the visitor ClassTable returns false it means that we don't need to continue.
       done_ = true;
     }
@@ -1690,7 +1764,7 @@
 };
 
 void ClassLinker::VisitClassesInternal(ClassVisitor* visitor) {
-  if (boot_class_table_.Visit(visitor)) {
+  if (boot_class_table_.Visit(*visitor)) {
     VisitClassLoaderClassesVisitor loader_visitor(visitor);
     VisitClassLoaders(&loader_visitor);
   }
@@ -1713,7 +1787,7 @@
 
 class GetClassesInToVector : public ClassVisitor {
  public:
-  bool Visit(mirror::Class* klass) OVERRIDE {
+  bool operator()(mirror::Class* klass) OVERRIDE {
     classes_.push_back(klass);
     return true;
   }
@@ -1725,7 +1799,7 @@
   explicit GetClassInToObjectArray(mirror::ObjectArray<mirror::Class>* arr)
       : arr_(arr), index_(0) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     ++index_;
     if (index_ <= arr_->GetLength()) {
       arr_->Set(index_ - 1, klass);
@@ -1746,16 +1820,17 @@
 void ClassLinker::VisitClassesWithoutClassesLock(ClassVisitor* visitor) {
   // TODO: it may be possible to avoid secondary storage if we iterate over dex caches. The problem
   // is avoiding duplicates.
+  Thread* const self = Thread::Current();
   if (!kMovingClasses) {
+    ScopedAssertNoThreadSuspension nts(self, __FUNCTION__);
     GetClassesInToVector accumulator;
     VisitClasses(&accumulator);
     for (mirror::Class* klass : accumulator.classes_) {
-      if (!visitor->Visit(klass)) {
+      if (!visitor->operator()(klass)) {
         return;
       }
     }
   } else {
-    Thread* const self = Thread::Current();
     StackHandleScope<1> hs(self);
     auto classes = hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
     // We size the array assuming classes won't be added to the class table during the visit.
@@ -1783,7 +1858,7 @@
       // the class table grew then the loop repeats. If classes are created after the loop has
       // finished then we don't visit.
       mirror::Class* klass = classes->Get(i);
-      if (klass != nullptr && !visitor->Visit(klass)) {
+      if (klass != nullptr && !visitor->operator()(klass)) {
         return;
       }
     }
@@ -7157,7 +7232,7 @@
  public:
   explicit DumpClassVisitor(int flags) : flags_(flags) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     klass->DumpClass(LOG(ERROR), flags_);
     return true;
   }
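
The core trick in UpdateAppImageClassLoadersAndDexCaches is the store back through image_resolved_strings: the first pointer-sized chunk of the old in-image strings array is clobbered with the address of the relocated array, and UpdateClassLoaderAndResolvedStringsVisitor later follows that forwarding pointer for every class still holding the stale array. A self-contained model of the scheme (not ART code; Slot stands in for the 32-bit GcRoot<mirror::String>, and the type punning mirrors what the patch itself does):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    using Slot = uint32_t;

    // Overwrite the start of the old array with a forwarding pointer. This is
    // why the layout change at the end of this patch guarantees the array is
    // at least pointer-sized and pointer-aligned.
    void InstallForwarding(Slot* old_array, Slot* new_array) {
      *reinterpret_cast<Slot**>(old_array) = new_array;
    }

    Slot* FollowForwarding(Slot* old_array) {
      return *reinterpret_cast<Slot**>(old_array);
    }

    int main() {
      alignas(void*) Slot image_array[4] = {1, 2, 3, 4};
      Slot bss_array[4];
      std::memcpy(bss_array, image_array, sizeof(image_array));  // std::copy_n in the patch
      InstallForwarding(image_array, bss_array);
      assert(FollowForwarding(image_array) == bss_array);
      return 0;
    }
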
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 5176cbd..9217c32 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -60,6 +60,13 @@
 
 enum VisitRootFlags : uint8_t;
 
+class ClassVisitor {
+ public:
+  virtual ~ClassVisitor() {}
+  // Return true to continue visiting.
+  virtual bool operator()(mirror::Class* klass) = 0;
+};
+
 class ClassLoaderVisitor {
  public:
   virtual ~ClassLoaderVisitor() {}
@@ -1007,11 +1014,13 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
-  void UpdateAppImageClassLoadersAndDexCaches(
+  bool UpdateAppImageClassLoadersAndDexCaches(
       gc::space::ImageSpace* space,
       Handle<mirror::ClassLoader> class_loader,
       Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
-      bool added_class_table)
+      bool added_class_table,
+      bool* out_forward_dex_cache_array,
+      std::string* out_error_msg)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index aef02b6..e512906 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -28,6 +28,9 @@
       visitor.VisitRoot(root.AddressWithoutBarrier());
     }
   }
+  for (GcRoot<mirror::Object>& root : dex_files_) {
+    visitor.VisitRoot(root.AddressWithoutBarrier());
+  }
 }
 
 template<class Visitor>
@@ -42,6 +45,19 @@
   }
 }
 
+template <typename Visitor>
+bool ClassTable::Visit(Visitor& visitor) {
+  for (ClassSet& class_set : classes_) {
+    for (GcRoot<mirror::Class>& root : class_set) {
+      if (!visitor(root.Read())) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_TABLE_INL_H_
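
Templatizing Visit (together with switching ClassVisitor from a virtual Visit() to operator(), as the class_linker.h hunk above does) lets any callable with the right signature walk a class table, devirtualized at the call site. A usage sketch, assuming ART's headers and a ClassTable& named table:

    struct CountClasses {
      size_t count = 0;
      bool operator()(mirror::Class*) {
        ++count;
        return true;  // returning false would stop the visit early
      }
    };

    CountClasses counter;  // Visit takes Visitor&, so pass an lvalue
    table.Visit(counter);
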
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 2a4f0e0..afb0556 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -73,17 +73,6 @@
   return existing;
 }
 
-bool ClassTable::Visit(ClassVisitor* visitor) {
-  for (ClassSet& class_set : classes_) {
-    for (GcRoot<mirror::Class>& root : class_set) {
-      if (!visitor->Visit(root.Read())) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
 size_t ClassTable::NumZygoteClasses() const {
   size_t sum = 0;
   for (size_t i = 0; i < classes_.size() - 1; ++i) {
@@ -183,12 +172,4 @@
   return read_count;
 }
 
-void ClassTable::SetClassLoader(mirror::ClassLoader* class_loader) {
-  for (const ClassSet& class_set : classes_) {
-    for (const GcRoot<mirror::Class>& root : class_set) {
-      root.Read()->SetClassLoader(class_loader);
-    }
-  }
-}
-
 }  // namespace art
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 0b42035..5f2eb48 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -36,13 +36,6 @@
   class ClassLoader;
 }  // namespace mirror
 
-class ClassVisitor {
- public:
-  virtual ~ClassVisitor() {}
-  // Return true to continue visiting.
-  virtual bool Visit(mirror::Class* klass) = 0;
-};
-
 // Each loader has a ClassTable
 class ClassTable {
  public:
@@ -80,8 +73,9 @@
       NO_THREAD_SAFETY_ANALYSIS
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
-  // Return false if the callback told us to exit.
-  bool Visit(ClassVisitor* visitor)
+  // Stops visiting if the visitor returns false.
+  template <typename Visitor>
+  bool Visit(Visitor& visitor)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
   // Return the first class that matches the descriptor. Returns null if there are none.
@@ -118,11 +112,6 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Change the class loader of all the contained classes.
-  void SetClassLoader(mirror::ClassLoader* class_loader)
-    REQUIRES(Locks::classlinker_classes_lock_)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
  private:
   class ClassDescriptorHashEquals {
    public:
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index a0f875d..904490a 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -983,7 +983,7 @@
  public:
   explicit ClassListCreator(std::vector<JDWP::RefTypeId>* classes) : classes_(classes) {}
 
-  bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!c->IsPrimitive()) {
       classes_->push_back(Dbg::GetObjectRegistry()->AddRefType(c));
     }
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 5345b89..5c5abeb 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -349,7 +349,7 @@
   // Check that the class pointer inside the object is not null and is aligned.
   // TODO: Method might be not a heap address, and GetClass could fault.
   // No read barrier because method_obj may not be a real object.
-  mirror::Class* cls = method_obj->GetDeclaringClassNoBarrier();
+  mirror::Class* cls = method_obj->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
   if (cls == nullptr) {
     VLOG(signals) << "not a class";
     return false;
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 998db52..08b1a00 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1047,7 +1047,7 @@
       if (strings != nullptr) {
         GcRoot<mirror::String>* new_strings = fixup_adapter.ForwardObject(strings);
         if (strings != new_strings) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::StringsOffset(), new_strings);
+          dex_cache->SetStrings(new_strings);
         }
         dex_cache->FixupStrings<kWithoutReadBarrier>(new_strings, fixup_adapter);
       }
@@ -1055,7 +1055,7 @@
       if (types != nullptr) {
         GcRoot<mirror::Class>* new_types = fixup_adapter.ForwardObject(types);
         if (types != new_types) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedTypesOffset(), new_types);
+          dex_cache->SetResolvedTypes(new_types);
         }
         dex_cache->FixupResolvedTypes<kWithoutReadBarrier>(new_types, fixup_adapter);
       }
@@ -1063,7 +1063,7 @@
       if (methods != nullptr) {
         ArtMethod** new_methods = fixup_adapter.ForwardObject(methods);
         if (methods != new_methods) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedMethodsOffset(), new_methods);
+          dex_cache->SetResolvedMethods(new_methods);
         }
         for (size_t j = 0, num = dex_cache->NumResolvedMethods(); j != num; ++j) {
           ArtMethod* orig = mirror::DexCache::GetElementPtrSize(new_methods, j, sizeof(void*));
@@ -1077,7 +1077,7 @@
       if (fields != nullptr) {
         ArtField** new_fields = fixup_adapter.ForwardObject(fields);
         if (fields != new_fields) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedFieldsOffset(), new_fields);
+          dex_cache->SetResolvedFields(new_fields);
         }
         for (size_t j = 0, num = dex_cache->NumResolvedFields(); j != num; ++j) {
           ArtField* orig = mirror::DexCache::GetElementPtrSize(new_fields, j, sizeof(void*));
diff --git a/runtime/image.cc b/runtime/image.cc
index de00343..1f54e3e 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '6', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '7', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index c57b1bb..7484635 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -55,7 +55,7 @@
   explicit InstallStubsClassVisitor(Instrumentation* instrumentation)
       : instrumentation_(instrumentation) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
     instrumentation_->InstallStubsForClass(klass);
     return true;  // we visit all classes.
   }
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 4e0146c..b711181 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -239,11 +239,11 @@
 
 void Jit::DumpTypeInfoForLoadedTypes(ClassLinker* linker) {
   struct CollectClasses : public ClassVisitor {
-    bool Visit(mirror::Class* klass) override {
+    bool operator()(mirror::Class* klass) override {
       classes_.push_back(klass);
       return true;
     }
-    std::vector<mirror::Class*> classes_;
+    mutable std::vector<mirror::Class*> classes_;
   };
 
   if (generate_debug_info_) {
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 6daade0..262c932 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -468,7 +468,7 @@
       // the DEX code has not been quickened.
       std::string error_msg;
       for (const OatFile::OatDexFile* current : file.GetOatDexFiles()) {
-        const DexFile* const dex_file = current->OpenDexFile(&error_msg).release();
+        std::unique_ptr<const DexFile> dex_file = current->OpenDexFile(&error_msg);
         DCHECK(dex_file != nullptr);
         for (size_t i = 0, e = dex_file->NumClassDefs(); i < e; ++i) {
           DCHECK_EQ(current->GetOatClass(i).GetType(), kOatClassNoneCompiled);
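
The oat_file_assistant.cc hunk is a leak fix: release() handed ownership to a raw pointer that was never deleted, once per dex file per query. A minimal stand-alone model of before and after (DexFileSketch and OpenSketch are stand-ins for the real DexFile and OatDexFile::OpenDexFile):

    #include <memory>
    #include <string>

    struct DexFileSketch { size_t num_class_defs = 0; };

    std::unique_ptr<const DexFileSketch> OpenSketch(std::string* /*error_msg*/) {
      return std::unique_ptr<const DexFileSketch>(new DexFileSketch());
    }

    void Before(std::string* error_msg) {
      const DexFileSketch* dex_file = OpenSketch(error_msg).release();
      (void)dex_file;  // ... used, then never freed
    }

    void After(std::string* error_msg) {
      std::unique_ptr<const DexFileSketch> dex_file = OpenSketch(error_msg);
      (void)dex_file;  // ... used, then freed automatically at scope exit
    }
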
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 21241d2..2abcd67 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2704,7 +2704,7 @@
   // Visiting the declaring class is necessary so that we don't unload the class of a method that
   // is executing. We need to ensure that the code stays mapped.
   void VisitDeclaringClass(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::Class* klass = method->GetDeclaringClassNoBarrier();
+    mirror::Class* klass = method->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
     // klass can be null for runtime methods.
     if (klass != nullptr) {
       mirror::Object* new_ref = klass;
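
The thread.cc hunk reads the declaring class without a read barrier because the root visitor itself decides whether the reference moved, and the surrounding code writes the root back only on change. A sketch of that read-then-patch pattern, with simplified stand-in types:

    struct Object {};

    struct RootVisitor {
      virtual ~RootVisitor() {}
      // May return a relocated address for the object.
      virtual Object* Visit(Object* old_ref) = 0;
    };

    void VisitRoot(Object** root, RootVisitor* visitor) {
      Object* old_ref = *root;  // Plain read, no read barrier.
      if (old_ref == nullptr) {
        return;  // The class can be null, e.g. for runtime methods.
      }
      Object* new_ref = visitor->Visit(old_ref);
      if (new_ref != old_ref) {
        *root = new_ref;  // Patch the root only if the object moved.
      }
    }
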
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index f6ee6a2..d63083c 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -87,11 +87,13 @@
 }
 
 inline size_t DexCacheArraysLayout::StringsSize(size_t num_elements) const {
-  return ArraySize(sizeof(GcRoot<mirror::String>), num_elements);
+  // App image patching relies on having enough room for a forwarding pointer in the strings array.
+  return std::max(ArraySize(sizeof(GcRoot<mirror::String>), num_elements), pointer_size_);
 }
 
 inline size_t DexCacheArraysLayout::StringsAlignment() const {
-  return alignof(GcRoot<mirror::String>);
+  // App image patching relies on having enough room for a forwarding pointer in the strings array.
+  return pointer_size_;
 }
 
 inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const {
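
The layout change guarantees that even an empty or tiny dex cache array occupies, and is aligned to, at least one pointer, so image patching can store a forwarding pointer in place. A sketch of the sizing rule; ArraySize here is a simplified stand-in for ART's helper:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    inline std::size_t ArraySize(std::size_t element_size, std::size_t n) {
      return element_size * n;  // Stand-in; the real helper also pads.
    }

    inline std::size_t StringsSize(std::size_t n, std::size_t pointer_size) {
      // GcRoot<> holds a 4-byte compressed reference, so on a 64-bit target
      // a zero- or one-element array would be smaller than the 8-byte
      // forwarding pointer; clamping to pointer_size reserves the room.
      return std::max(ArraySize(sizeof(std::uint32_t), n), pointer_size);
    }
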
diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc
index 1337442..82e1fc8 100644
--- a/test/117-nopatchoat/nopatchoat.cc
+++ b/test/117-nopatchoat/nopatchoat.cc
@@ -46,7 +46,7 @@
     return oat_dex_file != nullptr && oat_dex_file->GetOatFile()->IsExecutable();
   }
 
-  static bool isPic(jclass cls) {
+  static bool needsRelocation(jclass cls) {
     const OatFile::OatDexFile* oat_dex_file = getOatDexFile(cls);
 
     if (oat_dex_file == nullptr) {
@@ -54,7 +54,7 @@
     }
 
     const OatFile* oat_file = oat_dex_file->GetOatFile();
-    return oat_file->IsPic();
+    return !oat_file->IsPic() && !oat_file->IsExtractOnly();
   }
 };
 
@@ -66,8 +66,8 @@
   return NoPatchoatTest::hasExecutableOat(cls);
 }
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isPic(JNIEnv*, jclass cls) {
-  return NoPatchoatTest::isPic(cls);
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_needsRelocation(JNIEnv*, jclass cls) {
+  return NoPatchoatTest::needsRelocation(cls);
 }
 
 }  // namespace art
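
The renamed predicate captures why relocation is needed: only files that actually contain position-dependent compiled code require patchoat. PIC code is position-independent by construction, and extract-only files carry no compiled code at all. A sketch of the predicate with stand-in fields mirroring the accessors above:

    // Stand-in for the two OatFile properties queried in the test.
    struct OatFileInfo {
      bool is_pic;
      bool is_extract_only;
    };

    bool NeedsRelocation(const OatFileInfo& f) {
      return !f.is_pic && !f.is_extract_only;
    }
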
diff --git a/test/117-nopatchoat/src/Main.java b/test/117-nopatchoat/src/Main.java
index 425cf48..816eb17 100644
--- a/test/117-nopatchoat/src/Main.java
+++ b/test/117-nopatchoat/src/Main.java
@@ -22,9 +22,9 @@
     // ANDROID_DATA has been relocated, since a non-relocated oat file always has a 0 delta.
     // Hitting this condition should be rare and ideally we would prevent it from happening but
     // there is no way to do so without major changes to the run-test framework.
-    boolean executable_correct = (isPic() ?
-        hasExecutableOat() == true :
-        hasExecutableOat() == (isDex2OatEnabled() || isRelocationDeltaZero()));
+    boolean executable_correct = (needsRelocation() ?
+        hasExecutableOat() == (isDex2OatEnabled() || isRelocationDeltaZero()) :
+        hasExecutableOat() == true);
 
     System.out.println(
         "dex2oat & patchoat are " + ((isDex2OatEnabled()) ? "enabled" : "disabled") +
@@ -49,7 +49,7 @@
 
   private native static boolean isDex2OatEnabled();
 
-  private native static boolean isPic();
+  private native static boolean needsRelocation();
 
   private native static boolean hasOatFile();
 
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 06cfd0a..8f9a32a 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -631,7 +631,8 @@
   /// CHECK-DAG:  <<Array2>>     NullCheck [<<Get1>>]                        loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Len2:i\d+>>  ArrayLength [<<Array2>>]                    loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Bounds2>>    BoundsCheck [<<Index2:i\d+>>,<<Len2>>]      loop:<<InnerLoop>>
-  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>{{(,[ij]\d+)?}}] loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop:B\d+>>
   /// CHECK-DAG:  <<Field1>>     StaticFieldGet                              loop:none
@@ -644,7 +645,8 @@
   /// CHECK-DAG:  <<Get1:l\d+>>  ArrayGet [<<Array1:l\d+>>,<<Index1:i\d+>>]  loop:<<OuterLoop>>
   //  Array reference ..[j] still in inner loop, with a direct index.
   /// CHECK-DAG:  <<Get2:i\d+>>  ArrayGet [<<Array2:l\d+>>,<<Index2:i\d+>>]  loop:<<InnerLoop:B\d+>>
-  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>{{(,[ij]\d+)?}}] loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop>>
   //  Synthetic phi.
diff --git a/test/569-checker-pattern-replacement/src/Main.java b/test/569-checker-pattern-replacement/src/Main.java
index 9a85c81..e2d451c 100644
--- a/test/569-checker-pattern-replacement/src/Main.java
+++ b/test/569-checker-pattern-replacement/src/Main.java
@@ -39,7 +39,8 @@
     /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
     /// CHECK-DAG:  <<Ignored:i\d+>>    IntConstant 77
     /// CHECK-DAG:  <<ClinitCk:l\d+>>   ClinitCheck
-    /// CHECK-DAG:  <<Invoke:l\d+>>     InvokeStaticOrDirect [<<Ignored>>,<<Value>>,<<ClinitCk>>]
+    // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+    /// CHECK-DAG:  <<Invoke:l\d+>>     InvokeStaticOrDirect [<<Ignored>>,<<Value>>{{(,[ij]\d+)?}},<<ClinitCk>>]
     /// CHECK-DAG:                      Return [<<Invoke>>]
 
     /// CHECK-START: java.lang.Object Main.staticReturnArg2(java.lang.String) inliner (after)
@@ -313,7 +314,8 @@
 
     /// CHECK-START: java.lang.Object Main.newObject() inliner (before)
     /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
-    /// CHECK-DAG:              InvokeStaticOrDirect [<<Obj>>] method_name:java.lang.Object.<init>
+    // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+    /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>{{(,[ij]\d+)?}}] method_name:java.lang.Object.<init>
 
     /// CHECK-START: java.lang.Object Main.newObject() inliner (after)
     /// CHECK-NOT:                      InvokeStaticOrDirect
diff --git a/test/570-checker-select/expected.txt b/test/570-checker-select/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/570-checker-select/expected.txt
diff --git a/test/570-checker-select/info.txt b/test/570-checker-select/info.txt
new file mode 100644
index 0000000..6d49532
--- /dev/null
+++ b/test/570-checker-select/info.txt
@@ -0,0 +1 @@
+Tests for HSelect code generation.
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
new file mode 100644
index 0000000..2f8094d
--- /dev/null
+++ b/test/570-checker-select/src/Main.java
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.BoolCond_IntVarVar(boolean, int, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  public static int BoolCond_IntVarVar(boolean cond, int x, int y) {
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: int Main.BoolCond_IntVarCst(boolean, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  public static int BoolCond_IntVarCst(boolean cond, int x) {
+    return cond ? x : 1;
+  }
+
+  /// CHECK-START: int Main.BoolCond_IntCstVar(boolean, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  public static int BoolCond_IntCstVar(boolean cond, int y) {
+    return cond ? 1 : y;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatVarVar(boolean, float, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  public static float BoolCond_FloatVarVar(boolean cond, float x, float y) {
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatVarCst(boolean, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  public static float BoolCond_FloatVarCst(boolean cond, float x) {
+    return cond ? x : 1.0f;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatCstVar(boolean, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  public static float BoolCond_FloatCstVar(boolean cond, float y) {
+    return cond ? 1.0f : y;
+  }
+
+  /// CHECK-START: int Main.IntNonmatCond_IntVarVar(int, int, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  public static int IntNonmatCond_IntVarVar(int a, int b, int x, int y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: int Main.IntMatCond_IntVarVar(int, int, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  public static int IntMatCond_IntVarVar(int a, int b, int x, int y) {
+    int result = (a > b ? x : y);
+    return result + (a > b ? 0 : 1);
+  }
+
+  /// CHECK-START: int Main.FloatLtNonmatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  public static int FloatLtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: int Main.FloatGtNonmatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  public static int FloatGtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    return a < b ? x : y;
+  }
+
+  /// CHECK-START: float Main.FloatGtNonmatCond_FloatVarVar(float, float, float, float) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{f\d+}},{{f\d+}},<<Cond>>]
+
+  public static float FloatGtNonmatCond_FloatVarVar(float a, float b, float x, float y) {
+    return a < b ? x : y;
+  }
+
+  /// CHECK-START: int Main.FloatLtMatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  public static int FloatLtMatCond_IntVarVar(float a, float b, int x, int y) {
+    int result = (a > b ? x : y);
+    return result + (a > b ? 0 : 1);
+  }
+
+  /// CHECK-START: int Main.FloatGtMatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  public static int FloatGtMatCond_IntVarVar(float a, float b, int x, int y) {
+    int result = (a < b ? x : y);
+    return result + (a < b ? 0 : 1);
+  }
+
+  /// CHECK-START: float Main.FloatGtMatCond_FloatVarVar(float, float, float, float) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual
+  /// CHECK-NEXT:       <<Sel:f\d+>>  Select [{{f\d+}},{{f\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     TypeConversion [<<Cond>>]
+
+  public static float FloatGtMatCond_FloatVarVar(float a, float b, float x, float y) {
+    float result = (a < b ? x : y);
+    return result + (a < b ? 0 : 1);
+  }
+
+  public static void assertEqual(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void assertEqual(float expected, float actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertEqual(5, BoolCond_IntVarVar(true, 5, 7));
+    assertEqual(7, BoolCond_IntVarVar(false, 5, 7));
+    assertEqual(5, BoolCond_IntVarCst(true, 5));
+    assertEqual(1, BoolCond_IntVarCst(false, 5));
+    assertEqual(1, BoolCond_IntCstVar(true, 7));
+    assertEqual(7, BoolCond_IntCstVar(false, 7));
+
+    assertEqual(5, BoolCond_FloatVarVar(true, 5, 7));
+    assertEqual(7, BoolCond_FloatVarVar(false, 5, 7));
+    assertEqual(5, BoolCond_FloatVarCst(true, 5));
+    assertEqual(1, BoolCond_FloatVarCst(false, 5));
+    assertEqual(1, BoolCond_FloatCstVar(true, 7));
+    assertEqual(7, BoolCond_FloatCstVar(false, 7));
+
+    assertEqual(5, IntNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, IntNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(5, IntMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, IntMatCond_IntVarVar(2, 3, 5, 7));
+
+    assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtNonmatCond_FloatVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatLtMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtMatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtMatCond_FloatVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(2, Float.NaN, 5, 7));
+  }
+}
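
The new test covers the HSelect instruction, which folds a ternary's if/else diamond into a single data-flow node that backends can lower to a conditional move. A sketch of its semantics; the checker lines above only show that the condition is the last operand, so the order of the two value operands here is an assumption:

    // Select(false_value, true_value, condition) yields true_value when the
    // condition holds, else false_value -- a pure data-flow replacement for
    // an if/else diamond. Value-operand order is assumed for illustration.
    int Select(int false_value, int true_value, bool condition) {
      return condition ? true_value : false_value;
    }

    // A ternary like `cond ? x : y` thus becomes one Select node that a
    // backend can emit as a conditional move (x86 cmov, ARM64 csel)
    // instead of a branch.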