Merge "ART: Add ThreadStart & ThreadEnd"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index d8b780a..c87075f 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -87,7 +87,7 @@
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
 
 ART_GTEST_atomic_method_ref_map_test_DEX_DEPS := Interfaces
-ART_GTEST_class_linker_test_DEX_DEPS := ErroneousA ErroneousB Interfaces MethodTypes MultiDex MyClass Nested Statics StaticsFromCode
+ART_GTEST_class_linker_test_DEX_DEPS := AllFields ErroneousA ErroneousB Interfaces MethodTypes MultiDex MyClass Nested Statics StaticsFromCode
 ART_GTEST_class_table_test_DEX_DEPS := XandY
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main Packages MethodTypes
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 4109345..459aca3 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1166,9 +1166,9 @@
       // belongs.
       oat_index = GetOatIndexForDexCache(dex_cache);
       ImageInfo& image_info = GetImageInfo(oat_index);
-      {
-        // Note: This table is only accessed from the image writer, avoid locking to prevent lock
-        // order violations from root visiting.
+      if (!compile_app_image_) {
+        // Note: Avoid locking to prevent lock order violations from root visiting;
+        // image_info.class_table_ is only accessed from the image writer.
         image_info.class_table_->InsertWithoutLocks(as_klass);
       }
       for (LengthPrefixedArray<ArtField>* cur_fields : fields) {
@@ -1265,7 +1265,14 @@
       // class loader.
       mirror::ClassLoader* class_loader = obj->AsClassLoader();
       if (class_loader->GetClassTable() != nullptr) {
+        DCHECK(compile_app_image_);
+        DCHECK(class_loaders_.empty());
         class_loaders_.insert(class_loader);
+        ImageInfo& image_info = GetImageInfo(oat_index);
+        // Note: Avoid locking to prevent lock order violations from root visiting;
+        // image_info.class_table_ is only accessed from the image writer
+        // and class_loader->GetClassTable() is iterated but not modified.
+        image_info.class_table_->CopyWithoutLocks(*class_loader->GetClassTable());
       }
     }
     AssignImageBinSlot(obj, oat_index);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index b566334..2a30178 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -7151,18 +7151,7 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
-  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
-  // We disable pc-relative load when there is an irreducible loop, as the optimization
-  // is incompatible with it.
-  // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
-  // with irreducible loops.
-  if (GetGraph()->HasIrreducibleLoops() &&
-      (dispatch_info.method_load_kind ==
-          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
-    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
-  }
-
-  return dispatch_info;
+  return desired_dispatch_info;
 }
 
 Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
@@ -7182,8 +7171,7 @@
   // save one load. However, since this is just an intrinsic slow path we prefer this
   // simple and more robust approach rather than trying to determine if that's the case.
   SlowPathCode* slow_path = GetCurrentSlowPath();
-  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
-  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+  if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
     __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
     return temp;
@@ -7191,7 +7179,8 @@
   return location.AsRegister<Register>();
 }
 
-void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                  Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -7240,6 +7229,11 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
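The change above (repeated for the arm64 and VIXL back ends below) splits GenerateStaticOrDirectCall in two so that intrinsics can materialize the callee ArtMethod without emitting the call. A minimal sketch of the resulting shape, with stand-in types rather than the real codegen API:

    // Stand-ins for the real HInvokeStaticOrDirect/Location types.
    struct HInvoke {};
    struct Location {};

    class CodeGen {
     public:
      // New helper: leaves the callee ArtMethod in `temp` (or wherever the
      // method load kind dictates) and reports where it ended up.
      Location GenerateCalleeMethod(HInvoke* invoke, Location temp) {
        (void)invoke;  // per-load-kind materialization elided
        return temp;
      }

      // Old entry point, now a thin wrapper: materialize, then emit the call.
      void GenerateCall(HInvoke* invoke, Location temp) {
        Location callee_method = GenerateCalleeMethod(invoke, temp);
        EmitCallTo(callee_method);  // hypothetical emit step
      }

     private:
      void EmitCallTo(Location callee) { (void)callee; }
    };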
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 52d1857..df2dbc7 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -456,6 +456,7 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 68d0b86..d4dcbc0 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -3993,7 +3993,8 @@
   return desired_dispatch_info;
 }
 
-void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                    Location temp) {
   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
@@ -4047,6 +4048,12 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  // All registers are assumed to be correctly set up.
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index a9dca92..7d3c655 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -527,6 +527,7 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 592ee5a..6c66f8f 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7233,18 +7233,7 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
     HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
-  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
-  // We disable pc-relative load when there is an irreducible loop, as the optimization
-  // is incompatible with it.
-  // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
-  // with irreducible loops.
-  if (GetGraph()->HasIrreducibleLoops() &&
-      (dispatch_info.method_load_kind ==
-          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
-    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
-  }
-
-  return dispatch_info;
+  return desired_dispatch_info;
 }
 
 vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
@@ -7273,7 +7262,7 @@
   return RegisterFrom(location);
 }
 
-void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
+Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall(
     HInvokeStaticOrDirect* invoke, Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
@@ -7324,6 +7313,12 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                      Location temp) {
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index be65353..8ae3b7d 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -537,6 +537,7 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index 9ddcd56..cfcb276 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -65,7 +65,7 @@
     if (invoke->HasPcRelativeDexCache() &&
         !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARMType>(invoke, codegen_)) {
       HArmDexCacheArraysBase* base =
-          GetOrCreateDexCacheArrayBase(invoke->GetDexFileForPcRelativeDexCache());
+          GetOrCreateDexCacheArrayBase(invoke, invoke->GetDexFileForPcRelativeDexCache());
       // Update the element offset in base.
       DexCacheArraysLayout layout(kArmPointerSize, &invoke->GetDexFileForPcRelativeDexCache());
       base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex()));
@@ -75,21 +75,28 @@
     }
   }
 
-  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
-    // Ensure we only initialize the pointer once for each dex file.
-    auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
-    if (lb != dex_cache_array_bases_.end() &&
-        !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
-      return lb->second;
-    }
+  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(HInstruction* cursor,
+                                                       const DexFile& dex_file) {
+    if (GetGraph()->HasIrreducibleLoops()) {
+      HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+      cursor->GetBlock()->InsertInstructionBefore(base, cursor);
+      return base;
+    } else {
+      // Ensure we only initialize the pointer once for each dex file.
+      auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
+      if (lb != dex_cache_array_bases_.end() &&
+          !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
+        return lb->second;
+      }
 
-    // Insert the base at the start of the entry block, move it to a better
-    // position later in MoveBaseIfNeeded().
-    HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
-    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
-    entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
-    dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
-    return base;
+      // Insert the base at the start of the entry block, move it to a better
+      // position later in MoveBaseIfNeeded().
+      HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+      HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+      entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+      dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
+      return base;
+    }
   }
 
   CodeGeneratorARMType* codegen_;
@@ -100,11 +107,6 @@
 };
 
 void DexCacheArrayFixups::Run() {
-  if (graph_->HasIrreducibleLoops()) {
-    // Do not run this optimization, as irreducible loops do not work with an instruction
-    // that can be live-in at the irreducible loop header.
-    return;
-  }
   DexCacheArrayFixupsVisitor visitor(graph_, codegen_);
   visitor.VisitInsertionOrder();
   visitor.MoveBasesIfNeeded();
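Two things changed above: the irreducible-loop bail-out moved from the whole pass into GetOrCreateDexCacheArrayBase(), which now creates a fresh base next to each user instead of a shared one in the entry block; and the memoized path keeps the classic one-descent map idiom. A self-contained sketch of that idiom with std::map (Base stands in for HArmDexCacheArraysBase):

    #include <map>

    struct DexFile {};
    struct Base {};  // stand-in for HArmDexCacheArraysBase

    std::map<const DexFile*, Base*> bases;

    Base* GetOrCreate(const DexFile* dex_file) {
      // lower_bound() finds the first entry not less than the key in one
      // tree descent; key_comp() then distinguishes an exact match.
      auto lb = bases.lower_bound(dex_file);
      if (lb != bases.end() && !bases.key_comp()(dex_file, lb->first)) {
        return lb->second;  // already initialized for this dex file
      }
      Base* base = new Base();
      bases.insert(lb, {dex_file, base});  // hinted insert at lb
      return base;
    }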
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 8f64fae..c262cf9 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -2592,6 +2592,58 @@
   __ Lsr(out, out, 5);
 }
 
+void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Register temp = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0)).AsRegister<Register>();
+
+  // Now get declaring class.
+  __ ldr(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent using intrinsic.
+  __ ldr(IP, Address(temp, disable_flag_offset));
+  __ ldr(temp, Address(temp, slow_path_flag_offset));
+  __ orr(IP, IP, ShifterOperand(temp));
+  __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
+
+  // Fast path.
+  __ ldr(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
@@ -2605,7 +2657,6 @@
 UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
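All three ReferenceGetReferent implementations in this change emit the same fast path: test two static flags on the java.lang.ref.Reference class, branch to the slow path if either is set, otherwise load the referent field directly. A rough C++ equivalent of the emitted ldr/ldr/orr/cbnz sequence (the struct layout is illustrative, not ART's object model):

    #include <cstdint>

    struct Object;
    struct ReferenceClass {
      // Illustrative fields at GetReferenceDisableFlagOffset() and
      // GetReferenceSlowFlagOffset().
      uint32_t disable_intrinsic;
      uint32_t slow_path_enabled;
    };
    struct Reference {
      Object* referent;
    };

    Object* GetReferent(Reference* ref, ReferenceClass* klass,
                        Object* (*slow_path)(Reference*)) {
      // Either flag set (e.g. reference processing in progress) forces the
      // out-of-line call to the real Reference.getReferent().
      if ((klass->disable_intrinsic | klass->slow_path_enabled) != 0u) {
        return slow_path(ref);
      }
      return ref->referent;  // fast path: a plain field load
    }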
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d8a896e..bbf826c 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2773,7 +2773,65 @@
   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
+void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register obj = InputRegisterAt(invoke, 0);
+  Register out = OutputRegister(invoke);
+
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Register temp0 = XRegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
+                                 invoke_direct, locations->GetTemp(0)));
+
+  // Now get declaring class.
+  __ Ldr(temp0.W(), MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent using intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    // Load two adjacent flags in one 64-bit load.
+    __ Ldr(temp0, MemOperand(temp0, disable_flag_offset));
+  } else {
+    UseScratchRegisterScope temps(masm);
+    Register temp1 = temps.AcquireW();
+    __ Ldr(temp1.W(), MemOperand(temp0, disable_flag_offset));
+    __ Ldr(temp0.W(), MemOperand(temp0, slow_path_flag_offset));
+    __ Orr(temp0, temp1, temp0);
+  }
+  __ Cbnz(temp0, slow_path->GetEntryLabel());
+
+  // Fast path.
+  __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
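The arm64 version adds one twist over the ARM code: when the two flag offsets are adjacent, a single 64-bit load covers both flags and the orr disappears. The same idea in portable C++, assuming (as the emitted code must) that the trailing bytes covered by the wide load are zero:

    #include <cstdint>
    #include <cstring>

    struct PackedFlags {
      uint8_t disable_intrinsic;  // at disable_flag_offset
      uint8_t slow_path_enabled;  // at disable_flag_offset + 1
      uint8_t zero_padding[6];    // must be zero for the wide load to work
    };

    bool AnyFlagSet(const PackedFlags* flags) {
      uint64_t both;
      std::memcpy(&both, flags, sizeof(both));  // one 8-byte load
      return both != 0;
    }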
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 85e84d8..68c2d2e 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2688,6 +2688,60 @@
   __ Lsr(out, out, 5);
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register obj = InputRegisterAt(invoke, 0);
+  vixl32::Register out = OutputRegister(invoke);
+
+  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0)));
+
+  // Now get declaring class.
+  __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent using intrinsic.
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp1 = temps.Acquire();
+  __ Ldr(temp1, MemOperand(temp0, disable_flag_offset));
+  __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset));
+  __ Orr(temp0, temp1, temp0);
+  __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel());
+
+  // Fast path.
+  __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  assembler->MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
@@ -2701,7 +2755,6 @@
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index f2cbebb..74b8f06 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -283,6 +283,38 @@
 // FP Operations //
 ///////////////////
 
+TEST_F(AssemblerMIPS64Test, AddS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::AddS, "add.s ${reg1}, ${reg2}, ${reg3}"), "add.s");
+}
+
+TEST_F(AssemblerMIPS64Test, AddD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::AddD, "add.d ${reg1}, ${reg2}, ${reg3}"), "add.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SubS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::SubS, "sub.s ${reg1}, ${reg2}, ${reg3}"), "sub.s");
+}
+
+TEST_F(AssemblerMIPS64Test, SubD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::SubD, "sub.d ${reg1}, ${reg2}, ${reg3}"), "sub.d");
+}
+
+TEST_F(AssemblerMIPS64Test, MulS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::MulS, "mul.s ${reg1}, ${reg2}, ${reg3}"), "mul.s");
+}
+
+TEST_F(AssemblerMIPS64Test, MulD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::MulD, "mul.d ${reg1}, ${reg2}, ${reg3}"), "mul.d");
+}
+
+TEST_F(AssemblerMIPS64Test, DivS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::DivS, "div.s ${reg1}, ${reg2}, ${reg3}"), "div.s");
+}
+
+TEST_F(AssemblerMIPS64Test, DivD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::DivD, "div.d ${reg1}, ${reg2}, ${reg3}"), "div.d");
+}
+
 TEST_F(AssemblerMIPS64Test, SqrtS) {
   DriverStr(RepeatFF(&mips64::Mips64Assembler::SqrtS, "sqrt.s ${reg1}, ${reg2}"), "sqrt.s");
 }
@@ -567,6 +599,26 @@
   DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmtc1, "dmtc1 ${reg1}, ${reg2}"), "Dmtc1");
 }
 
+TEST_F(AssemblerMIPS64Test, Lwc1) {
+  DriverStr(RepeatFRIb(&mips64::Mips64Assembler::Lwc1, -16, "lwc1 ${reg1}, {imm}(${reg2})"),
+            "lwc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Ldc1) {
+  DriverStr(RepeatFRIb(&mips64::Mips64Assembler::Ldc1, -16, "ldc1 ${reg1}, {imm}(${reg2})"),
+            "ldc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Swc1) {
+  DriverStr(RepeatFRIb(&mips64::Mips64Assembler::Swc1, -16, "swc1 ${reg1}, {imm}(${reg2})"),
+            "swc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Sdc1) {
+  DriverStr(RepeatFRIb(&mips64::Mips64Assembler::Sdc1, -16, "sdc1 ${reg1}, {imm}(${reg2})"),
+            "sdc1");
+}
+
 ////////////////
 // CALL / JMP //
 ////////////////
@@ -850,6 +902,16 @@
   DriverStr(RepeatRIb(&mips64::Mips64Assembler::Ldpc, 18, code), "Ldpc");
 }
 
+TEST_F(AssemblerMIPS64Test, Auipc) {
+  DriverStr(RepeatRIb(&mips64::Mips64Assembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc");
+}
+
+TEST_F(AssemblerMIPS64Test, Addiupc) {
+  // The comment from the Lwpc() test applies to this Addiupc() test as well.
+  const char* code = ".set imm, {imm}\naddiupc ${reg}, (imm - ((imm & 0x40000) << 1)) << 2";
+  DriverStr(RepeatRIb(&mips64::Mips64Assembler::Addiupc, 19, code), "Addiupc");
+}
+
 TEST_F(AssemblerMIPS64Test, LoadFarthestNearLabelAddress) {
   mips64::Mips64Label label;
   __ LoadLabelAddress(mips64::V0, &label);
@@ -1079,6 +1141,188 @@
   EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (5 + kAdduCount) * 4);
 }
 
+TEST_F(AssemblerMIPS64Test, Addu) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "addu");
+}
+
+TEST_F(AssemblerMIPS64Test, Addiu) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Addiu, -16, "addiu ${reg1}, ${reg2}, {imm}"),
+            "addiu");
+}
+
+TEST_F(AssemblerMIPS64Test, Daddu) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Daddu, "daddu ${reg1}, ${reg2}, ${reg3}"), "daddu");
+}
+
+TEST_F(AssemblerMIPS64Test, Daddiu) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Daddiu, -16, "daddiu ${reg1}, ${reg2}, {imm}"),
+            "daddiu");
+}
+
+TEST_F(AssemblerMIPS64Test, Subu) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Subu, "subu ${reg1}, ${reg2}, ${reg3}"), "subu");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsubu) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dsubu, "dsubu ${reg1}, ${reg2}, ${reg3}"), "dsubu");
+}
+
+TEST_F(AssemblerMIPS64Test, MulR6) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::MulR6, "mul ${reg1}, ${reg2}, ${reg3}"), "mulR6");
+}
+
+TEST_F(AssemblerMIPS64Test, DivR6) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::DivR6, "div ${reg1}, ${reg2}, ${reg3}"), "divR6");
+}
+
+TEST_F(AssemblerMIPS64Test, ModR6) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::ModR6, "mod ${reg1}, ${reg2}, ${reg3}"), "modR6");
+}
+
+TEST_F(AssemblerMIPS64Test, DivuR6) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::DivuR6, "divu ${reg1}, ${reg2}, ${reg3}"),
+            "divuR6");
+}
+
+TEST_F(AssemblerMIPS64Test, ModuR6) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::ModuR6, "modu ${reg1}, ${reg2}, ${reg3}"),
+            "moduR6");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmul) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dmul, "dmul ${reg1}, ${reg2}, ${reg3}"), "dmul");
+}
+
+TEST_F(AssemblerMIPS64Test, Ddiv) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Ddiv, "ddiv ${reg1}, ${reg2}, ${reg3}"), "ddiv");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmod) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dmod, "dmod ${reg1}, ${reg2}, ${reg3}"), "dmod");
+}
+
+TEST_F(AssemblerMIPS64Test, Ddivu) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Ddivu, "ddivu ${reg1}, ${reg2}, ${reg3}"), "ddivu");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmodu) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dmodu, "dmodu ${reg1}, ${reg2}, ${reg3}"), "dmodu");
+}
+
+TEST_F(AssemblerMIPS64Test, And) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::And, "and ${reg1}, ${reg2}, ${reg3}"), "and");
+}
+
+TEST_F(AssemblerMIPS64Test, Andi) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Andi, 16, "andi ${reg1}, ${reg2}, {imm}"), "andi");
+}
+
+TEST_F(AssemblerMIPS64Test, Or) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Or, "or ${reg1}, ${reg2}, ${reg3}"), "or");
+}
+
+TEST_F(AssemblerMIPS64Test, Ori) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Ori, 16, "ori ${reg1}, ${reg2}, {imm}"), "ori");
+}
+
+TEST_F(AssemblerMIPS64Test, Xor) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Xor, "xor ${reg1}, ${reg2}, ${reg3}"), "xor");
+}
+
+TEST_F(AssemblerMIPS64Test, Xori) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Xori, 16, "xori ${reg1}, ${reg2}, {imm}"), "xori");
+}
+
+TEST_F(AssemblerMIPS64Test, Nor) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Nor, "nor ${reg1}, ${reg2}, ${reg3}"), "nor");
+}
+
+TEST_F(AssemblerMIPS64Test, Lb) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "lb");
+}
+
+TEST_F(AssemblerMIPS64Test, Lh) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lh, -16, "lh ${reg1}, {imm}(${reg2})"), "lh");
+}
+
+TEST_F(AssemblerMIPS64Test, Lw) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lw, -16, "lw ${reg1}, {imm}(${reg2})"), "lw");
+}
+
+TEST_F(AssemblerMIPS64Test, Ld) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Ld, -16, "ld ${reg1}, {imm}(${reg2})"), "ld");
+}
+
+TEST_F(AssemblerMIPS64Test, Lbu) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lbu, -16, "lbu ${reg1}, {imm}(${reg2})"), "lbu");
+}
+
+TEST_F(AssemblerMIPS64Test, Lhu) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lhu, -16, "lhu ${reg1}, {imm}(${reg2})"), "lhu");
+}
+
+TEST_F(AssemblerMIPS64Test, Lwu) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lwu, -16, "lwu ${reg1}, {imm}(${reg2})"), "lwu");
+}
+
+TEST_F(AssemblerMIPS64Test, Lui) {
+  DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lui, 16, "lui ${reg}, {imm}"), "lui");
+}
+
+TEST_F(AssemblerMIPS64Test, Dahi) {
+  DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dahi, 16, "dahi ${reg}, ${reg}, {imm}"), "dahi");
+}
+
+TEST_F(AssemblerMIPS64Test, Dati) {
+  DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dati, 16, "dati ${reg}, ${reg}, {imm}"), "dati");
+}
+
+TEST_F(AssemblerMIPS64Test, Sb) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sb, -16, "sb ${reg1}, {imm}(${reg2})"), "sb");
+}
+
+TEST_F(AssemblerMIPS64Test, Sh) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sh, -16, "sh ${reg1}, {imm}(${reg2})"), "sh");
+}
+
+TEST_F(AssemblerMIPS64Test, Sw) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sw, -16, "sw ${reg1}, {imm}(${reg2})"), "sw");
+}
+
+TEST_F(AssemblerMIPS64Test, Sd) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sd, -16, "sd ${reg1}, {imm}(${reg2})"), "sd");
+}
+
+TEST_F(AssemblerMIPS64Test, Slt) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Slt, "slt ${reg1}, ${reg2}, ${reg3}"), "slt");
+}
+
+TEST_F(AssemblerMIPS64Test, Sltu) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Sltu, "sltu ${reg1}, ${reg2}, ${reg3}"), "sltu");
+}
+
+TEST_F(AssemblerMIPS64Test, Slti) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Slti, -16, "slti ${reg1}, ${reg2}, {imm}"),
+            "slti");
+}
+
+TEST_F(AssemblerMIPS64Test, Sltiu) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sltiu, -16, "sltiu ${reg1}, ${reg2}, {imm}"),
+            "sltiu");
+}
+
+TEST_F(AssemblerMIPS64Test, Move) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Move, "or ${reg1}, ${reg2}, $zero"), "move");
+}
+
+TEST_F(AssemblerMIPS64Test, Clear) {
+  DriverStr(RepeatR(&mips64::Mips64Assembler::Clear, "or ${reg}, $zero, $zero"), "clear");
+}
+
+TEST_F(AssemblerMIPS64Test, Not) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Not, "nor ${reg1}, ${reg2}, $zero"), "not");
+}
+
 TEST_F(AssemblerMIPS64Test, Bitswap) {
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap");
 }
@@ -1230,6 +1474,18 @@
             "dsra32");
 }
 
+TEST_F(AssemblerMIPS64Test, Dsllv) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dsllv, "dsllv ${reg1}, ${reg2}, ${reg3}"), "dsllv");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsrlv) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dsrlv, "dsrlv ${reg1}, ${reg2}, ${reg3}"), "dsrlv");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsrav) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dsrav, "dsrav ${reg1}, ${reg2}, ${reg3}"), "dsrav");
+}
+
 TEST_F(AssemblerMIPS64Test, Sc) {
   DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sc, -9, "sc ${reg1}, {imm}(${reg2})"), "sc");
 }
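The new MIPS64 assembler tests all use the file's Repeat* helpers: drive the assembler function across every register (and immediate) combination and compare the output against an expanded assembly template. A simplified sketch of the template expansion only (not ART's real AssemblerTest harness):

    #include <string>

    // Expand "${reg1}"/"${reg2}" placeholders the way the templates above
    // use them, e.g. "nor ${reg1}, ${reg2}, $zero" -> "nor $v0, $v1, $zero".
    std::string ExpandTemplate(std::string tmpl,
                               const std::string& reg1,
                               const std::string& reg2) {
      auto replace_all = [&tmpl](const std::string& key, const std::string& val) {
        for (size_t pos = tmpl.find(key); pos != std::string::npos;
             pos = tmpl.find(key, pos + val.size())) {
          tmpl.replace(pos, key.size(), val);
        }
      };
      replace_all("${reg1}", reg1);
      replace_all("${reg2}", reg2);
      return tmpl;
    }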
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 14918df..2e258be 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1398,7 +1398,11 @@
         class_loader_(class_loader) {}
 
   bool operator()(ObjPtr<mirror::Class> klass) const REQUIRES_SHARED(Locks::mutator_lock_) {
-    klass->SetClassLoader(class_loader_);
+    // Do not update class loader for boot image classes where the app image
+    // class loader is only the initiating loader but not the defining loader.
+    if (klass->GetClassLoader() != nullptr) {
+      klass->SetClassLoader(class_loader_);
+    }
     return true;
   }
 
@@ -2458,10 +2462,8 @@
     return EnsureResolved(self, descriptor, klass);
   }
   // Class is not yet loaded.
-  if (descriptor[0] == '[') {
-    return CreateArrayClass(self, descriptor, hash, class_loader);
-  } else if (class_loader.Get() == nullptr) {
-    // The boot class loader, search the boot class path.
+  if (descriptor[0] != '[' && class_loader.Get() == nullptr) {
+    // A non-array class with the boot class loader: search the boot class path.
     ClassPathEntry pair = FindInClassPath(descriptor, hash, boot_class_path_);
     if (pair.second != nullptr) {
       return DefineClass(self,
@@ -2474,14 +2476,21 @@
       // The boot class loader is searched ahead of the application class loader, failures are
       // expected and will be wrapped in a ClassNotFoundException. Use the pre-allocated error to
       // trigger the chaining with a proper stack trace.
-      ObjPtr<mirror::Throwable> pre_allocated = Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
+      ObjPtr<mirror::Throwable> pre_allocated =
+          Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
       self->SetException(pre_allocated);
       return nullptr;
     }
+  }
+  ObjPtr<mirror::Class> result_ptr;
+  bool descriptor_equals;
+  if (descriptor[0] == '[') {
+    result_ptr = CreateArrayClass(self, descriptor, hash, class_loader);
+    DCHECK_EQ(result_ptr == nullptr, self->IsExceptionPending());
+    DCHECK(result_ptr == nullptr || result_ptr->DescriptorEquals(descriptor));
+    descriptor_equals = true;
   } else {
     ScopedObjectAccessUnchecked soa(self);
-    ObjPtr<mirror::Class> result_ptr;
-    bool descriptor_equals;
     bool known_hierarchy =
         FindClassInBaseDexClassLoader(soa, self, descriptor, hash, class_loader, &result_ptr);
     if (result_ptr != nullptr) {
@@ -2525,16 +2534,7 @@
                                                  WellKnownClasses::java_lang_ClassLoader_loadClass,
                                                  class_name_object.get()));
       }
-      if (self->IsExceptionPending()) {
-        // If the ClassLoader threw, pass that exception up.
-        // However, to comply with the RI behavior, first check if another thread succeeded.
-        result_ptr = LookupClass(self, descriptor, hash, class_loader.Get());
-        if (result_ptr != nullptr && !result_ptr->IsErroneous()) {
-          self->ClearException();
-          return EnsureResolved(self, descriptor, result_ptr);
-        }
-        return nullptr;
-      } else if (result.get() == nullptr) {
+      if (result.get() == nullptr && !self->IsExceptionPending()) {
         // broken loader - throw NPE to be compatible with Dalvik
         ThrowNullPointerException(StringPrintf("ClassLoader.loadClass returned null for %s",
                                                class_name_string.c_str()).c_str());
@@ -2542,50 +2542,60 @@
       }
       result_ptr = soa.Decode<mirror::Class>(result.get());
       // Check the name of the returned class.
-      descriptor_equals = result_ptr->DescriptorEquals(descriptor);
+      descriptor_equals = (result_ptr != nullptr) && result_ptr->DescriptorEquals(descriptor);
     }
-
-    // Try to insert the class to the class table, checking for mismatch.
-    ObjPtr<mirror::Class> old;
-    {
-      WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-      ClassTable* const class_table = InsertClassTableForClassLoader(class_loader.Get());
-      old = class_table->Lookup(descriptor, hash);
-      if (old == nullptr) {
-        old = result_ptr;  // For the comparison below, after releasing the lock.
-        if (descriptor_equals) {
-          class_table->InsertWithHash(result_ptr.Ptr(), hash);
-          Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
-        }  // else throw below, after releasing the lock.
-      }
-    }
-    if (UNLIKELY(old != result_ptr)) {
-      // Return `old` (even if `!descriptor_equals`) to mimic the RI behavior for parallel
-      // capable class loaders.  (All class loaders are considered parallel capable on Android.)
-      mirror::Class* loader_class = class_loader->GetClass();
-      const char* loader_class_name =
-          loader_class->GetDexFile().StringByTypeIdx(loader_class->GetDexTypeIndex());
-      LOG(WARNING) << "Initiating class loader of type " << DescriptorToDot(loader_class_name)
-          << " is not well-behaved; it returned a different Class for racing loadClass(\""
-          << DescriptorToDot(descriptor) << "\").";
-      return EnsureResolved(self, descriptor, old);
-    }
-    if (UNLIKELY(!descriptor_equals)) {
-      std::string result_storage;
-      const char* result_name = result_ptr->GetDescriptor(&result_storage);
-      std::string loader_storage;
-      const char* loader_class_name = class_loader->GetClass()->GetDescriptor(&loader_storage);
-      ThrowNoClassDefFoundError(
-          "Initiating class loader of type %s returned class %s instead of %s.",
-          DescriptorToDot(loader_class_name).c_str(),
-          DescriptorToDot(result_name).c_str(),
-          DescriptorToDot(descriptor).c_str());
-      return nullptr;
-    }
-    // success, return mirror::Class*
-    return result_ptr.Ptr();
   }
-  UNREACHABLE();
+
+  if (self->IsExceptionPending()) {
+    // If the ClassLoader threw or array class allocation failed, pass that exception up.
+    // However, to comply with the RI behavior, first check if another thread succeeded.
+    result_ptr = LookupClass(self, descriptor, hash, class_loader.Get());
+    if (result_ptr != nullptr && !result_ptr->IsErroneous()) {
+      self->ClearException();
+      return EnsureResolved(self, descriptor, result_ptr);
+    }
+    return nullptr;
+  }
+
+  // Try to insert the class to the class table, checking for mismatch.
+  ObjPtr<mirror::Class> old;
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    ClassTable* const class_table = InsertClassTableForClassLoader(class_loader.Get());
+    old = class_table->Lookup(descriptor, hash);
+    if (old == nullptr) {
+      old = result_ptr;  // For the comparison below, after releasing the lock.
+      if (descriptor_equals) {
+        class_table->InsertWithHash(result_ptr.Ptr(), hash);
+        Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
+      }  // else throw below, after releasing the lock.
+    }
+  }
+  if (UNLIKELY(old != result_ptr)) {
+    // Return `old` (even if `!descriptor_equals`) to mimic the RI behavior for parallel
+    // capable class loaders.  (All class loaders are considered parallel capable on Android.)
+    mirror::Class* loader_class = class_loader->GetClass();
+    const char* loader_class_name =
+        loader_class->GetDexFile().StringByTypeIdx(loader_class->GetDexTypeIndex());
+    LOG(WARNING) << "Initiating class loader of type " << DescriptorToDot(loader_class_name)
+        << " is not well-behaved; it returned a different Class for racing loadClass(\""
+        << DescriptorToDot(descriptor) << "\").";
+    return EnsureResolved(self, descriptor, old);
+  }
+  if (UNLIKELY(!descriptor_equals)) {
+    std::string result_storage;
+    const char* result_name = result_ptr->GetDescriptor(&result_storage);
+    std::string loader_storage;
+    const char* loader_class_name = class_loader->GetClass()->GetDescriptor(&loader_storage);
+    ThrowNoClassDefFoundError(
+        "Initiating class loader of type %s returned class %s instead of %s.",
+        DescriptorToDot(loader_class_name).c_str(),
+        DescriptorToDot(result_name).c_str(),
+        DescriptorToDot(descriptor).c_str());
+    return nullptr;
+  }
+  // success, return mirror::Class*
+  return result_ptr.Ptr();
 }
 
 mirror::Class* ClassLinker::DefineClass(Thread* self,
@@ -3494,7 +3504,8 @@
   // class to the hash table --- necessary because of possible races with
   // other threads.)
   if (class_loader.Get() != component_type->GetClassLoader()) {
-    ObjPtr<mirror::Class> new_class = LookupClass(self, descriptor, hash, component_type->GetClassLoader());
+    ObjPtr<mirror::Class> new_class =
+        LookupClass(self, descriptor, hash, component_type->GetClassLoader());
     if (new_class != nullptr) {
       return new_class.Ptr();
     }
@@ -7712,7 +7723,7 @@
       type = LookupClass(self, descriptor, hash, class_loader.Ptr());
     }
   }
-  if (type != nullptr || type->IsResolved()) {
+  if (type != nullptr && type->IsResolved()) {
     return type.Ptr();
   }
   return nullptr;
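The restructured tail of FindClass() above is now shared by the array and non-array paths: one look-up-then-insert under classlinker_classes_lock_, deferring to whatever class a racing thread already registered. The core pattern, sketched with standard containers in place of ART's ClassTable:

    #include <map>
    #include <mutex>
    #include <string>

    struct Class {};

    std::mutex table_lock;  // stand-in for classlinker_classes_lock_
    std::map<std::string, Class*> class_table;

    Class* InsertOrAdoptRacingWinner(const std::string& descriptor,
                                     Class* result) {
      Class* old;
      {
        std::lock_guard<std::mutex> guard(table_lock);
        auto it = class_table.find(descriptor);
        if (it != class_table.end()) {
          old = it->second;  // a racing thread won; adopt its class
        } else {
          class_table.emplace(descriptor, result);
          old = result;
        }
      }
      // `old != result` corresponds to the UNLIKELY(old != result_ptr)
      // branch above, which warns and returns the racing thread's class.
      return old;
    }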
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index d98daa5..7d4b158 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -906,6 +906,41 @@
       klass);
 }
 
+TEST_F(ClassLinkerTest, LookupResolvedTypeArray) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader>(LoadDex("AllFields"))));
+  // Get the AllFields class for the dex cache and dex file.
+  ObjPtr<mirror::Class> all_fields_klass
+      = class_linker_->FindClass(soa.Self(), "LAllFields;", class_loader);
+  ASSERT_OBJ_PTR_NE(all_fields_klass, ObjPtr<mirror::Class>(nullptr));
+  Handle<mirror::DexCache> dex_cache = hs.NewHandle(all_fields_klass->GetDexCache());
+  const DexFile& dex_file = *dex_cache->GetDexFile();
+  // Get the index of the array class we want to test.
+  const DexFile::TypeId* array_id = dex_file.FindTypeId("[Ljava/lang/Object;");
+  ASSERT_TRUE(array_id != nullptr);
+  dex::TypeIndex array_idx = dex_file.GetIndexForTypeId(*array_id);
+  // Check that the array class wasn't resolved yet.
+  EXPECT_OBJ_PTR_EQ(
+      class_linker_->LookupResolvedType(dex_file, array_idx, dex_cache.Get(), class_loader.Get()),
+      ObjPtr<mirror::Class>(nullptr));
+  // Resolve the array class we want to test.
+  ObjPtr<mirror::Class> array_klass
+      = class_linker_->FindClass(soa.Self(), "[Ljava/lang/Object;", class_loader);
+  ASSERT_OBJ_PTR_NE(array_klass, ObjPtr<mirror::Class>(nullptr));
+  // Test that LookupResolvedType() finds the array class.
+  EXPECT_OBJ_PTR_EQ(
+      class_linker_->LookupResolvedType(dex_file, array_idx, dex_cache.Get(), class_loader.Get()),
+      array_klass);
+  // Zero out the resolved type and make sure LookupResolvedType() still finds it.
+  dex_cache->SetResolvedType(array_idx, nullptr);
+  EXPECT_TRUE(dex_cache->GetResolvedType(array_idx) == nullptr);
+  EXPECT_OBJ_PTR_EQ(
+      class_linker_->LookupResolvedType(dex_file, array_idx, dex_cache.Get(), class_loader.Get()),
+      array_klass);
+}
+
 TEST_F(ClassLinkerTest, LibCore) {
   ScopedObjectAccess soa(Thread::Current());
   ASSERT_TRUE(java_lang_dex_file_ != nullptr);
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 0f985c6..ff846a7 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -129,6 +129,19 @@
   classes_.back().InsertWithHash(TableSlot(klass, hash), hash);
 }
 
+void ClassTable::CopyWithoutLocks(const ClassTable& source_table) {
+  if (kIsDebugBuild) {
+    for (ClassSet& class_set : classes_) {
+      CHECK(class_set.Empty());
+    }
+  }
+  for (const ClassSet& class_set : source_table.classes_) {
+    for (const TableSlot& slot : class_set) {
+      classes_.back().Insert(slot);
+    }
+  }
+}
+
 void ClassTable::InsertWithoutLocks(ObjPtr<mirror::Class> klass) {
   const uint32_t hash = TableSlot::HashDescriptor(klass);
   classes_.back().InsertWithHash(TableSlot(klass, hash), hash);
diff --git a/runtime/class_table.h b/runtime/class_table.h
index f27d809..c8ec28e 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -240,6 +240,7 @@
   }
 
  private:
+  void CopyWithoutLocks(const ClassTable& source_table) NO_THREAD_SAFETY_ANALYSIS;
   void InsertWithoutLocks(ObjPtr<mirror::Class> klass) NO_THREAD_SAFETY_ANALYSIS;
 
   size_t CountDefiningLoaderClasses(ObjPtr<mirror::ClassLoader> defining_loader,
diff --git a/runtime/openjdkjvmti/events.cc b/runtime/openjdkjvmti/events.cc
index 7182055..d3f8001 100644
--- a/runtime/openjdkjvmti/events.cc
+++ b/runtime/openjdkjvmti/events.cc
@@ -212,7 +212,7 @@
                               thread.get(),
                               object.get(),
                               klass.get(),
-                              byte_count);
+                              static_cast<jlong>(byte_count));
     }
   }
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index a72159b..d1ad77c 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -302,6 +302,9 @@
           .IntoKey(M::Plugins)
       .Define("-Xfully-deoptable")
           .IntoKey(M::FullyDeoptable)
+      .Define("-XX:ThreadSuspendTimeout=_")  // in ms
+          .WithType<MillisecondsToNanoseconds>()  // store as ns
+          .IntoKey(M::ThreadSuspendTimeout)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
@@ -724,6 +727,7 @@
   UsageMessage(stream, "  -XX:MaxSpinsBeforeThinLockInflation=integervalue\n");
   UsageMessage(stream, "  -XX:LongPauseLogThreshold=integervalue\n");
   UsageMessage(stream, "  -XX:LongGCLogThreshold=integervalue\n");
+  UsageMessage(stream, "  -XX:ThreadSuspendTimeout=integervalue\n");
   UsageMessage(stream, "  -XX:DumpGCPerformanceOnShutdown\n");
   UsageMessage(stream, "  -XX:DumpJITInfoOnShutdown\n");
   UsageMessage(stream, "  -XX:IgnoreMaxFootprint\n");
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1218a98..06cd7ff 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1046,7 +1046,7 @@
 
   monitor_list_ = new MonitorList;
   monitor_pool_ = MonitorPool::Create();
-  thread_list_ = new ThreadList;
+  thread_list_ = new ThreadList(runtime_options.GetOrDefault(Opt::ThreadSuspendTimeout));
   intern_table_ = new InternTable;
 
   verify_ = runtime_options.GetOrDefault(Opt::Verify);
diff --git a/runtime/runtime_options.cc b/runtime/runtime_options.cc
index e75481c..aa14719 100644
--- a/runtime/runtime_options.cc
+++ b/runtime/runtime_options.cc
@@ -21,6 +21,7 @@
 #include "gc/heap.h"
 #include "monitor.h"
 #include "runtime.h"
+#include "thread_list.h"
 #include "trace.h"
 #include "utils.h"
 #include "debugger.h"
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index ecabf9a..749a36e 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -60,6 +60,8 @@
                                           LongPauseLogThreshold,          gc::Heap::kDefaultLongPauseLogThreshold)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           LongGCLogThreshold,             gc::Heap::kDefaultLongGCLogThreshold)
+RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
+                                          ThreadSuspendTimeout,           ThreadList::kDefaultThreadSuspendTimeout)
 RUNTIME_OPTIONS_KEY (Unit,                DumpGCPerformanceOnShutdown)
 RUNTIME_OPTIONS_KEY (Unit,                DumpJITInfoOnShutdown)
 RUNTIME_OPTIONS_KEY (Unit,                IgnoreMaxFootprint)
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index c5c7e2c..01c940e 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -57,7 +57,6 @@
 using android::base::StringPrintf;
 
 static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
-static constexpr uint64_t kThreadSuspendTimeoutMs = 30 * 1000;  // 30s.
 // Use 0 since we want to yield to prevent blocking for an unpredictable amount of time.
 static constexpr useconds_t kThreadSuspendInitialSleepUs = 0;
 static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
@@ -68,12 +67,13 @@
 // Turned off again. b/29248079
 static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = false;
 
-ThreadList::ThreadList()
+ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
     : suspend_all_count_(0),
       debug_suspend_all_count_(0),
       unregistering_count_(0),
       suspend_all_historam_("suspend all histogram", 16, 64),
       long_suspend_(false),
+      thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
       empty_checkpoint_barrier_(new Barrier(0)) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
 }
@@ -554,12 +554,14 @@
     // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
 #if HAVE_TIMED_RWLOCK
     while (true) {
-      if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self, kThreadSuspendTimeoutMs, 0)) {
+      if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self,
+                                                         NsToMs(thread_suspend_timeout_ns_),
+                                                         0)) {
         break;
       } else if (!long_suspend_) {
        // Reading long_suspend without the mutator lock is slightly racy; in some rare cases, this
         // could result in a thread suspend timeout.
-        // Timeout if we wait more than kThreadSuspendTimeoutMs seconds.
+        // Timeout if we wait more than thread_suspend_timeout_ns_ nanoseconds.
         UnsafeLogFatalForThreadSuspendAllTimeout();
       }
     }
@@ -653,7 +655,7 @@
   // is done with a timeout so that we can detect problems.
 #if ART_USE_FUTEXES
   timespec wait_timeout;
-  InitTimeSpec(false, CLOCK_MONOTONIC, kIsDebugBuild ? 50000 : 10000, 0, &wait_timeout);
+  InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
 #endif
   const uint64_t start_time = NanoTime();
   while (true) {
@@ -863,7 +865,7 @@
           return thread;
         }
         const uint64_t total_delay = NanoTime() - start_time;
-        if (total_delay >= MsToNs(kThreadSuspendTimeoutMs)) {
+        if (total_delay >= thread_suspend_timeout_ns_) {
           ThreadSuspendByPeerWarning(self,
                                      ::android::base::FATAL,
                                      "Thread suspension timed out",
@@ -969,7 +971,7 @@
           return thread;
         }
         const uint64_t total_delay = NanoTime() - start_time;
-        if (total_delay >= MsToNs(kThreadSuspendTimeoutMs)) {
+        if (total_delay >= thread_suspend_timeout_ns_) {
           ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                          "Thread suspension timed out",
                                          thread_id);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 658db00..b60fca1 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -20,6 +20,7 @@
 #include "barrier.h"
 #include "base/histogram.h"
 #include "base/mutex.h"
+#include "base/time_utils.h"
 #include "base/value_object.h"
 #include "gc_root.h"
 #include "jni.h"
@@ -41,11 +42,12 @@
 
 class ThreadList {
  public:
-  static const uint32_t kMaxThreadId = 0xFFFF;
-  static const uint32_t kInvalidThreadId = 0;
-  static const uint32_t kMainThreadId = 1;
+  static constexpr uint32_t kMaxThreadId = 0xFFFF;
+  static constexpr uint32_t kInvalidThreadId = 0;
+  static constexpr uint32_t kMainThreadId = 1;
+  static constexpr uint64_t kDefaultThreadSuspendTimeout = MsToNs(kIsDebugBuild ? 50000 : 10000);
 
-  explicit ThreadList();
+  explicit ThreadList(uint64_t thread_suspend_timeout_ns);
   ~ThreadList();
 
   void DumpForSigQuit(std::ostream& os)
@@ -219,6 +221,9 @@
   // Whether or not the current thread suspension is long.
   bool long_suspend_;
 
+  // Thread suspension timeout in nanoseconds.
+  const uint64_t thread_suspend_timeout_ns_;
+
   std::unique_ptr<Barrier> empty_checkpoint_barrier_;
 
   friend class Thread;
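thread_suspend_timeout_ns_ feeds the polling loops in SuspendThreadByPeer()/SuspendThreadByThreadId() shown earlier. Their shape, reduced to a sketch (NanoTime() and the suspension predicate are stand-ins for ART's helpers):

    #include <chrono>
    #include <cstdint>
    #include <functional>

    static uint64_t NanoTime() {  // stand-in for ART's monotonic clock
      return std::chrono::duration_cast<std::chrono::nanoseconds>(
          std::chrono::steady_clock::now().time_since_epoch()).count();
    }

    bool WaitForSuspension(const std::function<bool()>& suspended,
                           uint64_t thread_suspend_timeout_ns) {
      const uint64_t start_time = NanoTime();
      while (!suspended()) {
        if (NanoTime() - start_time >= thread_suspend_timeout_ns) {
          return false;  // timed out; the real code logs FATAL or WARNING
        }
        // Yield or sleep briefly before re-checking.
      }
      return true;
    }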
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 639996e..814f968 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -277,42 +277,6 @@
   147-stripped-dex-fallback \
   569-checker-pattern-replacement
 
-# These 9** tests are not supported in current form due to linker
-# restrictions. See b/31681198
-TEST_ART_BROKEN_TARGET_TESTS += \
-  901-hello-ti-agent \
-  902-hello-transformation \
-  903-hello-tagging \
-  904-object-allocation \
-  905-object-free \
-  906-iterate-heap \
-  907-get-loaded-classes \
-  908-gc-start-finish \
-  909-attach-agent \
-  910-methods \
-  911-get-stack-trace \
-  912-classes \
-  913-heaps \
-  914-hello-obsolescence \
-  915-obsolete-2 \
-  916-obsolete-jit \
-  917-fields-transformation \
-  918-fields \
-  919-obsolete-fields \
-  920-objects \
-  921-hello-failure \
-  922-properties \
-  923-monitors \
-  924-threads \
-  925-threadgroups \
-  926-multi-obsolescence \
-  927-timers \
-  928-jni-table \
-  929-search \
-  930-hello-retransform \
-  931-agent-thread \
-  932-transform-saves \
-
 ifneq (,$(filter target,$(TARGET_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
       $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
diff --git a/test/run-test b/test/run-test
index 9b17802..c78fa35 100755
--- a/test/run-test
+++ b/test/run-test
@@ -131,6 +131,7 @@
 multi_image_suffix=""
 android_root="/system"
 bisection_search="no"
+suspend_timeout="500000"
 # By default we will use optimizing.
 image_args=""
 image_suffix=""
@@ -219,6 +220,10 @@
         basic_verify="true"
         gc_stress="true"
         shift
+    elif [ "x$1" = "x--suspend-timeout" ]; then
+        shift
+        suspend_timeout="$1"
+        shift
     elif [ "x$1" = "x--image" ]; then
         shift
         image="$1"
@@ -402,6 +407,9 @@
 tmp_dir="`cd $oldwd ; python -c "import os; print os.path.realpath('$tmp_dir')"`"
 mkdir -p $tmp_dir
 
+# Add thread suspend timeout flag
+run_args="${run_args} --runtime-option -XX:ThreadSuspendTimeout=$suspend_timeout"
+
 if [ "$basic_verify" = "true" ]; then
   # Set HspaceCompactForOOMMinIntervalMs to zero to run hspace compaction for OOM more frequently in tests.
   run_args="${run_args} --runtime-option -Xgc:preverify --runtime-option -Xgc:postverify --runtime-option -XX:HspaceCompactForOOMMinIntervalMs=0"
@@ -649,6 +657,7 @@
         echo "    --quiet               Don't print anything except failure messages"
         echo "    --bisection-search    Perform bisection bug search."
         echo "    --vdex                Test using vdex as in input to dex2oat. Only works with --prebuild."
+        echo "    --suspend-timeout     Change thread suspend timeout ms (default 500000)."
     ) 1>&2  # Direct to stderr so usage is not printed if --quiet is set.
     exit 1
 fi
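With the plumbing above, an invocation such as "./run-test --suspend-timeout 1200000 <test>" (value in milliseconds; the test argument is illustrative) translates into "--runtime-option -XX:ThreadSuspendTimeout=1200000" for the runtime under test.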
diff --git a/test/ti-agent/common_helper.cc b/test/ti-agent/common_helper.cc
index fd9fc38..80e1797 100644
--- a/test/ti-agent/common_helper.cc
+++ b/test/ti-agent/common_helper.cc
@@ -349,7 +349,7 @@
     names[1] = m->JniLongName();
   }
   for (const std::string& mangled_name : names) {
-    void* sym = dlsym(nullptr, mangled_name.c_str());
+    void* sym = dlsym(RTLD_DEFAULT, mangled_name.c_str());
     if (sym == nullptr) {
       continue;
     }
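The final hunk fixes a portability bug: dlsym(nullptr, ...) is not a defined way to search the global symbol scope; RTLD_DEFAULT is the documented handle for that. Minimal usage sketch (the mangled name is hypothetical):

    #include <dlfcn.h>

    // RTLD_DEFAULT searches every object loaded into the process, which is
    // what resolving a JNI mangled name requires here; a null handle is not
    // equivalent and may be rejected outright.
    void* LookupGlobal(const char* mangled_name) {
      return dlsym(RTLD_DEFAULT, mangled_name);  // e.g. "Java_Main_foo"
    }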