Merge "Make format args explicit in DocString."
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index ad2feeb..acce68b 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -333,7 +333,7 @@
     ifneq ($(NATIVE_COVERAGE),true)
       art_host_non_debug_cflags += -Wframe-larger-than=2700
       ifdef SANITIZE_TARGET
-        art_target_non_debug_cflags += -Wframe-larger-than=5450
+        art_target_non_debug_cflags += -Wframe-larger-than=6400
       else
         art_target_non_debug_cflags += -Wframe-larger-than=1728
       endif
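
The raised -Wframe-larger-than cap for SANITIZE_TARGET builds reflects AddressSanitizer's redzone padding, which inflates every instrumented stack frame. A minimal illustration of what the flag checks, with a made-up function and buffer size:

```cpp
// Illustrative only: -Wframe-larger-than=N warns when a function's stack
// frame exceeds N bytes. A frame between 1728 and 6400 bytes warns in the
// regular target build but passes under the raised sanitizer cap, which
// leaves headroom for ASan's redzones around locals.
void MediumFrame() {
  volatile char buffer[4096];  // ~4 KiB frame: over 1728, under 6400.
  buffer[0] = 0;
}
```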
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 603130a..acede45 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -335,7 +335,7 @@
     }
 
     // Create a `CompiledMethod`, with the quickened information in the vmap table.
-    Leb128EncodingVector builder;
+    Leb128EncodingVector<> builder;
     for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) {
       builder.PushBackUnsigned(info.dex_pc);
       builder.PushBackUnsigned(info.dex_member_index);
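
Leb128EncodingVector is now a template (instantiated here with its default vector type, hence the `<>`); the code_generator.cc hunk further below instantiates the underlying Leb128Encoder with an arena allocator adapter. For context, a standalone sketch of the unsigned LEB128 encoding that PushBackUnsigned produces; the free-function name is illustrative, not ART's API:

```cpp
#include <cstdint>
#include <vector>

// Emit `value` as unsigned LEB128: seven payload bits per byte, least
// significant group first, high bit set on every byte but the last.
void PushBackUnsignedLeb128(std::vector<uint8_t>* out, uint32_t value) {
  uint8_t byte = value & 0x7f;
  value >>= 7;
  while (value != 0) {
    out->push_back(byte | 0x80);  // Continuation bit.
    byte = value & 0x7f;
    value >>= 7;
  }
  out->push_back(byte);  // Final byte, continuation bit clear.
}
```

For example, 300 encodes as 0xAC 0x02.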
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 8bf709a..bcfd440 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -368,7 +368,7 @@
 
 struct SuccessorBlockInfo;
 
-class BasicBlock : public DeletableArenaObject<kArenaAllocBB> {
+class BasicBlock : public DeletableArenaObject<kArenaAllocBasicBlock> {
  public:
   BasicBlock(BasicBlockId block_id, BBType type, ArenaAllocator* allocator)
       : id(block_id),
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index be913fe..31c3808 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -105,7 +105,8 @@
       // Don't devirt if we are in a different dex file since we can't have direct invokes in
       // another dex file unless we always put a direct / patch pointer.
       devirt_target = nullptr;
-      current_dex_cache.Assign(runtime->GetClassLinker()->FindDexCache(*it->target_dex_file_));
+      current_dex_cache.Assign(runtime->GetClassLinker()->FindDexCache(
+          soa.Self(), *it->target_dex_file_));
       CHECK(current_dex_cache.Get() != nullptr);
       DexCompilationUnit cu(
           mUnit->GetCompilationUnit(), mUnit->GetClassLoader(), mUnit->GetClassLinker(),
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 72754ae..7082bed 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1115,7 +1115,7 @@
 
 CompiledMethod* Mir2Lir::GetCompiledMethod() {
   // Combine vmap tables - core regs, then fp regs - into vmap_table.
-  Leb128EncodingVector vmap_encoder;
+  Leb128EncodingVector<> vmap_encoder;
   if (frame_size_ > 0) {
     // Prefix the encoded data with its size.
     size_t size = core_vmap_table_.size() + 1 /* marker */ + fp_vmap_table_.size();
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 80387f2..8f1987a 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -31,7 +31,7 @@
 namespace art {
 
 inline mirror::DexCache* CompilerDriver::GetDexCache(const DexCompilationUnit* mUnit) {
-  return mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile());
+  return mUnit->GetClassLinker()->FindDexCache(Thread::Current(), *mUnit->GetDexFile(), false);
 }
 
 inline mirror::ClassLoader* CompilerDriver::GetClassLoader(ScopedObjectAccess& soa,
@@ -87,7 +87,7 @@
 }
 
 inline mirror::DexCache* CompilerDriver::FindDexCache(const DexFile* dex_file) {
-  return Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file);
+  return Runtime::Current()->GetClassLinker()->FindDexCache(Thread::Current(), *dex_file, false);
 }
 
 inline ArtField* CompilerDriver::ResolveField(
@@ -339,7 +339,8 @@
     // Sharpen a virtual call into a direct call. The method_idx is into referrer's
     // dex cache, check that this resolved method is where we expect it.
     CHECK_EQ(target_method->dex_file, mUnit->GetDexFile());
-    DCHECK_EQ(dex_cache.Get(), mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
+    DCHECK_EQ(dex_cache.Get(), mUnit->GetClassLinker()->FindDexCache(
+        soa.Self(), *mUnit->GetDexFile(), false));
     CHECK_EQ(referrer_class->GetDexCache()->GetResolvedMethod(
         target_method->dex_method_index, pointer_size),
              resolved_method) << PrettyMethod(resolved_method);
@@ -369,7 +370,7 @@
           nullptr, kVirtual);
     } else {
       StackHandleScope<1> hs(soa.Self());
-      auto target_dex_cache(hs.NewHandle(class_linker->FindDexCache(*devirt_target->dex_file)));
+      auto target_dex_cache(hs.NewHandle(class_linker->RegisterDexFile(*devirt_target->dex_file)));
       called_method = class_linker->ResolveMethod(
           *devirt_target->dex_file, devirt_target->dex_method_index, target_dex_cache,
           class_loader, nullptr, kVirtual);
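
Across this change, ClassLinker::FindDexCache gains an explicit Thread* argument, and several call sites also pass a trailing bool. A standalone sketch of the API shift, assuming the flag means "tolerate a missing cache"; the registry type here is a stand-in, not ART's class:

```cpp
#include <cstdlib>
#include <string>
#include <unordered_map>

struct Thread {};  // Stand-in for art::Thread.

class CacheRegistry {
 public:
  // New shape: the caller's thread is explicit, making lock and suspension
  // requirements visible at the call site instead of hidden inside.
  int* Find(Thread* self, const std::string& key, bool allow_failure) {
    static_cast<void>(self);  // In ART this would feed lock assertions.
    auto it = caches_.find(key);
    if (it == caches_.end()) {
      if (allow_failure) {
        return nullptr;  // Caller copes with a missing cache.
      }
      std::abort();  // Assumed semantics: a missing cache is a hard error.
    }
    return &it->second;
  }

 private:
  std::unordered_map<std::string, int> caches_;
};
```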
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index fb3af2d..6d3a960 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -940,7 +940,7 @@
       uint16_t exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
       StackHandleScope<2> hs2(self);
-      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->FindDexCache(*dex_file)));
+      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(*dex_file)));
       Handle<mirror::Class> klass(hs2.NewHandle(
           class_linker->ResolveType(*dex_file, exception_type_idx, dex_cache,
                                     NullHandle<mirror::ClassLoader>())));
@@ -1174,7 +1174,8 @@
       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
     {
       ScopedObjectAccess soa(Thread::Current());
-      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
+          soa.Self(), dex_file, false);
       mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
       if (resolved_class == nullptr) {
         // Erroneous class.
@@ -1199,9 +1200,10 @@
     // We resolve all const-string strings when building for the image.
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(
-        hs.NewHandle(Runtime::Current()->GetClassLinker()->FindDexCache(dex_file)));
-    Runtime::Current()->GetClassLinker()->ResolveString(dex_file, string_idx, dex_cache);
+    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+        soa.Self(), dex_file, false)));
+    class_linker->ResolveString(dex_file, string_idx, dex_cache);
     result = true;
   }
   if (result) {
@@ -1226,7 +1228,8 @@
     *equals_referrers_class = false;
   }
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
+      soa.Self(), dex_file, false);
   // Get type from dex cache assuming it was populated by the verifier
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
@@ -1263,7 +1266,8 @@
                                                             const DexFile& dex_file,
                                                             uint32_t type_idx) {
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
+      soa.Self(), dex_file, false);
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
@@ -1292,7 +1296,8 @@
                                         uintptr_t* direct_type_ptr, bool* out_is_finalizable) {
   ScopedObjectAccess soa(Thread::Current());
   Runtime* runtime = Runtime::Current();
-  mirror::DexCache* dex_cache = runtime->GetClassLinker()->FindDexCache(dex_file);
+  mirror::DexCache* dex_cache = runtime->GetClassLinker()->FindDexCache(
+      soa.Self(), dex_file, false);
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     return false;
@@ -1421,7 +1426,8 @@
   {
     StackHandleScope<2> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache_handle(
-        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile())));
+        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(
+            soa.Self(), *mUnit->GetDexFile(), false)));
     Handle<mirror::ClassLoader> class_loader_handle(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
     resolved_field =
@@ -1471,7 +1477,8 @@
   {
     StackHandleScope<2> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache_handle(
-        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile())));
+        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(
+            soa.Self(), *mUnit->GetDexFile(), false)));
     Handle<mirror::ClassLoader> class_loader_handle(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
     resolved_field =
@@ -1657,7 +1664,8 @@
   // Try to resolve the method and compiling method's class.
   StackHandleScope<3> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(
-      hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile())));
+      hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(
+          soa.Self(), *mUnit->GetDexFile(), false)));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
   uint32_t method_idx = target_method->dex_method_index;
@@ -1909,7 +1917,8 @@
     StackHandleScope<2> hs(soa.Self());
     Handle<mirror::ClassLoader> class_loader(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+        soa.Self(), dex_file, false)));
     // Resolve the class.
     mirror::Class* klass = class_linker->ResolveType(dex_file, class_def.class_idx_, dex_cache,
                                                      class_loader);
@@ -2002,7 +2011,7 @@
     ClassLinker* class_linker = manager_->GetClassLinker();
     const DexFile& dex_file = *manager_->GetDexFile();
     StackHandleScope<2> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->RegisterDexFile(dex_file)));
     Handle<mirror::ClassLoader> class_loader(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(manager_->GetClassLoader())));
     mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
@@ -2088,7 +2097,8 @@
        * This is to ensure the class is structurally sound for compilation. An unsound class
        * will be rejected by the verifier and later skipped during compilation in the compiler.
        */
-      Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file)));
+      Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+          soa.Self(), dex_file, false)));
       std::string error_msg;
       if (verifier::MethodVerifier::VerifyClass(soa.Self(), &dex_file, dex_cache, class_loader,
                                                 &class_def, true, &error_msg) ==
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index a5ace0b..1107599 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -108,7 +108,7 @@
   ScopedObjectAccess soa(Thread::Current());
   ASSERT_TRUE(java_lang_dex_file_ != nullptr);
   const DexFile& dex = *java_lang_dex_file_;
-  mirror::DexCache* dex_cache = class_linker_->FindDexCache(dex);
+  mirror::DexCache* dex_cache = class_linker_->FindDexCache(soa.Self(), dex);
   EXPECT_EQ(dex.NumStringIds(), dex_cache->NumStrings());
   for (size_t i = 0; i < dex_cache->NumStrings(); i++) {
     const mirror::String* string = dex_cache->GetResolvedString(i);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 93897aa..dbd3366 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -70,7 +70,6 @@
 
 // Separate objects into multiple bins to optimize dirty memory use.
 static constexpr bool kBinObjects = true;
-static constexpr bool kComputeEagerResolvedStrings = false;
 
 static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
     SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -90,11 +89,6 @@
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
 
-    // Calling this can in theory fill in some resolved strings. However, in practice it seems to
-    // never resolve any.
-    if (kComputeEagerResolvedStrings) {
-      ComputeEagerResolvedStrings();
-    }
     Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -302,11 +296,15 @@
 
 void ImageWriter::PrepareDexCacheArraySlots() {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock());
-  size_t dex_cache_count = class_linker->GetDexCacheCount();
+  Thread* const self = Thread::Current();
+  ReaderMutexLock mu(self, *class_linker->DexLock());
   uint32_t size = 0u;
-  for (size_t idx = 0; idx < dex_cache_count; ++idx) {
-    DexCache* dex_cache = class_linker->GetDexCache(idx);
+  for (jobject weak_root : class_linker->GetDexCaches()) {
+    mirror::DexCache* dex_cache =
+        down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+    if (dex_cache == nullptr) {
+      continue;
+    }
     const DexFile* dex_file = dex_cache->GetDexFile();
     dex_cache_array_starts_.Put(dex_file, size);
     DexCacheArraysLayout layout(target_ptr_size_, dex_file);
@@ -554,39 +552,6 @@
   class_linker->VisitClassesWithoutClassesLock(&visitor);
 }
 
-void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) {
-  if (!obj->GetClass()->IsStringClass()) {
-    return;
-  }
-  mirror::String* string = obj->AsString();
-  const uint16_t* utf16_string = string->GetValue();
-  size_t utf16_length = static_cast<size_t>(string->GetLength());
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock());
-  size_t dex_cache_count = class_linker->GetDexCacheCount();
-  for (size_t i = 0; i < dex_cache_count; ++i) {
-    DexCache* dex_cache = class_linker->GetDexCache(i);
-    const DexFile& dex_file = *dex_cache->GetDexFile();
-    const DexFile::StringId* string_id;
-    if (UNLIKELY(utf16_length == 0)) {
-      string_id = dex_file.FindStringId("");
-    } else {
-      string_id = dex_file.FindStringId(utf16_string, utf16_length);
-    }
-    if (string_id != nullptr) {
-      // This string occurs in this dex file, assign the dex cache entry.
-      uint32_t string_idx = dex_file.GetIndexForStringId(*string_id);
-      if (dex_cache->GetResolvedString(string_idx) == nullptr) {
-        dex_cache->SetResolvedString(string_idx, string);
-      }
-    }
-  }
-}
-
-void ImageWriter::ComputeEagerResolvedStrings() {
-  Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
-}
-
 bool ImageWriter::IsImageClass(Class* klass) {
   if (klass == nullptr) {
     return false;
@@ -631,16 +596,14 @@
 
   // Clear references to removed classes from the DexCaches.
   const ArtMethod* resolution_method = runtime->GetResolutionMethod();
-  size_t dex_cache_count;
-  {
-    ReaderMutexLock mu(self, *class_linker->DexLock());
-    dex_cache_count = class_linker->GetDexCacheCount();
-  }
-  for (size_t idx = 0; idx < dex_cache_count; ++idx) {
-    DexCache* dex_cache;
-    {
-      ReaderMutexLock mu(self, *class_linker->DexLock());
-      dex_cache = class_linker->GetDexCache(idx);
+
+  ScopedAssertNoThreadSuspension sa(self, __FUNCTION__);
+  ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);  // For ClassInClassTable
+  ReaderMutexLock mu2(self, *class_linker->DexLock());
+  for (jobject weak_root : class_linker->GetDexCaches()) {
+    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+    if (dex_cache == nullptr) {
+      continue;
     }
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
@@ -762,8 +725,12 @@
     ReaderMutexLock mu(self, *class_linker->DexLock());
     CHECK_EQ(dex_cache_count, class_linker->GetDexCacheCount())
         << "The number of dex caches changed.";
-    for (size_t i = 0; i < dex_cache_count; ++i) {
-      dex_caches->Set<false>(i, class_linker->GetDexCache(i));
+    size_t i = 0;
+    for (jobject weak_root : class_linker->GetDexCaches()) {
+      mirror::DexCache* dex_cache =
+          down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+      dex_caches->Set<false>(i, dex_cache);
+      ++i;
     }
   }
 
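
Both rewritten loops in image_writer.cc share one pattern: walk the class linker's weak roots, decode each, and skip dex caches whose referent has been collected. A standalone sketch of that pattern, with std::weak_ptr standing in for the jobject weak roots and lock() for Thread::DecodeJObject:

```cpp
#include <memory>
#include <vector>

struct DexCacheStub { int id; };  // Stand-in for mirror::DexCache.

void VisitLiveCaches(const std::vector<std::weak_ptr<DexCacheStub>>& weak_roots) {
  for (const auto& weak_root : weak_roots) {
    std::shared_ptr<DexCacheStub> cache = weak_root.lock();  // DecodeJObject analogue.
    if (cache == nullptr) {
      continue;  // Cache was unloaded; mirrors the `continue` above.
    }
    // ... process the live cache ...
  }
}
```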
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index c8aa82d..778521c 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -225,11 +225,6 @@
   void ComputeLazyFieldsForImageClasses()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Wire dex cache resolved strings to strings in the image to avoid runtime resolution.
-  void ComputeEagerResolvedStrings() SHARED_REQUIRES(Locks::mutator_lock_);
-  static void ComputeEagerResolvedStringsCallback(mirror::Object* obj, void* arg)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   // Remove unwanted classes from various roots.
   void PruneNonImageClasses() SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 64e7487..fdf904d 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -617,7 +617,8 @@
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(*dex_file_)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(
+        Thread::Current(), *dex_file_)));
     ArtMethod* method = linker->ResolveMethod(
         *dex_file_, it.GetMemberIndex(), dex_cache, NullHandle<mirror::ClassLoader>(), nullptr,
         invoke_type);
@@ -668,7 +669,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     OatDexMethodVisitor::StartClass(dex_file, class_def_index);
     if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) {
-      dex_cache_ = class_linker_->FindDexCache(*dex_file);
+      dex_cache_ = class_linker_->FindDexCache(Thread::Current(), *dex_file);
     }
     return true;
   }
@@ -691,6 +692,8 @@
     OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
+    // No thread suspension since dex_cache_ may get invalidated if suspension occurs.

+    ScopedAssertNoThreadSuspension tsc(Thread::Current(), __FUNCTION__);
     if (compiled_method != nullptr) {  // ie. not an abstract method
       size_t file_offset = file_offset_;
       OutputStream* out = out_;
@@ -796,7 +799,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     MethodReference ref = patch.TargetMethod();
     mirror::DexCache* dex_cache =
-        (dex_file_ == ref.dex_file) ? dex_cache_ : class_linker_->FindDexCache(*ref.dex_file);
+        (dex_file_ == ref.dex_file) ? dex_cache_ : class_linker_->FindDexCache(
+            Thread::Current(), *ref.dex_file);
     ArtMethod* method = dex_cache->GetResolvedMethod(
         ref.dex_method_index, class_linker_->GetImagePointerSize());
     CHECK(method != nullptr);
@@ -830,7 +834,7 @@
   mirror::Class* GetTargetType(const LinkerPatch& patch)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     mirror::DexCache* dex_cache = (dex_file_ == patch.TargetTypeDexFile())
-        ? dex_cache_ : class_linker_->FindDexCache(*patch.TargetTypeDexFile());
+        ? dex_cache_ : class_linker_->FindDexCache(Thread::Current(), *patch.TargetTypeDexFile());
     mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex());
     CHECK(type != nullptr);
     return type;
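
The new ScopedAssertNoThreadSuspension in oat_writer.cc guards the cached dex_cache_ raw pointer, which could be invalidated if the thread suspended and the GC moved objects. A simplified stand-in for the RAII shape; ART's real guard also records a cause string (here __FUNCTION__):

```cpp
#include <cassert>

// Simplified stand-in for art::Thread's suspension bookkeeping.
class Thread {
 public:
  void StartAssertNoSuspension() { ++no_suspend_depth_; }
  void EndAssertNoSuspension() { --no_suspend_depth_; }
  bool SuspensionAllowed() const { return no_suspend_depth_ == 0; }

 private:
  int no_suspend_depth_ = 0;
};

// While an instance is alive, any attempt to suspend the thread should
// trip an assertion, so raw pointers into moving-GC memory stay valid.
class ScopedAssertNoThreadSuspension {
 public:
  explicit ScopedAssertNoThreadSuspension(Thread* self) : self_(self) {
    self_->StartAssertNoSuspension();
  }
  ~ScopedAssertNoThreadSuspension() { self_->EndAssertNoSuspension(); }

 private:
  Thread* const self_;
};

void RequestSuspend(Thread* self) { assert(self->SuspensionAllowed()); }
```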
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 7b42db8..23ab94e 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -902,7 +902,7 @@
   StackHandleScope<4> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          *dex_compilation_unit_->GetDexFile())));
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
   ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
@@ -912,7 +912,7 @@
 
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file)));
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
 
   // The index at which the method's class is stored in the DexCache's type array.
@@ -1228,7 +1228,7 @@
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      compilation_unit.GetClassLinker()->FindDexCache(dex_file)));
+      compilation_unit.GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
 
   return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
 }
@@ -1245,7 +1245,8 @@
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<4> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(*dex_compilation_unit_->GetDexFile())));
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
   Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
@@ -1264,7 +1265,8 @@
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<4> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(*dex_compilation_unit_->GetDexFile())));
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
   ArtField* resolved_field = compiler_driver_->ResolveField(
@@ -1277,7 +1279,7 @@
 
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file)));
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
 
   // The index at which the field's class is stored in the DexCache's type array.
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 503187b..a6fc455 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -239,7 +239,10 @@
         InitLocationsBaseline(current);
       }
       DCHECK(CheckTypeConsistency(current));
+      uintptr_t native_pc_begin = GetAssembler()->CodeSize();
       current->Accept(instruction_visitor);
+      uintptr_t native_pc_end = GetAssembler()->CodeSize();
+      RecordNativeDebugInfo(current->GetDexPc(), native_pc_begin, native_pc_end);
     }
   }
 
@@ -585,7 +588,7 @@
 }
 
 void CodeGenerator::BuildNativeGCMap(
-    std::vector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const {
+    ArenaVector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const {
   const std::vector<uint8_t>& gc_map_raw =
       dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
   verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
@@ -604,16 +607,7 @@
   }
 }
 
-void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const {
-  for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    uint32_t pc2dex_offset = stack_map_entry.native_pc_offset;
-    int32_t pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-    src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
-  }
-}
-
-void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
+void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
   uint32_t pc2dex_data_size = 0u;
   uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
   uint32_t pc2dex_offset = 0u;
@@ -712,24 +706,31 @@
   }
 }
 
-void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const {
-  Leb128EncodingVector vmap_encoder;
+void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
+  Leb128Encoder<ArenaAllocatorAdapter<uint8_t>> vmap_encoder(data);
   // We currently don't use callee-saved registers.
   size_t size = 0 + 1 /* marker */ + 0;
   vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
   vmap_encoder.PushBackUnsigned(size);
   vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-
-  *data = vmap_encoder.GetData();
 }
 
-void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) {
+void CodeGenerator::BuildStackMaps(ArenaVector<uint8_t>* data) {
   uint32_t size = stack_map_stream_.PrepareForFillIn();
   data->resize(size);
   MemoryRegion region(data->data(), size);
   stack_map_stream_.FillIn(region);
 }
 
+void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc,
+                                          uintptr_t native_pc_begin,
+                                          uintptr_t native_pc_end) {
+  if (src_map_ != nullptr && dex_pc != kNoDexPc && native_pc_begin != native_pc_end) {
+    src_map_->push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin),
+                                    static_cast<int32_t>(dex_pc)}));
+  }
+}
+
 void CodeGenerator::RecordPcInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path) {
@@ -1071,12 +1072,6 @@
       << instruction->DebugName() << ((slow_path != nullptr) ? slow_path->GetDescription() : "");
 }
 
-void SlowPathCode::RecordPcInfo(CodeGenerator* codegen,
-                                HInstruction* instruction,
-                                uint32_t dex_pc) {
-  codegen->RecordPcInfo(instruction, dex_pc, this);
-}
-
 void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
   RegisterSet* register_set = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
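
The compile loop now reads the assembler's CodeSize() before and after each instruction and hands the range to RecordNativeDebugInfo, which appends a (native pc, dex pc) pair only when the optional src_map_ is set. A standalone restatement of that filter; kNoDexPc's concrete value is an assumption:

```cpp
#include <cstdint>
#include <vector>

struct SrcMapElem {
  uint32_t from;  // Native pc offset.
  int32_t to;     // Dex pc.
};

constexpr uint32_t kNoDexPc = 0xffffffffu;  // Assumed sentinel value.

// Record a mapping only for instructions that carry a dex pc and actually
// emitted code, the same filter as in the hunk above.
void RecordNativeDebugInfo(std::vector<SrcMapElem>* src_map,
                           uint32_t dex_pc,
                           uintptr_t native_pc_begin,
                           uintptr_t native_pc_end) {
  if (src_map != nullptr && dex_pc != kNoDexPc && native_pc_begin != native_pc_end) {
    src_map->push_back({static_cast<uint32_t>(native_pc_begin),
                        static_cast<int32_t>(dex_pc)});
  }
}
```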
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 938369b..b3c4d72 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -19,6 +19,8 @@
 
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
 #include "base/bit_field.h"
 #include "driver/compiler_options.h"
 #include "globals.h"
@@ -81,7 +83,6 @@
 
   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
-  void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);
 
   bool IsCoreRegisterSaved(int reg) const {
     return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
@@ -228,7 +229,11 @@
     return (fpu_callee_save_mask_ & (1 << reg)) != 0;
   }
 
+  // Record native to dex mapping for a suspend point.  Required by runtime.
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
+  // Record additional native to dex mappings for native debugging/profiling tools.
+  void RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end);
+
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
 
@@ -236,12 +241,13 @@
     slow_paths_.Add(slow_path);
   }
 
-  void BuildSourceMap(DefaultSrcMap* src_map) const;
-  void BuildMappingTable(std::vector<uint8_t>* vector) const;
-  void BuildVMapTable(std::vector<uint8_t>* vector) const;
+  void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; }
+
+  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
+  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
   void BuildNativeGCMap(
-      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
-  void BuildStackMaps(std::vector<uint8_t>* vector);
+      ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
+  void BuildStackMaps(ArenaVector<uint8_t>* vector);
 
   bool IsBaseline() const {
     return is_baseline_;
@@ -394,6 +400,7 @@
         disasm_info_(nullptr),
         graph_(graph),
         compiler_options_(compiler_options),
+        src_map_(nullptr),
         slow_paths_(graph->GetArena(), 8),
         current_block_index_(0),
         is_leaf_(true),
@@ -488,6 +495,8 @@
   HGraph* const graph_;
   const CompilerOptions& compiler_options_;
 
+  // Native to dex_pc map used for native debugging/profiling tools.
+  DefaultSrcMap* src_map_;
   GrowableArray<SlowPathCode*> slow_paths_;
 
   // The current block index in `block_order_` of the block
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 62026f3..0640179 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -142,24 +142,22 @@
 
 class BoundsCheckSlowPathARM : public SlowPathCodeARM {
  public:
-  BoundsCheckSlowPathARM(HBoundsCheck* instruction,
-                         Location index_location,
-                         Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
+  explicit BoundsCheckSlowPathARM(HBoundsCheck* instruction)
+      : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        index_location_,
+        locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        length_location_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     arm_codegen->InvokeRuntime(
@@ -172,8 +170,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM);
 };
@@ -263,17 +259,12 @@
 
 class TypeCheckSlowPathARM : public SlowPathCodeARM {
  public:
-  TypeCheckSlowPathARM(HInstruction* instruction,
-                       Location class_to_check,
-                       Location object_class,
-                       uint32_t dex_pc)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class),
-        dex_pc_(dex_pc) {}
+  explicit TypeCheckSlowPathARM(HInstruction* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -285,20 +276,25 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        class_to_check_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimNot,
-        object_class_,
+        object_class,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
-      arm_codegen->InvokeRuntime(
-          QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_, this);
+      arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this);
+      arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -309,9 +305,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
-  uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM);
 };
@@ -2731,11 +2724,9 @@
         Register temp = locations->GetTemp(0).AsRegister<Register>();
 
         // temp = reg1 / reg2  (integer division)
-        // temp = temp * reg2
-        // dest = reg1 - temp
+        // dest = reg1 - temp * reg2
         __ sdiv(temp, reg1, reg2);
-        __ mul(temp, temp, reg2);
-        __ sub(out.AsRegister<Register>(), reg1, ShifterOperand(temp));
+        __ mls(out.AsRegister<Register>(), temp, reg2, reg1);
       } else {
         InvokeRuntimeCallingConvention calling_convention;
         DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegister<Register>());
@@ -2905,7 +2896,7 @@
         // If the shift is > 32 bits, override the high part
         __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
         __ it(PL);
-        __ Lsl(o_h, low, temp, false, PL);
+        __ Lsl(o_h, low, temp, PL);
         // Shift the low part
         __ Lsl(o_l, low, o_l);
       } else if (op->IsShr()) {
@@ -2919,7 +2910,7 @@
         // If the shift is > 32 bits, override the low part
         __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
         __ it(PL);
-        __ Asr(o_l, high, temp, false, PL);
+        __ Asr(o_l, high, temp, PL);
         // Shift the high part
         __ Asr(o_h, high, o_h);
       } else {
@@ -2931,7 +2922,7 @@
         __ orr(o_l, o_l, ShifterOperand(temp));
         __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
         __ it(PL);
-        __ Lsr(o_l, high, temp, false, PL);
+        __ Lsr(o_l, high, temp, PL);
         __ Lsr(o_h, high, o_h);
       }
       break;
@@ -3901,8 +3892,8 @@
 
 void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(
-      instruction, locations->InAt(0), locations->InAt(1));
+  SlowPathCodeARM* slow_path =
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
   codegen_->AddSlowPath(slow_path);
 
   Register index = locations->InAt(0).AsRegister<Register>();
@@ -4346,6 +4337,7 @@
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // The out register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
@@ -4375,8 +4367,7 @@
   } else {
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
+    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction);
     codegen_->AddSlowPath(slow_path);
     __ b(slow_path->GetEntryLabel(), NE);
     __ LoadImmediate(out, 1);
@@ -4399,6 +4390,7 @@
       instruction, LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
@@ -4409,8 +4401,8 @@
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
-  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-      instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
+  SlowPathCodeARM* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // avoid null check if we know obj is not null.
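
Among the ARM changes above is a small peephole win: the three-instruction remainder sequence sdiv/mul/sub collapses to sdiv/mls, since mls computes dest = acc - lhs * rhs in one instruction. A plain C++ restatement of the arithmetic being emitted:

```cpp
#include <cassert>

int RemainderViaMls(int a, int b) {
  int quotient = a / b;     // sdiv temp, reg1, reg2
  return a - quotient * b;  // mls  dest, temp, reg2, reg1
}

int main() {
  assert(RemainderViaMls(7, 3) == 1);
  assert(RemainderViaMls(-7, 3) == -1);  // Truncating division, as in C++.
  return 0;
}
```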
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 25b3ea2..8035461 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -191,23 +191,19 @@
 
 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  BoundsCheckSlowPathARM64(HBoundsCheck* instruction,
-                           Location index_location,
-                           Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
-
+  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        index_location_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
-        length_location_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
+        locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
+        locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
@@ -219,8 +215,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
 };
@@ -403,20 +397,17 @@
 
 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  TypeCheckSlowPathARM64(HInstruction* instruction,
-                         Location class_to_check,
-                         Location object_class,
-                         uint32_t dex_pc)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class),
-        dex_pc_(dex_pc) {}
+  explicit TypeCheckSlowPathARM64(HInstruction* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location class_to_check = locations->InAt(1);
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    uint32_t dex_pc = instruction_->GetDexPc();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -425,12 +416,12 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
-        object_class_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
+        class_to_check, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
+        object_class, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(
-          QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_, this);
+          QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -438,7 +429,7 @@
                            const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this);
+      arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
       CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
@@ -450,9 +441,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
-  uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
 };
@@ -1602,9 +1590,8 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(
-      instruction, locations->InAt(0), locations->InAt(1));
+  BoundsCheckSlowPathARM64* slow_path =
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
@@ -1616,17 +1603,17 @@
       instruction, LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM64 uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
   Register obj_cls = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
 
-  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-      instruction, locations->InAt(1), LocationFrom(obj_cls), instruction->GetDexPc());
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // Avoid null check if we know obj is not null.
@@ -2240,6 +2227,7 @@
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // The output does overlap inputs.
+  // Note that TypeCheckSlowPathARM64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
@@ -2269,8 +2257,7 @@
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
     SlowPathCodeARM64* slow_path =
-        new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
+        new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction);
     codegen_->AddSlowPath(slow_path);
     __ B(ne, slow_path->GetEntryLabel());
     __ Mov(out, 1);
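
The slow-path rewrite repeated across the ARM, ARM64, MIPS64, and x86 back ends is identical: stop copying Location values (and the dex pc) into the slow path at construction time and instead read them from the instruction's LocationSummary when the code is emitted, shrinking every slow-path object and removing a stale-copy hazard. A minimal standalone sketch of the resulting shape, using stand-in types rather than ART's real ones:

```cpp
// Stand-ins for the optimizing compiler's types.
struct Location { int encoded = 0; };
struct LocationSummary {
  Location InAt(int i) const { return inputs[i]; }
  Location inputs[2];
};
struct HInstruction {
  LocationSummary* GetLocations() { return &locations; }
  LocationSummary locations;
};

// After the rewrite: the slow path holds only the instruction and derives
// its operand locations at emit time.
class BoundsCheckSlowPath {
 public:
  explicit BoundsCheckSlowPath(HInstruction* instruction)
      : instruction_(instruction) {}

  void EmitNativeCode() {
    LocationSummary* locations = instruction_->GetLocations();
    Location index = locations->InAt(0);   // Previously a copied ctor argument.
    Location length = locations->InAt(1);  // Likewise.
    static_cast<void>(index);
    static_cast<void>(length);
  }

 private:
  HInstruction* const instruction_;
};
```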
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 093d786..e4188e4 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -112,23 +112,19 @@
 
 class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction,
-                            Location index_location,
-                            Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
+  explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(index_location_,
+    codegen->EmitParallelMoves(locations->InAt(0),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimInt,
-                               length_location_,
+                               locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimInt);
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
@@ -144,8 +140,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64);
 };
@@ -334,17 +328,13 @@
 
 class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  TypeCheckSlowPathMIPS64(HInstruction* instruction,
-                          Location class_to_check,
-                          Location object_class,
-                          uint32_t dex_pc)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class),
-        dex_pc_(dex_pc) {}
+  explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
+    uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
@@ -355,17 +345,17 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(class_to_check_,
+    codegen->EmitParallelMoves(locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimNot,
-                               object_class_,
+                               object_class,
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
                                     instruction_,
-                                    dex_pc_,
+                                    dex_pc,
                                     this);
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
@@ -376,7 +366,7 @@
                            const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this);
+      mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
       CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
@@ -388,9 +378,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
-  uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64);
 };
@@ -1590,10 +1577,8 @@
 
 void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  BoundsCheckSlowPathMIPS64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathMIPS64(
-      instruction,
-      locations->InAt(0),
-      locations->InAt(1));
+  BoundsCheckSlowPathMIPS64* slow_path =
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathMIPS64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   GpuRegister index = locations->InAt(0).AsRegister<GpuRegister>();
@@ -1616,6 +1601,7 @@
       LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
@@ -1625,11 +1611,8 @@
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
   GpuRegister obj_cls = locations->GetTemp(0).AsRegister<GpuRegister>();
 
-  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(
-      instruction,
-      locations->InAt(1),
-      Location::RegisterLocation(obj_cls),
-      instruction->GetDexPc());
+  SlowPathCodeMIPS64* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // TODO: avoid this check if we know obj is not null.
@@ -2270,6 +2253,7 @@
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // The output does overlap inputs.
+  // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
@@ -2296,10 +2280,7 @@
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
     SlowPathCodeMIPS64* slow_path =
-        new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
-                                                             locations->InAt(1),
-                                                             locations->Out(),
-                                                             instruction->GetDexPc());
+        new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
     codegen_->AddSlowPath(slow_path);
     __ Bnec(out, cls, slow_path->GetEntryLabel());
     __ LoadConst32(out, 1);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 72c690d..e8aa61d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -116,24 +116,20 @@
 
 class BoundsCheckSlowPathX86 : public SlowPathCodeX86 {
  public:
-  BoundsCheckSlowPathX86(HBoundsCheck* instruction,
-                         Location index_location,
-                         Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
+  explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     x86_codegen->EmitParallelMoves(
-        index_location_,
+        locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        length_location_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
@@ -148,8 +144,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
 };
@@ -280,15 +274,12 @@
 
 class TypeCheckSlowPathX86 : public SlowPathCodeX86 {
  public:
-  TypeCheckSlowPathX86(HInstruction* instruction,
-                       Location class_to_check,
-                       Location object_class)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class) {}
+  explicit TypeCheckSlowPathX86(HInstruction* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -300,10 +291,10 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     x86_codegen->EmitParallelMoves(
-        class_to_check_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimNot,
-        object_class_,
+        object_class,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimNot);
 
@@ -332,8 +323,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
 };
@@ -4357,7 +4346,7 @@
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
   SlowPathCodeX86* slow_path =
-    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction, index_loc, length_loc);
+    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -4830,6 +4819,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86 uses this register too.
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -4866,8 +4856,7 @@
   } else {
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-        instruction, locations->InAt(1), locations->Out());
+    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction);
     codegen_->AddSlowPath(slow_path);
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ movl(out, Immediate(1));
@@ -4890,6 +4879,7 @@
       instruction, LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86 uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
@@ -4899,8 +4889,8 @@
   Location cls = locations->InAt(1);
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-      instruction, locations->InAt(1), locations->GetTemp(0));
+  SlowPathCodeX86* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // Avoid null check if we know obj is not null.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 820ec78..ff52f4f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -170,24 +170,21 @@
 
 class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
-  BoundsCheckSlowPathX86_64(HBoundsCheck* instruction,
-                            Location index_location,
-                            Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
+  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
+      : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
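+    // The index and length locations are read from the instruction itself.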
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        index_location_,
+        locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        length_location_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
@@ -200,8 +197,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
 };
@@ -293,17 +288,14 @@
 
 class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
-  TypeCheckSlowPathX86_64(HInstruction* instruction,
-                          Location class_to_check,
-                          Location object_class,
-                          uint32_t dex_pc)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class),
-        dex_pc_(dex_pc) {}
+  explicit TypeCheckSlowPathX86_64(HInstruction* instruction)
+      : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
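+    // CheckCast keeps the object's class in the temp; InstanceOf leaves it in the output register.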
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
+    uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -315,23 +307,23 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        class_to_check_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimNot,
-        object_class_,
+        object_class,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
                                  instruction_,
-                                 dex_pc_,
+                                 dex_pc,
                                  this);
     } else {
       DCHECK(instruction_->IsCheckCast());
       x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
                                  instruction_,
-                                 dex_pc_,
+                                 dex_pc,
                                  this);
     }
 
@@ -347,9 +339,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
-  const uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
 };
@@ -4195,7 +4184,7 @@
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
   SlowPathCodeX86_64* slow_path =
-    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction, index_loc, length_loc);
+    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -4653,6 +4642,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86_64 uses this register too.
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -4688,8 +4678,7 @@
   } else {
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
-        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
+    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction);
     codegen_->AddSlowPath(slow_path);
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ movl(out, Immediate(1));
@@ -4712,6 +4701,7 @@
       instruction, LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86_64 uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
@@ -4721,8 +4711,8 @@
   Location cls = locations->InAt(1);
   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
-      instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
+  SlowPathCodeX86_64* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // Avoid null check if we know obj is not null.
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ff90f32..112d42e 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -182,10 +182,10 @@
   ArtMethod* resolved_method;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
     MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
-    resolved_method = class_linker->FindDexCache(*ref.dex_file)->GetResolvedMethod(
+    resolved_method = class_linker->FindDexCache(soa.Self(), *ref.dex_file)->GetResolvedMethod(
         ref.dex_method_index, class_linker->GetImagePointerSize());
   } else {
-    resolved_method = class_linker->FindDexCache(caller_dex_file)->GetResolvedMethod(
+    resolved_method = class_linker->FindDexCache(soa.Self(), caller_dex_file)->GetResolvedMethod(
         method_index, class_linker->GetImagePointerSize());
   }
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index df6e550..0ac26de 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -132,6 +132,12 @@
   // with
   //    ADD tmp, a, b
   //    NEG dst, tmp
+  // Note that we cannot optimize `(-a) + (-b)` to `-(a + b)` for floating-point.
+  // When `a` is `-0.0` and `b` is `0.0`, the former expression yields `0.0`,
+  // while the latter yields `-0.0`.
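+  // (IEEE 754: the sum of two zeroes of opposite sign is `+0.0`, so
+  // `0.0 + (-0.0) == 0.0` while `-(-0.0 + 0.0) == -0.0`.)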
+  if (!Primitive::IsIntegralType(binop->GetType())) {
+    return false;
+  }
   binop->ReplaceInput(left_neg->GetInput(), 0);
   binop->ReplaceInput(right_neg->GetInput(), 1);
   left_neg->GetBlock()->RemoveInstruction(left_neg);
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 806fd7a..69a3e62 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -103,7 +103,7 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           Location::RegisterLocation(kArtMethodRegister));
-      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
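+      // Record the pc info through the codegen, associating it with this slow path.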
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
       UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
       UNREACHABLE();
@@ -989,10 +989,7 @@
   DCHECK_ALIGNED(value_offset, 4);
   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
 
-  // temp cannot overflow because we cannot allocate a String object with size 4GiB or greater.
-  __ add(temp, temp, ShifterOperand(temp));
   __ LoadImmediate(temp1, value_offset);
-  __ add(temp, temp, ShifterOperand(value_offset));
 
   // Loop to compare strings 2 characters at a time starting at the front of the string.
   // Ok to do this because strings with an odd length are zero-padded.
@@ -1002,8 +999,8 @@
   __ cmp(out, ShifterOperand(temp2));
   __ b(&return_false, NE);
   __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t)));
-  __ cmp(temp1, ShifterOperand(temp));
-  __ b(&loop, LO);
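+  // temp holds the remaining length in chars; each iteration compares two chars (four bytes).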
+  __ subs(temp, temp, ShifterOperand(sizeof(uint32_t) / sizeof(uint16_t)));
+  __ b(&loop, GT);
 
   // Return true and exit the function.
   // If loop does not result in returning false, we return true.
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index a5332ea..0171d69 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -112,7 +112,7 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           LocationFrom(kArtMethodRegister));
-      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
       UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
       UNREACHABLE();
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index b7126b2..be076cd 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -141,7 +141,7 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           Location::RegisterLocation(EAX));
-      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
       UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
       UNREACHABLE();
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 15fbac1..1f35b59 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -132,7 +132,7 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(
           invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
-      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
       UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2ed2d9a..9bf3968 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_NODES_H_
 #define ART_COMPILER_OPTIMIZING_NODES_H_
 
+#include <array>
 #include <type_traits>
 
 #include "base/arena_containers.h"
@@ -81,7 +82,7 @@
   kCondGE,
 };
 
-class HInstructionList {
+class HInstructionList : public ValueObject {
  public:
   HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {}
 
@@ -127,7 +128,7 @@
 };
 
 // Control-flow graph of a method. Contains a list of basic blocks.
-class HGraph : public ArenaObject<kArenaAllocMisc> {
+class HGraph : public ArenaObject<kArenaAllocGraph> {
  public:
   HGraph(ArenaAllocator* arena,
          const DexFile& dex_file,
@@ -464,7 +465,7 @@
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
 
-class HLoopInformation : public ArenaObject<kArenaAllocMisc> {
+class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
  public:
   HLoopInformation(HBasicBlock* header, HGraph* graph)
       : header_(header),
@@ -562,7 +563,7 @@
 // Stores try/catch information for basic blocks.
 // Note that HGraph is constructed so that catch blocks cannot simultaneously
 // be try blocks.
-class TryCatchInformation : public ArenaObject<kArenaAllocMisc> {
+class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> {
  public:
   // Try block information constructor.
   explicit TryCatchInformation(const HTryBoundary& try_entry)
@@ -619,7 +620,7 @@
 // as a double linked list. Each block knows its predecessors and
 // successors.
 
-class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
+class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
  public:
   explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc)
       : graph_(graph),
@@ -1107,7 +1108,7 @@
 template <typename T> class HUseList;
 
 template <typename T>
-class HUseListNode : public ArenaObject<kArenaAllocMisc> {
+class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
  public:
   HUseListNode* GetPrevious() const { return prev_; }
   HUseListNode* GetNext() const { return next_; }
@@ -1492,7 +1493,7 @@
 };
 
 // A HEnvironment object contains the values of virtual registers at a given location.
-class HEnvironment : public ArenaObject<kArenaAllocMisc> {
+class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
  public:
   HEnvironment(ArenaAllocator* arena,
                size_t number_of_vregs,
@@ -1682,7 +1683,7 @@
 
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
 
-class HInstruction : public ArenaObject<kArenaAllocMisc> {
+class HInstruction : public ArenaObject<kArenaAllocInstruction> {
  public:
   explicit HInstruction(SideEffects side_effects)
       : previous_(nullptr),
@@ -1731,9 +1732,7 @@
 
   virtual bool NeedsEnvironment() const { return false; }
   virtual uint32_t GetDexPc() const {
-    LOG(FATAL) << "GetDexPc() cannot be called on an instruction that"
-                  " does not need an environment";
-    UNREACHABLE();
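+    // Instructions that do not need an environment have no dex pc.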
+    return kNoDexPc;
   }
   virtual bool IsControlFlow() const { return false; }
 
@@ -2038,54 +2037,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBackwardInstructionIterator);
 };
 
-// An embedded container with N elements of type T.  Used (with partial
-// specialization for N=0) because embedded arrays cannot have size 0.
-template<typename T, intptr_t N>
-class EmbeddedArray {
- public:
-  EmbeddedArray() : elements_() {}
-
-  intptr_t GetLength() const { return N; }
-
-  const T& operator[](intptr_t i) const {
-    DCHECK_LT(i, GetLength());
-    return elements_[i];
-  }
-
-  T& operator[](intptr_t i) {
-    DCHECK_LT(i, GetLength());
-    return elements_[i];
-  }
-
-  const T& At(intptr_t i) const {
-    return (*this)[i];
-  }
-
-  void SetAt(intptr_t i, const T& val) {
-    (*this)[i] = val;
-  }
-
- private:
-  T elements_[N];
-};
-
-template<typename T>
-class EmbeddedArray<T, 0> {
- public:
-  intptr_t length() const { return 0; }
-  const T& operator[](intptr_t i) const {
-    UNUSED(i);
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-  T& operator[](intptr_t i) {
-    UNUSED(i);
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-};
-
-template<intptr_t N>
+template<size_t N>
 class HTemplateInstruction: public HInstruction {
  public:
   HTemplateInstruction<N>(SideEffects side_effects)
@@ -2095,18 +2047,47 @@
   size_t InputCount() const OVERRIDE { return N; }
 
  protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_[i]; }
+  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
+    DCHECK_LT(i, N);
+    return inputs_[i];
+  }
 
   void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE {
+    DCHECK_LT(i, N);
     inputs_[i] = input;
   }
 
  private:
-  EmbeddedArray<HUserRecord<HInstruction*>, N> inputs_;
+  std::array<HUserRecord<HInstruction*>, N> inputs_;
 
   friend class SsaBuilder;
 };
 
+// HTemplateInstruction specialization for N=0.
+template<>
+class HTemplateInstruction<0>: public HInstruction {
+ public:
+  explicit HTemplateInstruction(SideEffects side_effects) : HInstruction(side_effects) {}
+  virtual ~HTemplateInstruction() {}
+
+  size_t InputCount() const OVERRIDE { return 0; }
+
+ protected:
+  const HUserRecord<HInstruction*> InputRecordAt(size_t i ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+
+  void SetRawInputRecordAt(size_t i ATTRIBUTE_UNUSED,
+                           const HUserRecord<HInstruction*>& input ATTRIBUTE_UNUSED) OVERRIDE {
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+
+ private:
+  friend class SsaBuilder;
+};
+
 template<intptr_t N>
 class HExpression : public HTemplateInstruction<N> {
  public:
@@ -4833,7 +4814,7 @@
   DISALLOW_COPY_AND_ASSIGN(HFakeString);
 };
 
-class MoveOperands : public ArenaObject<kArenaAllocMisc> {
+class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
  public:
   MoveOperands(Location source,
                Location destination,
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index f793a65..2f59d4c 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -26,7 +26,7 @@
 /**
  * Abstraction to implement an optimization pass.
  */
-class HOptimization : public ArenaObject<kArenaAllocMisc> {
+class HOptimization : public ArenaObject<kArenaAllocOptimization> {
  public:
   HOptimization(HGraph* graph,
                 const char* pass_name,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 1db3063..8e48f6d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -25,6 +25,7 @@
 
 #include "art_method-inl.h"
 #include "base/arena_allocator.h"
+#include "base/arena_containers.h"
 #include "base/dumpable.h"
 #include "base/timing_logger.h"
 #include "boolean_simplifier.h"
@@ -68,7 +69,9 @@
  */
 class CodeVectorAllocator FINAL : public CodeAllocator {
  public:
-  CodeVectorAllocator() : size_(0) {}
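+  // The code buffer is allocated from the compilation arena.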
+  explicit CodeVectorAllocator(ArenaAllocator* arena)
+      : memory_(arena->Adapter(kArenaAllocCodeBuffer)),
+        size_(0) {}
 
   virtual uint8_t* Allocate(size_t size) {
     size_ = size;
@@ -77,10 +80,10 @@
   }
 
   size_t GetSize() const { return size_; }
-  const std::vector<uint8_t>& GetMemory() const { return memory_; }
+  const ArenaVector<uint8_t>& GetMemory() const { return memory_; }
 
  private:
-  std::vector<uint8_t> memory_;
+  ArenaVector<uint8_t> memory_;
   size_t size_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator);
@@ -498,7 +501,7 @@
 
 // The stack map we generate must be 4-byte aligned on ARM. Since existing
 // maps are generated alongside these stack maps, we must also align them.
-static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) {
+static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) {
   size_t size = vector.size();
   size_t aligned_size = RoundUp(size, 4);
   for (; size < aligned_size; ++size) {
@@ -553,17 +556,17 @@
 
   AllocateRegisters(graph, codegen, pass_observer);
 
-  CodeVectorAllocator allocator;
+  ArenaAllocator* arena = graph->GetArena();
+  CodeVectorAllocator allocator(arena);
+  DefaultSrcMap src_mapping_table;
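+  // Install the source map before compiling so entries are recorded during code emission.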
+  codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
+                         ? &src_mapping_table
+                         : nullptr);
   codegen->CompileOptimized(&allocator);
 
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
 
-  DefaultSrcMap src_mapping_table;
-  if (compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()) {
-    codegen->BuildSourceMap(&src_mapping_table);
-  }
-
-  std::vector<uint8_t> stack_map;
+  ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
   codegen->BuildStackMaps(&stack_map);
 
   MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
@@ -595,20 +598,21 @@
     CompilerDriver* compiler_driver,
     const DexCompilationUnit& dex_compilation_unit,
     PassObserver* pass_observer) const {
-  CodeVectorAllocator allocator;
+  ArenaAllocator* arena = codegen->GetGraph()->GetArena();
+  CodeVectorAllocator allocator(arena);
+  DefaultSrcMap src_mapping_table;
+  codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
+                         ? &src_mapping_table
+                         : nullptr);
   codegen->CompileBaseline(&allocator);
 
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
 
-  std::vector<uint8_t> mapping_table;
+  ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildMappingTable(&mapping_table);
-  DefaultSrcMap src_mapping_table;
-  if (compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()) {
-    codegen->BuildSourceMap(&src_mapping_table);
-  }
-  std::vector<uint8_t> vmap_table;
+  ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildVMapTable(&vmap_table);
-  std::vector<uint8_t> gc_map;
+  ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
 
   MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
@@ -714,7 +718,8 @@
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<4> hs(soa.Self());
     ClassLinker* class_linker = dex_compilation_unit.GetClassLinker();
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+        soa.Self(), dex_file)));
     Handle<mirror::ClassLoader> loader(hs.NewHandle(
         soa.Decode<mirror::ClassLoader*>(class_loader)));
     ArtMethod* art_method = compiler_driver->ResolveMethod(
@@ -751,6 +756,7 @@
   // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back
   // to Quick.
   bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit();
+  CompiledMethod* compiled_method = nullptr;
   if (run_optimizations_ && can_allocate_registers) {
     VLOG(compiler) << "Optimizing " << method_name;
 
@@ -765,11 +771,11 @@
       }
     }
 
-    return CompileOptimized(graph,
-                            codegen.get(),
-                            compiler_driver,
-                            dex_compilation_unit,
-                            &pass_observer);
+    compiled_method = CompileOptimized(graph,
+                                       codegen.get(),
+                                       compiler_driver,
+                                       dex_compilation_unit,
+                                       &pass_observer);
   } else if (shouldOptimize && can_allocate_registers) {
     LOG(FATAL) << "Could not allocate registers in optimizing compiler";
     UNREACHABLE();
@@ -782,13 +788,20 @@
       MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
     }
 
-    return CompileBaseline(codegen.get(),
-                           compiler_driver,
-                           dex_compilation_unit,
-                           &pass_observer);
-  } else {
-    return nullptr;
+    compiled_method = CompileBaseline(codegen.get(),
+                                      compiler_driver,
+                                      dex_compilation_unit,
+                                      &pass_observer);
   }
+
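+  // Dump arena memory statistics for methods that allocate more than 4MB.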
+  if (kArenaAllocatorCountAllocations) {
+    if (arena.BytesAllocated() > 4 * MB) {
+      MemStats mem_stats(arena.GetMemStats());
+      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
+    }
+  }
+
+  return compiled_method;
 }
 
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index d3eec1a..516638b 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -363,7 +363,8 @@
     if (kIsDebugBuild) {
       ScopedObjectAccess soa(Thread::Current());
       ClassLinker* cl = Runtime::Current()->GetClassLinker();
-      mirror::DexCache* dex_cache = cl->FindDexCache(instr->AsInvoke()->GetDexFile());
+      mirror::DexCache* dex_cache = cl->FindDexCache(
+          soa.Self(), instr->AsInvoke()->GetDexFile(), false);
       ArtMethod* method = dex_cache->GetResolvedMethod(
           instr->AsInvoke()->GetDexMethodIndex(), cl->GetImagePointerSize());
       DCHECK(method != nullptr);
@@ -394,7 +395,8 @@
   DCHECK_EQ(instr->GetType(), Primitive::kPrimNot);
 
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
+      soa.Self(), dex_file, false);
   // Get type from dex cache assuming it was populated by the verifier.
   SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
 }
@@ -432,7 +434,7 @@
 
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  mirror::DexCache* dex_cache = cl->FindDexCache(info.GetDexFile());
+  mirror::DexCache* dex_cache = cl->FindDexCache(soa.Self(), info.GetDexFile(), false);
   ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), dex_cache);
   // TODO: There are certain cases where we can't resolve the field.
   // b/21914925 is open to keep track of a repro case for this issue.
@@ -451,7 +453,7 @@
 void RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache =
-      Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile());
+      Runtime::Current()->GetClassLinker()->FindDexCache(soa.Self(), instr->GetDexFile(), false);
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
   // TODO: investigating why we are still getting unresolved classes: b/22821472.
@@ -634,7 +636,7 @@
 
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile());
+  mirror::DexCache* dex_cache = cl->FindDexCache(soa.Self(), instr->GetDexFile());
   ArtMethod* method = dex_cache->GetResolvedMethod(
       instr->GetDexMethodIndex(), cl->GetImagePointerSize());
   mirror::Class* klass = (method == nullptr) ? nullptr : method->GetReturnType(false);
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 0e3e08c..807beda 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -137,10 +137,14 @@
         if (rs_ == kNoRegister) {
           // Immediate shift.
           if (shift_ == RRX) {
+            DCHECK_EQ(immed_, 0u);
             // RRX is encoded as an ROR with imm 0.
             return ROR << 4 | static_cast<uint32_t>(rm_);
           } else {
-            uint32_t imm3 = immed_ >> 2;
+            DCHECK((1 <= immed_ && immed_ <= 31) ||
+                   (immed_ == 0u && shift_ == LSL) ||
+                   (immed_ == 32u && (shift_ == ASR || shift_ == LSR)));
+            uint32_t imm3 = (immed_ >> 2) & 7 /* 0b111 */;
             uint32_t imm2 = immed_ & 3U /* 0b11 */;
 
             return imm3 << 12 | imm2 << 6 | shift_ << 4 |
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index ef60fef..7825457 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -375,6 +375,13 @@
   kItE = kItElse
 };
 
+// Set condition codes request.
+enum SetCc {
+  kCcDontCare,  // Allows prioritizing 16-bit instructions on Thumb2 whether they set CCs or not.
+  kCcSet,
+  kCcKeep,
+};
+
 constexpr uint32_t kNoItCondition = 3;
 constexpr uint32_t kInvalidModifiedImmediate = -1;
 
@@ -392,25 +399,61 @@
   virtual bool IsThumb() const = 0;
 
   // Data-processing instructions.
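+  // Each instruction takes a SetCc request; the "s" forms are thin wrappers that pass kCcSet.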
-  virtual void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void and_(Register rd, Register rn, const ShifterOperand& so,
+                    Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void ands(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    and_(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void eor(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void eors(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    eor(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void sub(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    sub(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    rsb(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void add(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    add(rd, rn, so, cond, kCcSet);
+  }
+
+  virtual void adc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void adcs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    adc(rd, rn, so, cond, kCcSet);
+  }
+
+  virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void sbcs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    sbc(rd, rn, so, cond, kCcSet);
+  }
+
+  virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void rscs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    rsc(rd, rn, so, cond, kCcSet);
+  }
 
   virtual void tst(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
@@ -420,16 +463,33 @@
 
   virtual void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  virtual void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void orr(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void mov(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void movs(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    orr(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void mov(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void movs(Register rd, const ShifterOperand& so, Condition cond = AL) {
+    mov(rd, so, cond, kCcSet);
+  }
+
+  virtual void bic(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void bics(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    bic(rd, rn, so, cond, kCcSet);
+  }
+
+  virtual void mvn(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) {
+    mvn(rd, so, cond, kCcSet);
+  }
 
   // Miscellaneous data-processing instructions.
   virtual void clz(Register rd, Register rm, Condition cond = AL) = 0;
@@ -697,25 +757,68 @@
 
   // Convenience shift instructions. Use mov instruction with shifter operand
   // for variants setting the status flags or using a register shift count.
-  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Rrx(Register rd, Register rm, bool setcc = false,
-                   Condition cond = AL) = 0;
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Asr(Register rd, Register rm, Register rn, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Ror(Register rd, Register rm, Register rn, bool setcc = false,
-                   Condition cond = AL) = 0;
+  void Lsls(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) {
+    Lsl(rd, rm, shift_imm, cond, kCcSet);
+  }
+
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Lsrs(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) {
+    Lsr(rd, rm, shift_imm, cond, kCcSet);
+  }
+
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Asrs(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) {
+    Asr(rd, rm, shift_imm, cond, kCcSet);
+  }
+
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Rors(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) {
+    Ror(rd, rm, shift_imm, cond, kCcSet);
+  }
+
+  virtual void Rrx(Register rd, Register rm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Rrxs(Register rd, Register rm, Condition cond = AL) {
+    Rrx(rd, rm, cond, kCcSet);
+  }
+
+  virtual void Lsl(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Lsls(Register rd, Register rm, Register rn, Condition cond = AL) {
+    Lsl(rd, rm, rn, cond, kCcSet);
+  }
+
+  virtual void Lsr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Lsrs(Register rd, Register rm, Register rn, Condition cond = AL) {
+    Lsr(rd, rm, rn, cond, kCcSet);
+  }
+
+  virtual void Asr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Asrs(Register rd, Register rm, Register rn, Condition cond = AL) {
+    Asr(rd, rm, rn, cond, kCcSet);
+  }
+
+  virtual void Ror(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Rors(Register rd, Register rm, Register rn, Condition cond = AL) {
+    Ror(rd, rm, rn, cond, kCcSet);
+  }
 
   // Returns whether the `immediate` can fit in a `ShifterOperand`. If yes,
   // `shifter_op` contains the operand.
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 6e60ddc..d91ddee 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -57,126 +57,94 @@
 }
 
 void Arm32Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), AND, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), AND, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::eor(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), EOR, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), EOR, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::sub(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), SUB, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), SUB, set_cc, rn, rd, so);
 }
 
 void Arm32Assembler::rsb(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), RSB, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), RSB, set_cc, rn, rd, so);
 }
 
-void Arm32Assembler::rsbs(Register rd, Register rn, const ShifterOperand& so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), RSB, 1, rn, rd, so);
-}
-
-
 void Arm32Assembler::add(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), ADD, 0, rn, rd, so);
-}
-
-
-void Arm32Assembler::adds(Register rd, Register rn, const ShifterOperand& so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), ADD, 1, rn, rd, so);
-}
-
-
-void Arm32Assembler::subs(Register rd, Register rn, const ShifterOperand& so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), SUB, 1, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), ADD, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::adc(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), ADC, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), ADC, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::sbc(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), SBC, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), SBC, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::rsc(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), RSC, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), RSC, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::tst(Register rn, const ShifterOperand& so, Condition cond) {
   CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
-  EmitType01(cond, so.type(), TST, 1, rn, R0, so);
+  EmitType01(cond, so.type(), TST, kCcSet, rn, R0, so);
 }
 
 
 void Arm32Assembler::teq(Register rn, const ShifterOperand& so, Condition cond) {
   CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
-  EmitType01(cond, so.type(), TEQ, 1, rn, R0, so);
+  EmitType01(cond, so.type(), TEQ, kCcSet, rn, R0, so);
 }
 
 
 void Arm32Assembler::cmp(Register rn, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), CMP, 1, rn, R0, so);
+  EmitType01(cond, so.type(), CMP, kCcSet, rn, R0, so);
 }
 
 
 void Arm32Assembler::cmn(Register rn, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), CMN, 1, rn, R0, so);
+  EmitType01(cond, so.type(), CMN, kCcSet, rn, R0, so);
 }
 
 
-void Arm32Assembler::orr(Register rd, Register rn,
-                    const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), ORR, 0, rn, rd, so);
+void Arm32Assembler::orr(Register rd, Register rn, const ShifterOperand& so,
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), ORR, set_cc, rn, rd, so);
 }
 
 
-void Arm32Assembler::orrs(Register rd, Register rn,
-                        const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), ORR, 1, rn, rd, so);
-}
-
-
-void Arm32Assembler::mov(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), MOV, 0, R0, rd, so);
-}
-
-
-void Arm32Assembler::movs(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), MOV, 1, R0, rd, so);
+void Arm32Assembler::mov(Register rd, const ShifterOperand& so,
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), MOV, set_cc, R0, rd, so);
 }
 
 
 void Arm32Assembler::bic(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), BIC, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), BIC, set_cc, rn, rd, so);
 }
 
 
-void Arm32Assembler::mvn(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), MVN, 0, R0, rd, so);
-}
-
-
-void Arm32Assembler::mvns(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), MVN, 1, R0, rd, so);
+void Arm32Assembler::mvn(Register rd, const ShifterOperand& so,
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), MVN, set_cc, R0, rd, so);
 }
 
 
@@ -573,7 +541,7 @@
 
 
 void Arm32Assembler::MarkExceptionHandler(Label* label) {
-  EmitType01(AL, 1, TST, 1, PC, R0, ShifterOperand(0));
+  EmitType01(AL, 1, TST, kCcSet, PC, R0, ShifterOperand(0));
   Label l;
   b(&l);
   EmitBranch(AL, label, false);
@@ -590,7 +558,7 @@
 void Arm32Assembler::EmitType01(Condition cond,
                                 int type,
                                 Opcode opcode,
-                                int set_cc,
+                                SetCc set_cc,
                                 Register rn,
                                 Register rd,
                                 const ShifterOperand& so) {
@@ -599,7 +567,7 @@
   int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
                      type << kTypeShift |
                      static_cast<int32_t>(opcode) << kOpcodeShift |
-                     set_cc << kSShift |
+                     (set_cc == kCcSet ? 1 : 0) << kSShift |
                      static_cast<int32_t>(rn) << kRnShift |
                      static_cast<int32_t>(rd) << kRdShift |
                      so.encodingArm();
@@ -1158,96 +1126,60 @@
 
 
 void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                         bool setcc, Condition cond) {
+                         Condition cond, SetCc set_cc) {
   CHECK_LE(shift_imm, 31u);
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, LSL, shift_imm), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
-  }
+  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond, set_cc);
 }
 
 
 void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                         bool setcc, Condition cond) {
+                         Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply with UAL syntax.
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, LSR, shift_imm), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
-  }
+  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond, set_cc);
 }
 
 
 void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                         bool setcc, Condition cond) {
+                         Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply with UAL syntax.
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ASR, shift_imm), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
-  }
+  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond, set_cc);
 }
 
 
 void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                         bool setcc, Condition cond) {
+                         Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 31u);
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ROR, shift_imm), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
-  }
+  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond, set_cc);
 }
 
-void Arm32Assembler::Rrx(Register rd, Register rm, bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ROR, 0), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ROR, 0), cond);
-  }
+void Arm32Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, ROR, 0), cond, set_cc);
 }
 
 
 void Arm32Assembler::Lsl(Register rd, Register rm, Register rn,
-                         bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, LSL, rn), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, LSL, rn), cond);
-  }
+                         Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, LSL, rn), cond, set_cc);
 }
 
 
 void Arm32Assembler::Lsr(Register rd, Register rm, Register rn,
-                         bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, LSR, rn), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, LSR, rn), cond);
-  }
+                         Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, LSR, rn), cond, set_cc);
 }
 
 
 void Arm32Assembler::Asr(Register rd, Register rm, Register rn,
-                         bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ASR, rn), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ASR, rn), cond);
-  }
+                         Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, ASR, rn), cond, set_cc);
 }
 
 
 void Arm32Assembler::Ror(Register rd, Register rm, Register rn,
-                         bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ROR, rn), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ROR, rn), cond);
-  }
+                         Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, ROR, rn), cond, set_cc);
 }
 
 void Arm32Assembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR
@@ -1434,24 +1366,24 @@
                                          Condition cond) {
   ShifterOperand shifter_op;
   if (ShifterOperandCanHoldArm32(value, &shifter_op)) {
-    adds(rd, rn, shifter_op, cond);
+    add(rd, rn, shifter_op, cond, kCcSet);
   } else if (ShifterOperandCanHoldArm32(-value, &shifter_op)) {
-    subs(rd, rn, shifter_op, cond);
+    sub(rd, rn, shifter_op, cond, kCcSet);
   } else {
     CHECK(rn != IP);
     if (ShifterOperandCanHoldArm32(~value, &shifter_op)) {
       mvn(IP, shifter_op, cond);
-      adds(rd, rn, ShifterOperand(IP), cond);
+      add(rd, rn, ShifterOperand(IP), cond, kCcSet);
     } else if (ShifterOperandCanHoldArm32(~(-value), &shifter_op)) {
       mvn(IP, shifter_op, cond);
-      subs(rd, rn, ShifterOperand(IP), cond);
+      sub(rd, rn, ShifterOperand(IP), cond, kCcSet);
     } else {
       movw(IP, Low16Bits(value), cond);
       uint16_t value_high = High16Bits(value);
       if (value_high != 0) {
         movt(IP, value_high, cond);
       }
-      adds(rd, rn, ShifterOperand(IP), cond);
+      add(rd, rn, ShifterOperand(IP), cond, kCcSet);
     }
   }
 }
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 1c38eec..b96bb74 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -39,25 +39,29 @@
   }
 
   // Data-processing instructions.
-  void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void and_(Register rd, Register rn, const ShifterOperand& so,
+                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void eor(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void sub(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void add(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void adc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-
-  void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
 
@@ -67,16 +71,17 @@
 
   void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
 
-  void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void orr(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void mov(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void movs(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void mov(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void bic(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void mvn(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   // Miscellaneous data-processing instructions.
   void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
@@ -204,25 +209,25 @@
   void bl(Label* label, Condition cond = AL) OVERRIDE;
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Rrx(Register rd, Register rm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Rrx(Register rd, Register rm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Asr(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Ror(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
+  virtual void Lsl(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Lsr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Asr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Ror(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   void Push(Register rd, Condition cond = AL) OVERRIDE;
   void Pop(Register rd, Condition cond = AL) OVERRIDE;
@@ -305,7 +310,7 @@
   void EmitType01(Condition cond,
                   int type,
                   Opcode opcode,
-                  int set_cc,
+                  SetCc set_cc,
                   Register rn,
                   Register rd,
                   const ShifterOperand& so);
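
The SetCc type threaded through these declarations replaces the old boolean set_cc/setcc parameters with a three-state request. A minimal sketch of its assumed shape follows; this is illustrative only, and the authoritative definition (elsewhere in the tree) may carry different comments:

    // Assumed shape of the SetCc enum used throughout this patch.
    enum SetCc {
      kCcDontCare,  // Caller has no preference; the assembler may pick the shortest encoding.
      kCcSet,       // The instruction must set the condition codes (the "s" forms, e.g. adds).
      kCcKeep,      // The instruction must leave the condition codes untouched.
    };
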
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index efd517b..e6412ac 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -42,7 +42,8 @@
 
 class AssemblerArm32Test : public AssemblerArmTest<arm::Arm32Assembler,
                                                    arm::Register, arm::SRegister,
-                                                   uint32_t, arm::ShifterOperand, arm::Condition> {
+                                                   uint32_t, arm::ShifterOperand, arm::Condition,
+                                                   arm::SetCc> {
  protected:
   std::string GetArchitectureString() OVERRIDE {
     return "arm";
@@ -125,6 +126,10 @@
       conditions_.push_back(arm::Condition::AL);
     }
 
+    set_ccs_.push_back(arm::kCcDontCare);
+    set_ccs_.push_back(arm::kCcSet);
+    set_ccs_.push_back(arm::kCcKeep);
+
     shifter_operands_.push_back(arm::ShifterOperand(0));
     shifter_operands_.push_back(arm::ShifterOperand(1));
     shifter_operands_.push_back(arm::ShifterOperand(2));
@@ -240,6 +245,15 @@
     return oss.str();
   }
 
+  std::vector<arm::SetCc>& GetSetCcs() OVERRIDE {
+    return set_ccs_;
+  }
+
+  std::string GetSetCcString(arm::SetCc s) OVERRIDE {
+    // For arm32, kCcDontCare defaults to not setting condition codes.
+    return s == arm::kCcSet ? "s" : "";
+  }
+
   arm::Register GetPCRegister() OVERRIDE {
     return arm::R15;
   }
@@ -369,12 +383,12 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       cond_index = after_cond_filter.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond_filter.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond_filter.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
       if (EvalFilterString(after_cond_filter)) {
         continue;
@@ -384,6 +398,30 @@
     }
   }
 
+  void TemplateHelper(std::function<void(arm::SetCc)> f, int depth ATTRIBUTE_UNUSED,
+                      bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::string filter,
+                      std::ostringstream& oss) {
+    for (arm::SetCc s : GetSetCcs()) {
+      std::string after_set_cc = fmt;
+      std::string after_set_cc_filter = filter;
+
+      size_t set_cc_index = after_set_cc.find(SET_CC_TOKEN);
+      if (set_cc_index != std::string::npos) {
+        after_set_cc.replace(set_cc_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s));
+      }
+
+      set_cc_index = after_set_cc_filter.find(SET_CC_TOKEN);
+      if (set_cc_index != std::string::npos) {
+        after_set_cc_filter.replace(set_cc_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s));
+      }
+      if (EvalFilterString(after_set_cc_filter)) {
+        continue;
+      }
+
+      ExecuteAndPrint([&] () { f(s); }, after_set_cc, oss);
+    }
+  }
+
   template <typename... Args>
   void TemplateHelper(std::function<void(arm::Register, Args...)> f, int depth, bool without_pc,
                       std::string fmt, std::string filter, std::ostringstream& oss) {
@@ -449,12 +487,12 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       cond_index = after_cond_filter.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond_filter.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond_filter.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
       if (EvalFilterString(after_cond_filter)) {
         continue;
@@ -466,25 +504,51 @@
     }
   }
 
-  template <typename T1, typename T2>
-  std::function<void(T1, T2)> GetBoundFunction2(void (arm::Arm32Assembler::*f)(T1, T2)) {
+  template <typename... Args>
+  void TemplateHelper(std::function<void(arm::SetCc, Args...)> f, int depth, bool without_pc,
+                      std::string fmt, std::string filter, std::ostringstream& oss) {
+    for (arm::SetCc s : GetSetCcs()) {
+      std::string after_set_cc = fmt;
+      std::string after_set_cc_filter = filter;
+
+      size_t set_cc_index = after_set_cc.find(SET_CC_TOKEN);
+      if (set_cc_index != std::string::npos) {
+        after_set_cc.replace(set_cc_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s));
+      }
+
+      set_cc_index = after_set_cc_filter.find(SET_CC_TOKEN);
+      if (set_cc_index != std::string::npos) {
+        after_set_cc_filter.replace(set_cc_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s));
+      }
+      if (EvalFilterString(after_set_cc_filter)) {
+        continue;
+      }
+
+      auto lambda = [&] (Args... args) { f(s, args...); };  // NOLINT [readability/braces] [4]
+      TemplateHelper(std::function<void(Args...)>(lambda), depth, without_pc,
+          after_set_cc, after_set_cc_filter, oss);
+    }
+  }
+
+  template <typename Assembler, typename T1, typename T2>
+  std::function<void(T1, T2)> GetBoundFunction2(void (Assembler::*f)(T1, T2)) {
     return std::bind(f, GetAssembler(), _1, _2);
   }
 
-  template <typename T1, typename T2, typename T3>
-  std::function<void(T1, T2, T3)> GetBoundFunction3(void (arm::Arm32Assembler::*f)(T1, T2, T3)) {
+  template <typename Assembler, typename T1, typename T2, typename T3>
+  std::function<void(T1, T2, T3)> GetBoundFunction3(void (Assembler::*f)(T1, T2, T3)) {
     return std::bind(f, GetAssembler(), _1, _2, _3);
   }
 
-  template <typename T1, typename T2, typename T3, typename T4>
+  template <typename Assembler, typename T1, typename T2, typename T3, typename T4>
   std::function<void(T1, T2, T3, T4)> GetBoundFunction4(
-      void (arm::Arm32Assembler::*f)(T1, T2, T3, T4)) {
+      void (Assembler::*f)(T1, T2, T3, T4)) {
     return std::bind(f, GetAssembler(), _1, _2, _3, _4);
   }
 
-  template <typename T1, typename T2, typename T3, typename T4, typename T5>
+  template <typename Assembler, typename T1, typename T2, typename T3, typename T4, typename T5>
   std::function<void(T1, T2, T3, T4, T5)> GetBoundFunction5(
-      void (arm::Arm32Assembler::*f)(T1, T2, T3, T4, T5)) {
+      void (Assembler::*f)(T1, T2, T3, T4, T5)) {
     return std::bind(f, GetAssembler(), _1, _2, _3, _4, _5);
   }
 
@@ -503,26 +567,26 @@
     DriverStr(oss.str(), test_name);
   }
 
-  template <typename... Args>
-  void T2Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
+  template <typename Assembler, typename... Args>
+  void T2Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt,
                 std::string test_name, std::string filter = "") {
     GenericTemplateHelper(GetBoundFunction2(f), without_pc, fmt, test_name, filter);
   }
 
-  template <typename... Args>
-  void T3Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
+  template <typename Assembler, typename... Args>
+  void T3Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt,
       std::string test_name, std::string filter = "") {
     GenericTemplateHelper(GetBoundFunction3(f), without_pc, fmt, test_name, filter);
   }
 
-  template <typename... Args>
-  void T4Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
+  template <typename Assembler, typename... Args>
+  void T4Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt,
       std::string test_name, std::string filter = "") {
     GenericTemplateHelper(GetBoundFunction4(f), without_pc, fmt, test_name, filter);
   }
 
-  template <typename... Args>
-  void T5Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
+  template <typename Assembler, typename... Args>
+  void T5Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt,
       std::string test_name, std::string filter = "") {
     GenericTemplateHelper(GetBoundFunction5(f), without_pc, fmt, test_name, filter);
   }
@@ -573,6 +637,7 @@
 
   std::vector<arm::Register*> registers_;
   std::vector<arm::Condition> conditions_;
+  std::vector<arm::SetCc> set_ccs_;
   std::vector<arm::ShifterOperand> shifter_operands_;
 };
 
@@ -656,15 +721,23 @@
 }
 
 TEST_F(AssemblerArm32Test, And) {
-  T4Helper(&arm::Arm32Assembler::and_, true, "and{cond} {reg1}, {reg2}, {shift}", "and");
+  T5Helper(&arm::Arm32Assembler::and_, true, "and{cond}{s} {reg1}, {reg2}, {shift}", "and");
+}
+
+TEST_F(AssemblerArm32Test, Ands) {
+  T4Helper(&arm::Arm32Assembler::ands, true, "and{cond}s {reg1}, {reg2}, {shift}", "ands");
 }
 
 TEST_F(AssemblerArm32Test, Eor) {
-  T4Helper(&arm::Arm32Assembler::eor, true, "eor{cond} {reg1}, {reg2}, {shift}", "eor");
+  T5Helper(&arm::Arm32Assembler::eor, true, "eor{cond}{s} {reg1}, {reg2}, {shift}", "eor");
+}
+
+TEST_F(AssemblerArm32Test, Eors) {
+  T4Helper(&arm::Arm32Assembler::eors, true, "eor{cond}s {reg1}, {reg2}, {shift}", "eors");
 }
 
 TEST_F(AssemblerArm32Test, Orr) {
-  T4Helper(&arm::Arm32Assembler::orr, true, "orr{cond} {reg1}, {reg2}, {shift}", "orr");
+  T5Helper(&arm::Arm32Assembler::orr, true, "orr{cond}{s} {reg1}, {reg2}, {shift}", "orr");
 }
 
 TEST_F(AssemblerArm32Test, Orrs) {
@@ -672,11 +745,15 @@
 }
 
 TEST_F(AssemblerArm32Test, Bic) {
-  T4Helper(&arm::Arm32Assembler::bic, true, "bic{cond} {reg1}, {reg2}, {shift}", "bic");
+  T5Helper(&arm::Arm32Assembler::bic, true, "bic{cond}{s} {reg1}, {reg2}, {shift}", "bic");
+}
+
+TEST_F(AssemblerArm32Test, Bics) {
+  T4Helper(&arm::Arm32Assembler::bics, true, "bic{cond}s {reg1}, {reg2}, {shift}", "bics");
 }
 
 TEST_F(AssemblerArm32Test, Mov) {
-  T3Helper(&arm::Arm32Assembler::mov, true, "mov{cond} {reg1}, {shift}", "mov");
+  T4Helper(&arm::Arm32Assembler::mov, true, "mov{cond}{s} {reg1}, {shift}", "mov");
 }
 
 TEST_F(AssemblerArm32Test, Movs) {
@@ -684,7 +761,7 @@
 }
 
 TEST_F(AssemblerArm32Test, Mvn) {
-  T3Helper(&arm::Arm32Assembler::mvn, true, "mvn{cond} {reg1}, {shift}", "mvn");
+  T4Helper(&arm::Arm32Assembler::mvn, true, "mvn{cond}{s} {reg1}, {shift}", "mvn");
 }
 
 TEST_F(AssemblerArm32Test, Mvns) {
@@ -692,7 +769,7 @@
 }
 
 TEST_F(AssemblerArm32Test, Add) {
-  T4Helper(&arm::Arm32Assembler::add, false, "add{cond} {reg1}, {reg2}, {shift}", "add");
+  T5Helper(&arm::Arm32Assembler::add, false, "add{cond}{s} {reg1}, {reg2}, {shift}", "add");
 }
 
 TEST_F(AssemblerArm32Test, Adds) {
@@ -700,11 +777,15 @@
 }
 
 TEST_F(AssemblerArm32Test, Adc) {
-  T4Helper(&arm::Arm32Assembler::adc, false, "adc{cond} {reg1}, {reg2}, {shift}", "adc");
+  T5Helper(&arm::Arm32Assembler::adc, false, "adc{cond}{s} {reg1}, {reg2}, {shift}", "adc");
+}
+
+TEST_F(AssemblerArm32Test, Adcs) {
+  T4Helper(&arm::Arm32Assembler::adcs, false, "adc{cond}s {reg1}, {reg2}, {shift}", "adcs");
 }
 
 TEST_F(AssemblerArm32Test, Sub) {
-  T4Helper(&arm::Arm32Assembler::sub, false, "sub{cond} {reg1}, {reg2}, {shift}", "sub");
+  T5Helper(&arm::Arm32Assembler::sub, false, "sub{cond}{s} {reg1}, {reg2}, {shift}", "sub");
 }
 
 TEST_F(AssemblerArm32Test, Subs) {
@@ -712,11 +793,15 @@
 }
 
 TEST_F(AssemblerArm32Test, Sbc) {
-  T4Helper(&arm::Arm32Assembler::sbc, false, "sbc{cond} {reg1}, {reg2}, {shift}", "sbc");
+  T5Helper(&arm::Arm32Assembler::sbc, false, "sbc{cond}{s} {reg1}, {reg2}, {shift}", "sbc");
+}
+
+TEST_F(AssemblerArm32Test, Sbcs) {
+  T4Helper(&arm::Arm32Assembler::sbcs, false, "sbc{cond}s {reg1}, {reg2}, {shift}", "sbcs");
 }
 
 TEST_F(AssemblerArm32Test, Rsb) {
-  T4Helper(&arm::Arm32Assembler::rsb, true, "rsb{cond} {reg1}, {reg2}, {shift}", "rsb");
+  T5Helper(&arm::Arm32Assembler::rsb, true, "rsb{cond}{s} {reg1}, {reg2}, {shift}", "rsb");
 }
 
 TEST_F(AssemblerArm32Test, Rsbs) {
@@ -724,7 +809,11 @@
 }
 
 TEST_F(AssemblerArm32Test, Rsc) {
-  T4Helper(&arm::Arm32Assembler::rsc, true, "rsc{cond} {reg1}, {reg2}, {shift}", "rsc");
+  T5Helper(&arm::Arm32Assembler::rsc, true, "rsc{cond}{s} {reg1}, {reg2}, {shift}", "rsc");
+}
+
+TEST_F(AssemblerArm32Test, Rscs) {
+  T4Helper(&arm::Arm32Assembler::rscs, true, "rsc{cond}s {reg1}, {reg2}, {shift}", "rscs");
 }
 
 /* TODO: Need better filter support.
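
The TnHelper/GetBoundFunctionN machinery above fans a single assembler mutator out over every register, shifter-operand, condition and SetCc combination. A self-contained sketch of the binding step, with stand-in types (Asm, int parameters) replacing the real assembler and operand types:

    #include <functional>
    #include <iostream>

    struct Asm {
      void and_(int rd, int rn, int so, int cond, int set_cc) {
        std::cout << "and_(" << rd << ", " << rn << ", " << so << ")\n";
      }
    };

    int main() {
      using namespace std::placeholders;
      Asm assembler;
      // Mirrors GetBoundFunction5: bind the object, keep five placeholders so the
      // template helpers can iterate over all argument combinations.
      std::function<void(int, int, int, int, int)> f =
          std::bind(&Asm::and_, &assembler, _1, _2, _3, _4, _5);
      f(0, 1, 2, /* cond */ 14, /* set_cc */ 1);  // Calls assembler.and_(0, 1, 2, 14, 1).
    }
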
diff --git a/compiler/utils/arm/assembler_arm_test.h b/compiler/utils/arm/assembler_arm_test.h
index 838abb6..a85a05e 100644
--- a/compiler/utils/arm/assembler_arm_test.h
+++ b/compiler/utils/arm/assembler_arm_test.h
@@ -21,7 +21,13 @@
 
 namespace art {
 
-template<typename Ass, typename Reg, typename FPReg, typename Imm, typename SOp, typename Cond>
+template<typename Ass,
+         typename Reg,
+         typename FPReg,
+         typename Imm,
+         typename SOp,
+         typename Cond,
+         typename SetCc>
 class AssemblerArmTest : public AssemblerTest<Ass, Reg, FPReg, Imm> {
  public:
   typedef AssemblerTest<Ass, Reg, FPReg, Imm> Base;
@@ -94,7 +100,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (Imm i : immediates1) {
@@ -185,7 +191,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (std::pair<Imm, Imm>& pair : immediates) {
@@ -271,7 +277,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (auto reg1 : reg1_registers) {
@@ -337,7 +343,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (auto reg1 : reg1_registers) {
@@ -401,7 +407,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (const SOp& shift : shifts) {
@@ -457,7 +463,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (const SOp& shift : shifts) {
@@ -511,6 +517,9 @@
   virtual std::vector<Cond>& GetConditions() = 0;
   virtual std::string GetConditionString(Cond c) = 0;
 
+  virtual std::vector<SetCc>& GetSetCcs() = 0;
+  virtual std::string GetSetCcString(SetCc s) = 0;
+
   virtual std::vector<SOp>& GetShiftOperands() = 0;
   virtual std::string GetShiftString(SOp sop) = 0;
 
@@ -534,6 +543,7 @@
   static constexpr const char* REG3_TOKEN = "{reg3}";
   static constexpr const char* REG4_TOKEN = "{reg4}";
   static constexpr const char* COND_TOKEN = "{cond}";
+  static constexpr const char* SET_CC_TOKEN = "{s}";
   static constexpr const char* SHIFT_TOKEN = "{shift}";
 
  private:
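
The SET_CC_TOKEN introduced here is substituted the same way as COND_TOKEN, and the replacement must use the length of the token actually being replaced (the hunks above fix several spots that passed ConstexprStrLen(IMM1_TOKEN) while replacing COND_TOKEN). A self-contained sketch of the substitution, with a hypothetical Replace helper standing in for the inline find/replace:

    #include <iostream>
    #include <string>

    // Hypothetical standalone equivalent of the per-token find/replace above.
    std::string Replace(std::string fmt, const std::string& token, const std::string& value) {
      size_t index = fmt.find(token);
      if (index != std::string::npos) {
        fmt.replace(index, token.length(), value);  // Length of *this* token, not another's.
      }
      return fmt;
    }

    int main() {
      std::string fmt = "and{cond}{s} {reg1}, {reg2}, {shift}";
      fmt = Replace(fmt, "{cond}", "eq");  // Condition suffix.
      fmt = Replace(fmt, "{s}", "s");      // "" on arm32 unless kCcSet.
      std::cout << fmt << "\n";            // Prints: andeqs {reg1}, {reg2}, {shift}
    }
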
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 4e918e9..90ed10c 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -417,128 +417,96 @@
 }
 
 void Thumb2Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
-                           Condition cond) {
-  EmitDataProcessing(cond, AND, 0, rn, rd, so);
+                           Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, AND, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::eor(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, EOR, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, EOR, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::sub(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, SUB, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, SUB, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::rsb(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, RSB, 0, rn, rd, so);
-}
-
-
-void Thumb2Assembler::rsbs(Register rd, Register rn, const ShifterOperand& so,
-                           Condition cond) {
-  EmitDataProcessing(cond, RSB, 1, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, RSB, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::add(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, ADD, 0, rn, rd, so);
-}
-
-
-void Thumb2Assembler::adds(Register rd, Register rn, const ShifterOperand& so,
-                           Condition cond) {
-  EmitDataProcessing(cond, ADD, 1, rn, rd, so);
-}
-
-
-void Thumb2Assembler::subs(Register rd, Register rn, const ShifterOperand& so,
-                           Condition cond) {
-  EmitDataProcessing(cond, SUB, 1, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, ADD, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::adc(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, ADC, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, ADC, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::sbc(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, SBC, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, SBC, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::rsc(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, RSC, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, RSC, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::tst(Register rn, const ShifterOperand& so, Condition cond) {
   CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
-  EmitDataProcessing(cond, TST, 1, rn, R0, so);
+  EmitDataProcessing(cond, TST, kCcSet, rn, R0, so);
 }
 
 
 void Thumb2Assembler::teq(Register rn, const ShifterOperand& so, Condition cond) {
   CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
-  EmitDataProcessing(cond, TEQ, 1, rn, R0, so);
+  EmitDataProcessing(cond, TEQ, kCcSet, rn, R0, so);
 }
 
 
 void Thumb2Assembler::cmp(Register rn, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, CMP, 1, rn, R0, so);
+  EmitDataProcessing(cond, CMP, kCcSet, rn, R0, so);
 }
 
 
 void Thumb2Assembler::cmn(Register rn, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, CMN, 1, rn, R0, so);
+  EmitDataProcessing(cond, CMN, kCcSet, rn, R0, so);
 }
 
 
-void Thumb2Assembler::orr(Register rd, Register rn,
-                          const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, ORR, 0, rn, rd, so);
+void Thumb2Assembler::orr(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, ORR, set_cc, rn, rd, so);
 }
 
 
-void Thumb2Assembler::orrs(Register rd, Register rn,
-                           const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, ORR, 1, rn, rd, so);
-}
-
-
-void Thumb2Assembler::mov(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, MOV, 0, R0, rd, so);
-}
-
-
-void Thumb2Assembler::movs(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, MOV, 1, R0, rd, so);
+void Thumb2Assembler::mov(Register rd, const ShifterOperand& so,
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, MOV, set_cc, R0, rd, so);
 }
 
 
 void Thumb2Assembler::bic(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitDataProcessing(cond, BIC, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, BIC, set_cc, rn, rd, so);
 }
 
 
-void Thumb2Assembler::mvn(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, MVN, 0, R0, rd, so);
-}
-
-
-void Thumb2Assembler::mvns(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, MVN, 1, R0, rd, so);
+void Thumb2Assembler::mvn(Register rd, const ShifterOperand& so,
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, MVN, set_cc, R0, rd, so);
 }
 
 
@@ -1054,7 +1022,7 @@
 
 
 void Thumb2Assembler::MarkExceptionHandler(Label* label) {
-  EmitDataProcessing(AL, TST, 1, PC, R0, ShifterOperand(0));
+  EmitDataProcessing(AL, TST, kCcSet, PC, R0, ShifterOperand(0));
   Label l;
   b(&l);
   EmitBranch(AL, label, false, false);
@@ -1075,9 +1043,9 @@
 }
 
 
-bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED,
+bool Thumb2Assembler::Is32BitDataProcessing(Condition cond,
                                             Opcode opcode,
-                                            bool set_cc,
+                                            SetCc set_cc,
                                             Register rn,
                                             Register rd,
                                             const ShifterOperand& so) {
@@ -1086,7 +1054,7 @@
   }
 
   // Check special case for SP relative ADD and SUB immediate.
-  if ((opcode == ADD || opcode == SUB) && rn == SP && so.IsImmediate()) {
+  if ((opcode == ADD || opcode == SUB) && rn == SP && so.IsImmediate() && set_cc != kCcSet) {
     // If the immediate is in range, use 16 bit.
     if (rd == SP) {
       if (so.GetImmediate() < (1 << 9)) {    // 9 bit immediate.
@@ -1099,8 +1067,10 @@
     }
   }
 
-  bool can_contain_high_register = (opcode == MOV)
-      || ((opcode == ADD) && (rn == rd) && !set_cc);
+  bool can_contain_high_register =
+      (opcode == CMP) ||
+      (opcode == MOV && set_cc != kCcSet) ||
+      ((opcode == ADD) && (rn == rd) && set_cc != kCcSet);
 
   if (IsHighRegister(rd) || IsHighRegister(rn)) {
     if (!can_contain_high_register) {
@@ -1146,39 +1116,80 @@
   }
 
   if (so.IsImmediate()) {
-    if (rn_is_valid && rn != rd) {
-      // The only thumb1 instruction with a register and an immediate are ADD and SUB.  The
-      // immediate must be 3 bits.
-      if (opcode != ADD && opcode != SUB) {
+    if (opcode == RSB) {
+      DCHECK(rn_is_valid);
+      if (so.GetImmediate() != 0u) {
         return true;
-      } else {
-        // Check that the immediate is 3 bits for ADD and SUB.
-        if (so.GetImmediate() >= 8) {
+      }
+    } else if (rn_is_valid && rn != rd) {
+      // The only thumb1 instructions with a register and an immediate are ADD and SUB
+      // with a 3-bit immediate, and RSB with zero immediate.
+      if (opcode == ADD || opcode == SUB) {
+        if (!IsUint<3>(so.GetImmediate())) {
           return true;
         }
+      } else {
+        return true;
       }
     } else {
       // ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits.
       if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) {
         return true;
       } else {
-        if (so.GetImmediate() > 255) {
+        if (!IsUint<8>(so.GetImmediate())) {
           return true;
         }
       }
     }
-  }
-
-  // Check for register shift operand.
-  if (so.IsRegister() && so.IsShift()) {
-    if (opcode != MOV) {
-      return true;
-    }
-    // Check for MOV with an ROR.
-    if (so.GetShift() == ROR) {
-      if (so.GetImmediate() != 0) {
+  } else {
+    DCHECK(so.IsRegister());
+    if (so.IsShift()) {
+      // Shift operand - check if it is a MOV convertible to a 16-bit shift instruction.
+      if (opcode != MOV) {
         return true;
       }
+      // Check for MOV with ROR or RRX: there is no 16-bit ROR immediate and no 16-bit RRX.
+      if (so.GetShift() == ROR || so.GetShift() == RRX) {
+        return true;
+      }
+      // 16-bit shifts set condition codes if and only if outside IT block,
+      // i.e. if and only if cond == AL.
+      if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) {
+        return true;
+      }
+    } else {
+      // Register operand without shift.
+      switch (opcode) {
+        case ADD:
+          // The 16-bit ADD that cannot contain high registers can set condition codes
+          // if and only if outside IT block, i.e. if and only if cond == AL.
+          if (!can_contain_high_register &&
+              ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
+            return true;
+          }
+          break;
+        case AND:
+        case BIC:
+        case EOR:
+        case ORR:
+        case MVN:
+        case ADC:
+        case SUB:
+        case SBC:
+          // These 16-bit opcodes set condition codes if and only if outside IT block,
+          // i.e. if and only if cond == AL.
+          if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) {
+            return true;
+          }
+          break;
+        case RSB:
+        case RSC:
+          // No 16-bit RSB/RSC Rd, Rm, Rn. It would be equivalent to SUB/SBC Rd, Rn, Rm.
+          return true;
+        case CMP:
+        default:
+          break;
+      }
     }
   }
 
@@ -1189,7 +1200,7 @@
 
 void Thumb2Assembler::Emit32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED,
                                               Opcode opcode,
-                                              bool set_cc,
+                                              SetCc set_cc,
                                               Register rn,
                                               Register rd,
                                               const ShifterOperand& so) {
@@ -1203,10 +1214,10 @@
     case ADC: thumb_opcode = 10U /* 0b1010 */; break;
     case SBC: thumb_opcode = 11U /* 0b1011 */; break;
     case RSC: break;
-    case TST: thumb_opcode =  0U /* 0b0000 */; set_cc = true; rd = PC; break;
-    case TEQ: thumb_opcode =  4U /* 0b0100 */; set_cc = true; rd = PC; break;
-    case CMP: thumb_opcode = 13U /* 0b1101 */; set_cc = true; rd = PC; break;
-    case CMN: thumb_opcode =  8U /* 0b1000 */; set_cc = true; rd = PC; break;
+    case TST: thumb_opcode =  0U /* 0b0000 */; DCHECK(set_cc == kCcSet); rd = PC; break;
+    case TEQ: thumb_opcode =  4U /* 0b0100 */; DCHECK(set_cc == kCcSet); rd = PC; break;
+    case CMP: thumb_opcode = 13U /* 0b1101 */; DCHECK(set_cc == kCcSet); rd = PC; break;
+    case CMN: thumb_opcode =  8U /* 0b1000 */; DCHECK(set_cc == kCcSet); rd = PC; break;
     case ORR: thumb_opcode =  2U /* 0b0010 */; break;
     case MOV: thumb_opcode =  2U /* 0b0010 */; rn = PC; break;
     case BIC: thumb_opcode =  1U /* 0b0001 */; break;
@@ -1224,7 +1235,7 @@
   if (so.IsImmediate()) {
     // Check special cases.
     if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) {
-      if (!set_cc) {
+      if (set_cc != kCcSet) {
         if (opcode == SUB) {
           thumb_opcode = 5U;
         } else if (opcode == ADD) {
@@ -1238,7 +1249,7 @@
       uint32_t imm8 = imm & 0xff;
 
       encoding = B31 | B30 | B29 | B28 |
-          (set_cc ? B20 : B25) |
+          (set_cc == kCcSet ? B20 : B25) |
           thumb_opcode << 21 |
           rn << 16 |
           rd << 8 |
@@ -1254,7 +1265,7 @@
       }
       encoding = B31 | B30 | B29 | B28 |
           thumb_opcode << 21 |
-          (set_cc ? B20 : 0) |
+          (set_cc == kCcSet ? B20 : 0) |
           rn << 16 |
           rd << 8 |
           imm;
@@ -1263,7 +1274,7 @@
     // Register (possibly shifted)
     encoding = B31 | B30 | B29 | B27 | B25 |
         thumb_opcode << 21 |
-        (set_cc ? B20 : 0) |
+        (set_cc == kCcSet ? B20 : 0) |
         rn << 16 |
         rd << 8 |
         so.encodingThumb();
@@ -1274,7 +1285,7 @@
 
 void Thumb2Assembler::Emit16BitDataProcessing(Condition cond,
                                               Opcode opcode,
-                                              bool set_cc,
+                                              SetCc set_cc,
                                               Register rn,
                                               Register rd,
                                               const ShifterOperand& so) {
@@ -1304,19 +1315,25 @@
     rn = so.GetRegister();
 
     switch (so.GetShift()) {
-    case LSL: thumb_opcode = 0U /* 0b00 */; break;
-    case LSR: thumb_opcode = 1U /* 0b01 */; break;
-    case ASR: thumb_opcode = 2U /* 0b10 */; break;
-    case ROR:
-      // ROR doesn't allow immediates.
-      thumb_opcode = 7U /* 0b111 */;
-      dp_opcode = 1U /* 0b01 */;
-      opcode_shift = 6;
-      use_immediate = false;
+    case LSL:
+      DCHECK_LE(immediate, 31u);
+      thumb_opcode = 0U /* 0b00 */;
       break;
-    case RRX: break;
+    case LSR:
+      DCHECK(1 <= immediate && immediate <= 32);
+      immediate &= 31;  // 32 is encoded as 0.
+      thumb_opcode = 1U /* 0b01 */;
+      break;
+    case ASR:
+      DCHECK(1 <= immediate && immediate <= 32);
+      immediate &= 31;  // 32 is encoded as 0.
+      thumb_opcode = 2U /* 0b10 */;
+      break;
+    case ROR:  // No 16-bit ROR immediate.
+    case RRX:  // No 16-bit RRX.
     default:
-     break;
+      LOG(FATAL) << "Unexpected shift: " << so.GetShift();
+      UNREACHABLE();
     }
   } else {
     if (so.IsImmediate()) {
@@ -1334,6 +1351,9 @@
         case ADC:
         case SBC:
         case BIC: {
+          // Sets condition codes if and only if outside an IT block;
+          // check that this complies with set_cc.
+          DCHECK((cond == AL) ? set_cc != kCcKeep : set_cc != kCcSet);
           if (rn == rd) {
             rn = so.GetRegister();
           } else {
@@ -1348,9 +1368,17 @@
           rn = so.GetRegister();
           break;
         }
-        case TST:
-        case TEQ:
         case MVN: {
+          // Sets condition codes if and only if outside an IT block;
+          // check that this complies with set_cc.
+          DCHECK((cond == AL) ? set_cc != kCcKeep : set_cc != kCcSet);
+          CHECK_EQ(rn, 0);
+          rn = so.GetRegister();
+          break;
+        }
+        case TST:
+        case TEQ: {
+          DCHECK(set_cc == kCcSet);
           CHECK_EQ(rn, 0);
           rn = so.GetRegister();
           break;
@@ -1371,6 +1399,7 @@
       case TST: thumb_opcode = 8U /* 0b1000 */; CHECK(!use_immediate); break;
       case MVN: thumb_opcode = 15U /* 0b1111 */; CHECK(!use_immediate); break;
       case CMP: {
+        DCHECK(set_cc == kCcSet);
         if (use_immediate) {
           // T2 encoding.
           dp_opcode = 0;
@@ -1378,6 +1407,13 @@
           thumb_opcode = 5U /* 0b101 */;
           rd_shift = 8;
           rn_shift = 8;
+        } else if (IsHighRegister(rd) || IsHighRegister(rn)) {
+          // Special cmp for high registers.
+          dp_opcode = 1U /* 0b01 */;
+          opcode_shift = 7;
+          // Put the top bit of rd into the bottom bit of the opcode.
+          thumb_opcode = 10U /* 0b0001010 */ | static_cast<uint32_t>(rd) >> 3;
+          rd = static_cast<Register>(static_cast<uint32_t>(rd) & 7U /* 0b111 */);
         } else {
           thumb_opcode = 10U /* 0b1010 */;
         }
@@ -1399,7 +1435,7 @@
           rn_shift = 8;
         } else {
           rn = so.GetRegister();
-          if (IsHighRegister(rn) || IsHighRegister(rd)) {
+          if (set_cc != kCcSet) {
             // Special mov for high registers.
             dp_opcode = 1U /* 0b01 */;
             opcode_shift = 7;
@@ -1407,6 +1443,8 @@
             thumb_opcode = 12U /* 0b0001100 */ | static_cast<uint32_t>(rd) >> 3;
             rd = static_cast<Register>(static_cast<uint32_t>(rd) & 7U /* 0b111 */);
           } else {
+            DCHECK(!IsHighRegister(rn));
+            DCHECK(!IsHighRegister(rd));
             thumb_opcode = 0;
           }
         }
@@ -1436,9 +1474,9 @@
 
 
 // ADD and SUB are complex enough to warrant their own emitter.
-void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED,
+void Thumb2Assembler::Emit16BitAddSub(Condition cond,
                                       Opcode opcode,
-                                      bool set_cc,
+                                      SetCc set_cc,
                                       Register rn,
                                       Register rd,
                                       const ShifterOperand& so) {
@@ -1449,7 +1487,7 @@
   uint8_t immediate_shift = 0;
   bool use_immediate = false;
   uint32_t immediate = 0;  // Should be at most 9 bits but keep the full immediate for CHECKs.
-  uint8_t thumb_opcode;;
+  uint8_t thumb_opcode;
 
   if (so.IsImmediate()) {
     use_immediate = true;
@@ -1460,7 +1498,7 @@
     case ADD:
       if (so.IsRegister()) {
         Register rm = so.GetRegister();
-        if (rn == rd && !set_cc) {
+        if (rn == rd && set_cc != kCcSet) {
           // Can use T2 encoding (allows 4 bit registers)
           dp_opcode = 1U /* 0b01 */;
           opcode_shift = 10;
@@ -1471,6 +1509,12 @@
           rd = static_cast<Register>(static_cast<uint32_t>(rd) & 7U /* 0b111 */);
         } else {
           // T1.
+          DCHECK(!IsHighRegister(rd));
+          DCHECK(!IsHighRegister(rn));
+          DCHECK(!IsHighRegister(rm));
+          // Sets condition codes if and only if outside an IT block;
+          // check that this complies with set_cc.
+          DCHECK((cond == AL) ? set_cc != kCcKeep : set_cc != kCcSet);
           opcode_shift = 9;
           thumb_opcode = 12U /* 0b01100 */;
           immediate = static_cast<uint32_t>(so.GetRegister());
@@ -1523,40 +1567,47 @@
 
     case SUB:
       if (so.IsRegister()) {
-         // T1.
-         opcode_shift = 9;
-         thumb_opcode = 13U /* 0b01101 */;
-         immediate = static_cast<uint32_t>(so.GetRegister());
-         use_immediate = true;
-         immediate_shift = 6;
-       } else {
-         if (rd == SP && rn == SP) {
-           // SUB sp, sp, #imm
-           dp_opcode = 2U /* 0b10 */;
-           thumb_opcode = 0x61 /* 0b1100001 */;
-           opcode_shift = 7;
-           CHECK_LT(immediate, (1u << 9));
-           CHECK_ALIGNED(immediate, 4);
+        // T1.
+        Register rm = so.GetRegister();
+        DCHECK(!IsHighRegister(rd));
+        DCHECK(!IsHighRegister(rn));
+        DCHECK(!IsHighRegister(rm));
+        // Sets condition codes if and only if outside an IT block;
+        // check that this complies with set_cc.
+        DCHECK((cond == AL) ? set_cc != kCcKeep : set_cc != kCcSet);
+        opcode_shift = 9;
+        thumb_opcode = 13U /* 0b01101 */;
+        immediate = static_cast<uint32_t>(rm);
+        use_immediate = true;
+        immediate_shift = 6;
+      } else {
+        if (rd == SP && rn == SP) {
+          // SUB sp, sp, #imm
+          dp_opcode = 2U /* 0b10 */;
+          thumb_opcode = 0x61 /* 0b1100001 */;
+          opcode_shift = 7;
+          CHECK_LT(immediate, (1u << 9));
+          CHECK_ALIGNED(immediate, 4);
 
-           // Remove rd and rn from instruction by orring it with immed and clearing bits.
-           rn = R0;
-           rd = R0;
-           rd_shift = 0;
-           rn_shift = 0;
-           immediate >>= 2;
-         } else if (rn != rd) {
-           // Must use T1.
-           opcode_shift = 9;
-           thumb_opcode = 15U /* 0b01111 */;
-           immediate_shift = 6;
-         } else {
-           // T2 encoding.
-           opcode_shift = 11;
-           thumb_opcode = 7U /* 0b111 */;
-           rd_shift = 8;
-           rn_shift = 8;
-         }
-       }
+          // Remove rd and rn from instruction by orring it with immed and clearing bits.
+          rn = R0;
+          rd = R0;
+          rd_shift = 0;
+          rn_shift = 0;
+          immediate >>= 2;
+        } else if (rn != rd) {
+          // Must use T1.
+          opcode_shift = 9;
+          thumb_opcode = 15U /* 0b01111 */;
+          immediate_shift = 6;
+        } else {
+          // T2 encoding.
+          opcode_shift = 11;
+          thumb_opcode = 7U /* 0b111 */;
+          rd_shift = 8;
+          rn_shift = 8;
+        }
+      }
       break;
     default:
       LOG(FATAL) << "This opcode is not an ADD or SUB: " << opcode;
@@ -1575,7 +1626,7 @@
 
 void Thumb2Assembler::EmitDataProcessing(Condition cond,
                                          Opcode opcode,
-                                         bool set_cc,
+                                         SetCc set_cc,
                                          Register rn,
                                          Register rd,
                                          const ShifterOperand& so) {
@@ -1589,9 +1640,15 @@
   }
 }
 
-void Thumb2Assembler::EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc) {
+void Thumb2Assembler::EmitShift(Register rd,
+                                Register rm,
+                                Shift shift,
+                                uint8_t amount,
+                                Condition cond,
+                                SetCc set_cc) {
   CHECK_LT(amount, (1 << 5));
-  if (IsHighRegister(rd) || IsHighRegister(rm) || shift == ROR || shift == RRX) {
+  if ((IsHighRegister(rd) || IsHighRegister(rm) || shift == ROR || shift == RRX) ||
+      ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
     uint16_t opcode = 0;
     switch (shift) {
       case LSL: opcode = 0U /* 0b00 */; break;
@@ -1605,7 +1662,7 @@
     }
     // 32 bit.
     int32_t encoding = B31 | B30 | B29 | B27 | B25 | B22 |
-        0xf << 16 | (setcc ? B20 : 0);
+        0xf << 16 | (set_cc == kCcSet ? B20 : 0);
     uint32_t imm3 = amount >> 2;
     uint32_t imm2 = amount & 3U /* 0b11 */;
     encoding |= imm3 << 12 | imm2 << 6 | static_cast<int16_t>(rm) |
@@ -1628,10 +1685,16 @@
   }
 }
 
-void Thumb2Assembler::EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc) {
+void Thumb2Assembler::EmitShift(Register rd,
+                                Register rn,
+                                Shift shift,
+                                Register rm,
+                                Condition cond,
+                                SetCc set_cc) {
   CHECK_NE(shift, RRX);
   bool must_be_32bit = false;
-  if (IsHighRegister(rd) || IsHighRegister(rm) || IsHighRegister(rn) || rd != rn) {
+  if (IsHighRegister(rd) || IsHighRegister(rm) || IsHighRegister(rn) || rd != rn ||
+      ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
     must_be_32bit = true;
   }
 
@@ -1648,7 +1711,7 @@
      }
      // 32 bit.
      int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 |
-         0xf << 12 | (setcc ? B20 : 0);
+         0xf << 12 | (set_cc == kCcSet ? B20 : 0);
      encoding |= static_cast<int16_t>(rn) << 16 | static_cast<int16_t>(rm) |
          static_cast<int16_t>(rd) << 8 | opcode << 21;
      Emit32(encoding);
@@ -1658,6 +1721,7 @@
       case LSL: opcode = 2U /* 0b0010 */; break;
       case LSR: opcode = 3U /* 0b0011 */; break;
       case ASR: opcode = 4U /* 0b0100 */; break;
+      case ROR: opcode = 7U /* 0b0111 */; break;
       default:
         LOG(FATAL) << "Unsupported thumb2 shift opcode";
         UNREACHABLE();
@@ -2915,70 +2979,70 @@
 
 
 void Thumb2Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CHECK_LE(shift_imm, 31u);
   CheckCondition(cond);
-  EmitShift(rd, rm, LSL, shift_imm, setcc);
+  EmitShift(rd, rm, LSL, shift_imm, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply with UAL syntax.
   CheckCondition(cond);
-  EmitShift(rd, rm, LSR, shift_imm, setcc);
+  EmitShift(rd, rm, LSR, shift_imm, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply with UAL syntax.
   CheckCondition(cond);
-  EmitShift(rd, rm, ASR, shift_imm, setcc);
+  EmitShift(rd, rm, ASR, shift_imm, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 31u);
   CheckCondition(cond);
-  EmitShift(rd, rm, ROR, shift_imm, setcc);
+  EmitShift(rd, rm, ROR, shift_imm, cond, set_cc);
 }
 
 
-void Thumb2Assembler::Rrx(Register rd, Register rm, bool setcc, Condition cond) {
+void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, RRX, rm, setcc);
+  EmitShift(rd, rm, RRX, rm, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Lsl(Register rd, Register rm, Register rn,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, LSL, rn, setcc);
+  EmitShift(rd, rm, LSL, rn, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Lsr(Register rd, Register rm, Register rn,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, LSR, rn, setcc);
+  EmitShift(rd, rm, LSR, rn, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Asr(Register rd, Register rm, Register rn,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, ASR, rn, setcc);
+  EmitShift(rd, rm, ASR, rn, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Ror(Register rd, Register rm, Register rn,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, ROR, rn, setcc);
+  EmitShift(rd, rm, ROR, rn, cond, set_cc);
 }
 
 
@@ -3173,24 +3237,24 @@
                                           Condition cond) {
   ShifterOperand shifter_op;
   if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) {
-    adds(rd, rn, shifter_op, cond);
+    add(rd, rn, shifter_op, cond, kCcSet);
   } else if (ShifterOperandCanHold(rd, rn, ADD, -value, &shifter_op)) {
-    subs(rd, rn, shifter_op, cond);
+    sub(rd, rn, shifter_op, cond, kCcSet);
   } else {
     CHECK(rn != IP);
     if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) {
       mvn(IP, shifter_op, cond);
-      adds(rd, rn, ShifterOperand(IP), cond);
+      add(rd, rn, ShifterOperand(IP), cond, kCcSet);
     } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) {
       mvn(IP, shifter_op, cond);
-      subs(rd, rn, ShifterOperand(IP), cond);
+      sub(rd, rn, ShifterOperand(IP), cond, kCcSet);
     } else {
       movw(IP, Low16Bits(value), cond);
       uint16_t value_high = High16Bits(value);
       if (value_high != 0) {
         movt(IP, value_high, cond);
       }
-      adds(rd, rn, ShifterOperand(IP), cond);
+      add(rd, rn, ShifterOperand(IP), cond, kCcSet);
     }
   }
 }
@@ -3316,7 +3380,7 @@
       }
     }
     LoadImmediate(tmp_reg, offset, cond);
-    add(tmp_reg, tmp_reg, ShifterOperand(base), cond);
+    add(tmp_reg, tmp_reg, ShifterOperand(base), AL);
     base = tmp_reg;
     offset = 0;
   }
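
The recurring test (cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet in Is32BitDataProcessing() and EmitShift() above encodes one Thumb2 rule: 16-bit data-processing instructions set the flags exactly when they execute outside an IT block. A sketch of the rule in isolation (names hypothetical, not from the patch):

    enum SetCc { kCcDontCare, kCcSet, kCcKeep };

    // A 16-bit encoding sets flags iff it runs outside an IT block (cond == AL).
    // If the requested SetCc contradicts that behavior, fall back to 32 bits.
    bool ForcesWideEncoding(bool cond_is_al, SetCc set_cc) {
      return cond_is_al ? (set_cc == kCcKeep)   // Outside IT: 16-bit would set flags.
                        : (set_cc == kCcSet);   // Inside IT: 16-bit would not.
    }
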
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 41eb5d3..c802c27 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -63,25 +63,29 @@
   void FinalizeCode() OVERRIDE;
 
   // Data-processing instructions.
-  void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void and_(Register rd, Register rn, const ShifterOperand& so,
+                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void eor(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void sub(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void add(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void adc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-
-  void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
 
@@ -91,16 +95,17 @@
 
   void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
 
-  void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void orr(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void mov(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void movs(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void mov(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void bic(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void mvn(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   // Miscellaneous data-processing instructions.
   void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
@@ -245,25 +250,25 @@
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
 
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Rrx(Register rd, Register rm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Rrx(Register rd, Register rm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Asr(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Ror(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
+  virtual void Lsl(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Lsr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Asr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Ror(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   void Push(Register rd, Condition cond = AL) OVERRIDE;
   void Pop(Register rd, Condition cond = AL) OVERRIDE;
@@ -600,7 +605,7 @@
   // Emit a single 32 or 16 bit data processing instruction.
   void EmitDataProcessing(Condition cond,
                           Opcode opcode,
-                          bool set_cc,
+                          SetCc set_cc,
                           Register rn,
                           Register rd,
                           const ShifterOperand& so);
@@ -609,7 +614,7 @@
   // in 16 bits?
   bool Is32BitDataProcessing(Condition cond,
                              Opcode opcode,
-                             bool set_cc,
+                             SetCc set_cc,
                              Register rn,
                              Register rd,
                              const ShifterOperand& so);
@@ -617,7 +622,7 @@
   // Emit a 32 bit data processing instruction.
   void Emit32BitDataProcessing(Condition cond,
                                Opcode opcode,
-                               bool set_cc,
+                               SetCc set_cc,
                                Register rn,
                                Register rd,
                                const ShifterOperand& so);
@@ -625,14 +630,14 @@
   // Emit a 16 bit data processing instruction.
   void Emit16BitDataProcessing(Condition cond,
                                Opcode opcode,
-                               bool set_cc,
+                               SetCc set_cc,
                                Register rn,
                                Register rd,
                                const ShifterOperand& so);
 
   void Emit16BitAddSub(Condition cond,
                        Opcode opcode,
-                       bool set_cc,
+                       SetCc set_cc,
                        Register rn,
                        Register rd,
                        const ShifterOperand& so);
@@ -694,8 +699,10 @@
   static int DecodeBranchOffset(int32_t inst);
   int32_t EncodeTstOffset(int offset, int32_t inst);
   int DecodeTstOffset(int32_t inst);
-  void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc = false);
-  void EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc = false);
+  void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
+                 Condition cond = AL, SetCc set_cc = kCcDontCare);
+  void EmitShift(Register rd, Register rn, Shift shift, Register rm,
+                 Condition cond = AL, SetCc set_cc = kCcDontCare);
 
   // Whether the assembler can relocate branches. If false, unresolved branches will be
   // emitted on 32bits.
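
Call sites migrate as in the AddConstantSetFlags() hunk earlier: the removed s-suffixed virtual entry points become an explicit SetCc argument on the base mutator, while the short adds/subs/etc. spellings appear to survive as thin non-virtual wrappers, since the tests below still call them. A sketch of the pattern with a stand-in assembler (types and wrapper assumed, not verbatim):

    #include <iostream>

    enum SetCc { kCcDontCare, kCcSet, kCcKeep };
    enum Condition { AL };

    struct StandInAssembler {
      // One mutator; flags behavior is an explicit argument.
      void add(int rd, int rn, int so, Condition cond = AL, SetCc set_cc = kCcDontCare) {
        std::cout << "add set_cc=" << set_cc << "\n";
      }
      // The old "s" spelling as a thin wrapper (assumed, matching test usage).
      void adds(int rd, int rn, int so, Condition cond = AL) {
        add(rd, rn, so, cond, kCcSet);
      }
    };

    int main() {
      StandInAssembler a;
      a.adds(0, 1, 2);             // Old spelling: always sets flags.
      a.add(0, 1, 2, AL, kCcSet);  // New spelling: flags request explicit.
      a.add(0, 1, 2);              // kCcDontCare: shortest encoding wins.
    }
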
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index cb01cea..b2a354b 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -199,6 +199,7 @@
 TEST(Thumb2AssemblerTest, SimpleMov) {
   arm::Thumb2Assembler assembler;
 
+  __ movs(R0, ShifterOperand(R1));
   __ mov(R0, ShifterOperand(R1));
   __ mov(R8, ShifterOperand(R9));
 
@@ -222,8 +223,8 @@
   arm::Thumb2Assembler assembler;
 
   __ mov(R0, ShifterOperand(R1));
-  __ add(R0, R1, ShifterOperand(R2));
-  __ add(R0, R1, ShifterOperand());
+  __ adds(R0, R1, ShifterOperand(R2));
+  __ add(R0, R1, ShifterOperand(0));
 
   EmitAndCheck(&assembler, "SimpleMovAdd");
 }
@@ -231,41 +232,132 @@
 TEST(Thumb2AssemblerTest, DataProcessingRegister) {
   arm::Thumb2Assembler assembler;
 
+  // 32-bit variants using low registers.
+  __ mvn(R0, ShifterOperand(R1), AL, kCcKeep);
+  __ add(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ sub(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ and_(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ orr(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ eor(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ bic(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ adc(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ sbc(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ rsb(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ teq(R0, ShifterOperand(R1));
+
+  // 16-bit variants using low registers.
+  __ movs(R0, ShifterOperand(R1));
+  __ mov(R0, ShifterOperand(R1), AL, kCcKeep);
+  __ mvns(R0, ShifterOperand(R1));
+  __ add(R0, R0, ShifterOperand(R1), AL, kCcKeep);
+  __ adds(R0, R1, ShifterOperand(R2));
+  __ subs(R0, R1, ShifterOperand(R2));
+  __ adcs(R0, R0, ShifterOperand(R1));
+  __ sbcs(R0, R0, ShifterOperand(R1));
+  __ ands(R0, R0, ShifterOperand(R1));
+  __ orrs(R0, R0, ShifterOperand(R1));
+  __ eors(R0, R0, ShifterOperand(R1));
+  __ bics(R0, R0, ShifterOperand(R1));
+  __ tst(R0, ShifterOperand(R1));
+  __ cmp(R0, ShifterOperand(R1));
+  __ cmn(R0, ShifterOperand(R1));
+
+  // 16-bit variants using high registers.
+  __ mov(R1, ShifterOperand(R8), AL, kCcKeep);
+  __ mov(R9, ShifterOperand(R0), AL, kCcKeep);
+  __ mov(R8, ShifterOperand(R9), AL, kCcKeep);
+  __ add(R1, R1, ShifterOperand(R8), AL, kCcKeep);
+  __ add(R9, R9, ShifterOperand(R0), AL, kCcKeep);
+  __ add(R8, R8, ShifterOperand(R9), AL, kCcKeep);
+  __ cmp(R0, ShifterOperand(R9));
+  __ cmp(R8, ShifterOperand(R1));
+  __ cmp(R9, ShifterOperand(R8));
+
+  // The 16-bit RSBS Rd, Rn, #0 (also known as NEGS Rd, Rn) is specified with
+  // an immediate (0) but encoded without one, so we test it here.
+  __ rsbs(R0, R1, ShifterOperand(0));
+  __ rsbs(R0, R0, ShifterOperand(0));  // Check Rd == Rn code path.
+
+  // 32-bit variants using high registers that would be 16-bit if using low registers.
+  __ movs(R0, ShifterOperand(R8));
+  __ mvns(R0, ShifterOperand(R8));
+  __ add(R0, R1, ShifterOperand(R8), AL, kCcKeep);
+  __ adds(R0, R1, ShifterOperand(R8));
+  __ subs(R0, R1, ShifterOperand(R8));
+  __ adcs(R0, R0, ShifterOperand(R8));
+  __ sbcs(R0, R0, ShifterOperand(R8));
+  __ ands(R0, R0, ShifterOperand(R8));
+  __ orrs(R0, R0, ShifterOperand(R8));
+  __ eors(R0, R0, ShifterOperand(R8));
+  __ bics(R0, R0, ShifterOperand(R8));
+  __ tst(R0, ShifterOperand(R8));
+  __ cmn(R0, ShifterOperand(R8));
+  __ rsbs(R0, R8, ShifterOperand(0));  // Check that this is not emitted as 16-bit.
+  __ rsbs(R8, R8, ShifterOperand(0));  // Check that this is not emitted as 16-bit (Rd == Rn).
+
+  // 32-bit variants of instructions that would be 16-bit outside an IT block.
+  __ it(arm::EQ);
+  __ mvns(R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ adds(R0, R1, ShifterOperand(R2), arm::EQ);
+  __ it(arm::EQ);
+  __ subs(R0, R1, ShifterOperand(R2), arm::EQ);
+  __ it(arm::EQ);
+  __ adcs(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ sbcs(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ ands(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ orrs(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ eors(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ bics(R0, R0, ShifterOperand(R1), arm::EQ);
+
+  // 16-bit variants of instructions that would be 32-bit outside an IT block.
+  __ it(arm::EQ);
+  __ mvn(R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ add(R0, R1, ShifterOperand(R2), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ sub(R0, R1, ShifterOperand(R2), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ adc(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ sbc(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ and_(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ orr(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ eor(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ bic(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+
+  // 16-bit variants selected for the default kCcDontCare.
   __ mov(R0, ShifterOperand(R1));
   __ mvn(R0, ShifterOperand(R1));
-
-  // 32 bit variants.
+  __ add(R0, R0, ShifterOperand(R1));
   __ add(R0, R1, ShifterOperand(R2));
   __ sub(R0, R1, ShifterOperand(R2));
-  __ and_(R0, R1, ShifterOperand(R2));
-  __ orr(R0, R1, ShifterOperand(R2));
-  __ eor(R0, R1, ShifterOperand(R2));
-  __ bic(R0, R1, ShifterOperand(R2));
-  __ adc(R0, R1, ShifterOperand(R2));
-  __ sbc(R0, R1, ShifterOperand(R2));
-  __ rsb(R0, R1, ShifterOperand(R2));
-
-  // 16 bit variants.
-  __ add(R0, R1, ShifterOperand());
-  __ sub(R0, R1, ShifterOperand());
+  __ adc(R0, R0, ShifterOperand(R1));
+  __ sbc(R0, R0, ShifterOperand(R1));
   __ and_(R0, R0, ShifterOperand(R1));
   __ orr(R0, R0, ShifterOperand(R1));
   __ eor(R0, R0, ShifterOperand(R1));
   __ bic(R0, R0, ShifterOperand(R1));
-  __ adc(R0, R0, ShifterOperand(R1));
-  __ sbc(R0, R0, ShifterOperand(R1));
-  __ rsb(R0, R0, ShifterOperand(R1));
+  __ mov(R1, ShifterOperand(R8));
+  __ mov(R9, ShifterOperand(R0));
+  __ mov(R8, ShifterOperand(R9));
+  __ add(R1, R1, ShifterOperand(R8));
+  __ add(R9, R9, ShifterOperand(R0));
+  __ add(R8, R8, ShifterOperand(R9));
+  __ rsb(R0, R1, ShifterOperand(0));
+  __ rsb(R0, R0, ShifterOperand(0));
 
-  __ tst(R0, ShifterOperand(R1));
-  __ teq(R0, ShifterOperand(R1));
-  __ cmp(R0, ShifterOperand(R1));
-  __ cmn(R0, ShifterOperand(R1));
-
-  __ movs(R0, ShifterOperand(R1));
-  __ mvns(R0, ShifterOperand(R1));
-
-  // 32 bit variants.
-  __ add(R12, R1, ShifterOperand(R0));
+  // And an arbitrary 32-bit instruction using IP.
+  __ add(R12, R1, ShifterOperand(R0), AL, kCcKeep);
 
   EmitAndCheck(&assembler, "DataProcessingRegister");
 }
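The test above leans on the new s-suffixed mnemonics (adds, sbcs, mvns, ...) instead of a trailing boolean. Presumably these are thin wrappers that forward to the base mnemonic with kCcSet; a hypothetical sketch of that pattern (the real assembler takes Register/ShifterOperand parameters):

    #include <cstdio>

    enum Condition { AL = 14 };
    enum SetCc { kCcDontCare, kCcSet, kCcKeep };

    struct Asm {
      void add(int rd, int rn, int rm, Condition cond = AL,
               SetCc set_cc = kCcDontCare) {
        std::printf("add rd=r%d rn=r%d rm=r%d cond=%d set_cc=%d\n",
                    rd, rn, rm, cond, set_cc);
      }
      // adds(...) is just add(...) with the flags forced on.
      void adds(int rd, int rn, int rm, Condition cond = AL) {
        add(rd, rn, rm, cond, kCcSet);
      }
    };

    int main() {
      Asm a;
      a.adds(0, 1, 2);              // like __ adds(R0, R1, ShifterOperand(R2));
      a.add(0, 1, 2, AL, kCcKeep);  // like __ add(..., AL, kCcKeep);
    }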
@@ -296,6 +388,9 @@
   __ movs(R0, ShifterOperand(0x55));
   __ mvns(R0, ShifterOperand(0x55));
 
+  __ adds(R0, R1, ShifterOperand(5));
+  __ subs(R0, R1, ShifterOperand(5));
+
   EmitAndCheck(&assembler, "DataProcessingImmediate");
 }
 
@@ -340,18 +435,30 @@
 TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
   arm::Thumb2Assembler assembler;
 
-  __ mov(R3, ShifterOperand(R4, LSL, 4));
-  __ mov(R3, ShifterOperand(R4, LSR, 5));
-  __ mov(R3, ShifterOperand(R4, ASR, 6));
-  __ mov(R3, ShifterOperand(R4, ROR, 7));
-  __ mov(R3, ShifterOperand(R4, ROR));
+  // 16-bit variants.
+  __ movs(R3, ShifterOperand(R4, LSL, 4));
+  __ movs(R3, ShifterOperand(R4, LSR, 5));
+  __ movs(R3, ShifterOperand(R4, ASR, 6));
 
-  // 32 bit variants.
-  __ mov(R8, ShifterOperand(R4, LSL, 4));
-  __ mov(R8, ShifterOperand(R4, LSR, 5));
-  __ mov(R8, ShifterOperand(R4, ASR, 6));
-  __ mov(R8, ShifterOperand(R4, ROR, 7));
-  __ mov(R8, ShifterOperand(R4, RRX));
+  // 32-bit ROR because ROR immediate doesn't have a 16-bit version like the other shifts.
+  __ movs(R3, ShifterOperand(R4, ROR, 7));
+
+  // 32-bit RRX because RRX has no 16-bit version.
+  __ movs(R3, ShifterOperand(R4, RRX));
+
+  // 32-bit variants (not setting condition codes).
+  __ mov(R3, ShifterOperand(R4, LSL, 4), AL, kCcKeep);
+  __ mov(R3, ShifterOperand(R4, LSR, 5), AL, kCcKeep);
+  __ mov(R3, ShifterOperand(R4, ASR, 6), AL, kCcKeep);
+  __ mov(R3, ShifterOperand(R4, ROR, 7), AL, kCcKeep);
+  __ mov(R3, ShifterOperand(R4, RRX), AL, kCcKeep);
+
+  // 32-bit variants (high registers).
+  __ movs(R8, ShifterOperand(R4, LSL, 4));
+  __ movs(R8, ShifterOperand(R4, LSR, 5));
+  __ movs(R8, ShifterOperand(R4, ASR, 6));
+  __ movs(R8, ShifterOperand(R4, ROR, 7));
+  __ movs(R8, ShifterOperand(R4, RRX));
 
   EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
 }
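The shifted-register cases follow a simple rule worth spelling out: LSL/LSR/ASR by immediate have 16-bit flag-setting forms on low registers, while ROR-by-immediate and RRX exist only as 32-bit Thumb-2 encodings. Captured as a sketch:

    enum Shift { LSL, LSR, ASR, ROR, RRX };

    // ROR #imm and RRX have no 16-bit Thumb encoding, which is why the test
    // expects movs.w for them even with low registers.
    bool ImmediateShiftHas16BitForm(Shift shift) {
      return shift == LSL || shift == LSR || shift == ASR;
    }

    int main() {
      return ImmediateShiftHas16BitForm(ROR) ? 1 : 0;
    }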
@@ -1023,7 +1130,7 @@
 TEST(Thumb2AssemblerTest, Shifts) {
   arm::Thumb2Assembler assembler;
 
-  // 16 bit
+  // 16-bit selected for the default kCcDontCare.
   __ Lsl(R0, R1, 5);
   __ Lsr(R0, R1, 5);
   __ Asr(R0, R1, 5);
@@ -1031,6 +1138,32 @@
   __ Lsl(R0, R0, R1);
   __ Lsr(R0, R0, R1);
   __ Asr(R0, R0, R1);
+  __ Ror(R0, R0, R1);
+
+  // 16-bit with kCcSet.
+  __ Lsls(R0, R1, 5);
+  __ Lsrs(R0, R1, 5);
+  __ Asrs(R0, R1, 5);
+
+  __ Lsls(R0, R0, R1);
+  __ Lsrs(R0, R0, R1);
+  __ Asrs(R0, R0, R1);
+  __ Rors(R0, R0, R1);
+
+  // 32-bit with kCcKeep.
+  __ Lsl(R0, R1, 5, AL, kCcKeep);
+  __ Lsr(R0, R1, 5, AL, kCcKeep);
+  __ Asr(R0, R1, 5, AL, kCcKeep);
+
+  __ Lsl(R0, R0, R1, AL, kCcKeep);
+  __ Lsr(R0, R0, R1, AL, kCcKeep);
+  __ Asr(R0, R0, R1, AL, kCcKeep);
+  __ Ror(R0, R0, R1, AL, kCcKeep);
+
+  // 32-bit because ROR immediate doesn't have a 16-bit version like the other shifts.
+  __ Ror(R0, R1, 5);
+  __ Rors(R0, R1, 5);
+  __ Ror(R0, R1, 5, AL, kCcKeep);
 
   // 32 bit due to high registers.
   __ Lsl(R8, R1, 5);
@@ -1052,21 +1185,21 @@
   // S bit (all 32 bit)
 
   // 32 bit due to high registers.
-  __ Lsl(R8, R1, 5, true);
-  __ Lsr(R0, R8, 5, true);
-  __ Asr(R8, R1, 5, true);
-  __ Ror(R0, R8, 5, true);
+  __ Lsls(R8, R1, 5);
+  __ Lsrs(R0, R8, 5);
+  __ Asrs(R8, R1, 5);
+  __ Rors(R0, R8, 5);
 
   // 32 bit due to different Rd and Rn.
-  __ Lsl(R0, R1, R2, true);
-  __ Lsr(R0, R1, R2, true);
-  __ Asr(R0, R1, R2, true);
-  __ Ror(R0, R1, R2, true);
+  __ Lsls(R0, R1, R2);
+  __ Lsrs(R0, R1, R2);
+  __ Asrs(R0, R1, R2);
+  __ Rors(R0, R1, R2);
 
   // 32 bit due to use of high registers.
-  __ Lsl(R8, R1, R2, true);
-  __ Lsr(R0, R8, R2, true);
-  __ Asr(R0, R1, R8, true);
+  __ Lsls(R8, R1, R2);
+  __ Lsrs(R0, R8, R2);
+  __ Asrs(R0, R1, R8);
 
   EmitAndCheck(&assembler, "Shifts");
 }
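For register-controlled shifts the 16-bit encodings are two-operand and flag-setting, which explains every "32 bit due to ..." comment in the Shifts test. An illustrative predicate, not the assembler's exact one:

    // 16-bit register shifts require Rd == Rn, low registers, and a caller
    // that is happy with flags being set (kCcSet or kCcDontCare).
    bool RegisterShiftHas16BitForm(int rd, int rn, bool low_registers,
                                   bool flag_setting_acceptable) {
      return low_registers && rd == rn && flag_setting_acceptable;
    }

    int main() {
      return RegisterShiftHas16BitForm(0, 0, true, true) ? 0 : 1;
    }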
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 280ed77..82ad642 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -1,8 +1,9 @@
 const char* SimpleMovResults[] = {
   "   0:	0008      	movs	r0, r1\n",
-  "   2:	46c8      	mov	r8, r9\n",
-  "   4:	2001      	movs	r0, #1\n",
-  "   6:	f04f 0809 	mov.w	r8, #9\n",
+  "   2:	4608      	mov	r0, r1\n",
+  "   4:	46c8      	mov	r8, r9\n",
+  "   6:	2001      	movs	r0, #1\n",
+  "   8:	f04f 0809 	mov.w	r8, #9\n",
   nullptr
 };
 const char* SimpleMov32Results[] = {
@@ -11,39 +12,120 @@
   nullptr
 };
 const char* SimpleMovAddResults[] = {
-  "   0:	0008      	movs	r0, r1\n",
+  "   0:	4608      	mov	r0, r1\n",
   "   2:	1888      	adds	r0, r1, r2\n",
   "   4:	1c08      	adds	r0, r1, #0\n",
   nullptr
 };
 const char* DataProcessingRegisterResults[] = {
-  "   0:	0008      	movs	r0, r1\n",
-  "   2:	43c8      	mvns	r0, r1\n",
-  "   4:	1888      	adds	r0, r1, r2\n",
-  "   6:	1a88      	subs	r0, r1, r2\n",
-  "   8:	ea01 0002 	and.w	r0, r1, r2\n",
-  "   c:	ea41 0002 	orr.w	r0, r1, r2\n",
-  "  10:	ea81 0002 	eor.w	r0, r1, r2\n",
-  "  14:	ea21 0002 	bic.w	r0, r1, r2\n",
-  "  18:	eb41 0002 	adc.w	r0, r1, r2\n",
-  "  1c:	eb61 0002 	sbc.w	r0, r1, r2\n",
-  "  20:	ebc1 0002 	rsb	r0, r1, r2\n",
-  "  24:	1c08      	adds	r0, r1, #0\n",
-  "  26:	1e08      	subs	r0, r1, #0\n",
-  "  28:	4008      	ands	r0, r1\n",
-  "  2a:	4308      	orrs	r0, r1\n",
-  "  2c:	4048      	eors	r0, r1\n",
-  "  2e:	4388      	bics	r0, r1\n",
-  "  30:	4148      	adcs	r0, r1\n",
-  "  32:	4188      	sbcs	r0, r1\n",
-  "  34:	4248      	negs	r0, r1\n",
-  "  36:	4208      	tst	r0, r1\n",
-  "  38:	ea90 0f01 	teq	r0, r1\n",
-  "  3c:	4288      	cmp	r0, r1\n",
-  "  3e:	42c8      	cmn	r0, r1\n",
-  "  40:	0008      	movs	r0, r1\n",
-  "  42:	43c8      	mvns	r0, r1\n",
-  "  44:	eb01 0c00   add.w	ip, r1, r0\n",
+  "   0:	ea6f 0001 	mvn.w	r0, r1\n",
+  "   4:	eb01 0002 	add.w	r0, r1, r2\n",
+  "   8:	eba1 0002 	sub.w	r0, r1, r2\n",
+  "   c:	ea01 0002 	and.w	r0, r1, r2\n",
+  "  10:	ea41 0002 	orr.w	r0, r1, r2\n",
+  "  14:	ea81 0002 	eor.w	r0, r1, r2\n",
+  "  18:	ea21 0002 	bic.w	r0, r1, r2\n",
+  "  1c:	eb41 0002 	adc.w	r0, r1, r2\n",
+  "  20:	eb61 0002 	sbc.w	r0, r1, r2\n",
+  "  24:	ebc1 0002 	rsb	r0, r1, r2\n",
+  "  28:	ea90 0f01 	teq	r0, r1\n",
+  "  2c:	0008      	movs	r0, r1\n",
+  "  2e:	4608      	mov	r0, r1\n",
+  "  30:	43c8      	mvns	r0, r1\n",
+  "  32:	4408      	add	r0, r1\n",
+  "  34:	1888      	adds	r0, r1, r2\n",
+  "  36:	1a88      	subs	r0, r1, r2\n",
+  "  38:	4148      	adcs	r0, r1\n",
+  "  3a:	4188      	sbcs	r0, r1\n",
+  "  3c:	4008      	ands	r0, r1\n",
+  "  3e:	4308      	orrs	r0, r1\n",
+  "  40:	4048      	eors	r0, r1\n",
+  "  42:	4388      	bics	r0, r1\n",
+  "  44:	4208      	tst	r0, r1\n",
+  "  46:	4288      	cmp	r0, r1\n",
+  "  48:	42c8      	cmn	r0, r1\n",
+  "  4a:	4641		mov	r1, r8\n",
+  "  4c:	4681		mov	r9, r0\n",
+  "  4e:	46c8		mov	r8, r9\n",
+  "  50:	4441		add	r1, r8\n",
+  "  52:	4481		add	r9, r0\n",
+  "  54:	44c8		add	r8, r9\n",
+  "  56:	4548		cmp	r0, r9\n",
+  "  58:	4588		cmp	r8, r1\n",
+  "  5a:	45c1		cmp	r9, r8\n",
+  "  5c:	4248   	   	negs	r0, r1\n",
+  "  5e:	4240   	   	negs	r0, r0\n",
+  "  60:	ea5f 0008  	movs.w	r0, r8\n",
+  "  64:	ea7f 0008  	mvns.w	r0, r8\n",
+  "  68:	eb01 0008 	add.w	r0, r1, r8\n",
+  "  6c:	eb11 0008 	adds.w	r0, r1, r8\n",
+  "  70:	ebb1 0008 	subs.w	r0, r1, r8\n",
+  "  74:	eb50 0008 	adcs.w	r0, r0, r8\n",
+  "  78:	eb70 0008 	sbcs.w	r0, r0, r8\n",
+  "  7c:	ea10 0008 	ands.w	r0, r0, r8\n",
+  "  80:	ea50 0008 	orrs.w	r0, r0, r8\n",
+  "  84:	ea90 0008 	eors.w	r0, r0, r8\n",
+  "  88:	ea30 0008 	bics.w	r0, r0, r8\n",
+  "  8c:	ea10 0f08 	tst.w	r0, r8\n",
+  "  90:	eb10 0f08 	cmn.w	r0, r8\n",
+  "  94:	f1d8 0000 	rsbs	r0, r8, #0\n",
+  "  98:	f1d8 0800 	rsbs	r8, r8, #0\n",
+  "  9c:	bf08       	it	eq\n",
+  "  9e:	ea7f 0001  	mvnseq.w	r0, r1\n",
+  "  a2:	bf08       	it	eq\n",
+  "  a4:	eb11 0002 	addseq.w	r0, r1, r2\n",
+  "  a8:	bf08       	it	eq\n",
+  "  aa:	ebb1 0002 	subseq.w	r0, r1, r2\n",
+  "  ae:	bf08       	it	eq\n",
+  "  b0:	eb50 0001 	adcseq.w	r0, r0, r1\n",
+  "  b4:	bf08       	it	eq\n",
+  "  b6:	eb70 0001 	sbcseq.w	r0, r0, r1\n",
+  "  ba:	bf08       	it	eq\n",
+  "  bc:	ea10 0001 	andseq.w	r0, r0, r1\n",
+  "  c0:	bf08       	it	eq\n",
+  "  c2:	ea50 0001 	orrseq.w	r0, r0, r1\n",
+  "  c6:	bf08       	it	eq\n",
+  "  c8:	ea90 0001 	eorseq.w	r0, r0, r1\n",
+  "  cc:	bf08       	it	eq\n",
+  "  ce:	ea30 0001 	bicseq.w	r0, r0, r1\n",
+  "  d2:	bf08       	it	eq\n",
+  "  d4:	43c8      	mvneq	r0, r1\n",
+  "  d6:	bf08       	it	eq\n",
+  "  d8:	1888      	addeq	r0, r1, r2\n",
+  "  da:	bf08       	it	eq\n",
+  "  dc:	1a88      	subeq	r0, r1, r2\n",
+  "  de:	bf08       	it	eq\n",
+  "  e0:	4148      	adceq	r0, r1\n",
+  "  e2:	bf08       	it	eq\n",
+  "  e4:	4188      	sbceq	r0, r1\n",
+  "  e6:	bf08       	it	eq\n",
+  "  e8:	4008      	andeq	r0, r1\n",
+  "  ea:	bf08       	it	eq\n",
+  "  ec:	4308      	orreq	r0, r1\n",
+  "  ee:	bf08       	it	eq\n",
+  "  f0:	4048      	eoreq	r0, r1\n",
+  "  f2:	bf08       	it	eq\n",
+  "  f4:	4388      	biceq	r0, r1\n",
+  "  f6:	4608      	mov	r0, r1\n",
+  "  f8:	43c8      	mvns	r0, r1\n",
+  "  fa:	4408      	add	r0, r1\n",
+  "  fc:	1888      	adds	r0, r1, r2\n",
+  "  fe:	1a88      	subs	r0, r1, r2\n",
+  " 100:	4148      	adcs	r0, r1\n",
+  " 102:	4188      	sbcs	r0, r1\n",
+  " 104:	4008      	ands	r0, r1\n",
+  " 106:	4308      	orrs	r0, r1\n",
+  " 108:	4048      	eors	r0, r1\n",
+  " 10a:	4388      	bics	r0, r1\n",
+  " 10c:	4641		mov	r1, r8\n",
+  " 10e:	4681		mov	r9, r0\n",
+  " 110:	46c8		mov	r8, r9\n",
+  " 112:	4441		add	r1, r8\n",
+  " 114:	4481		add	r9, r0\n",
+  " 116:	44c8		add	r8, r9\n",
+  " 118:	4248   	   	negs	r0, r1\n",
+  " 11a:	4240   	   	negs	r0, r0\n",
+  " 11c:	eb01 0c00 	add.w	ip, r1, r0\n",
   nullptr
 };
 const char* DataProcessingImmediateResults[] = {
@@ -66,6 +148,8 @@
   "  3a:	1f48      	subs	r0, r1, #5\n",
   "  3c:	2055      	movs	r0, #85	; 0x55\n",
   "  3e:	f07f 0055 	mvns.w	r0, #85	; 0x55\n",
+  "  42:	1d48      	adds  r0, r1, #5\n",
+  "  44:	1f48      	subs  r0, r1, #5\n",
   nullptr
 };
 const char* DataProcessingModifiedImmediateResults[] = {
@@ -100,13 +184,18 @@
   "   0:	0123      	lsls	r3, r4, #4\n",
   "   2:	0963      	lsrs	r3, r4, #5\n",
   "   4:	11a3      	asrs	r3, r4, #6\n",
-  "   6:	ea4f 13f4 	mov.w	r3, r4, ror #7\n",
-  "   a:	41e3      	rors	r3, r4\n",
-  "   c:	ea4f 1804 	mov.w	r8, r4, lsl #4\n",
-  "  10:	ea4f 1854 	mov.w	r8, r4, lsr #5\n",
-  "  14:	ea4f 18a4 	mov.w	r8, r4, asr #6\n",
-  "  18:	ea4f 18f4 	mov.w	r8, r4, ror #7\n",
-  "  1c:	ea4f 0834 	mov.w	r8, r4, rrx\n",
+  "   6:	ea5f 13f4 	movs.w	r3, r4, ror #7\n",
+  "   a:	ea5f 0334 	movs.w	r3, r4, rrx\n",
+  "   e:	ea4f 1304 	mov.w	r3, r4, lsl #4\n",
+  "  12:	ea4f 1354 	mov.w	r3, r4, lsr #5\n",
+  "  16:	ea4f 13a4 	mov.w	r3, r4, asr #6\n",
+  "  1a:	ea4f 13f4 	mov.w	r3, r4, ror #7\n",
+  "  1e:	ea4f 0334 	mov.w	r3, r4, rrx\n",
+  "  22:	ea5f 1804 	movs.w	r8, r4, lsl #4\n",
+  "  26:	ea5f 1854 	movs.w	r8, r4, lsr #5\n",
+  "  2a:	ea5f 18a4 	movs.w	r8, r4, asr #6\n",
+  "  2e:	ea5f 18f4 	movs.w	r8, r4, ror #7\n",
+  "  32:	ea5f 0834 	movs.w	r8, r4, rrx\n",
   nullptr
 };
 const char* BasicLoadResults[] = {
@@ -1511,7 +1600,7 @@
   " 7fc:	23fa      	movs	r3, #250	; 0xfa\n",
   " 7fe:	23fc      	movs	r3, #252	; 0xfc\n",
   " 800:	23fe      	movs	r3, #254	; 0xfe\n",
-  " 802:	0011      	movs	r1, r2\n",
+  " 802:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* Branch32Results[] = {
@@ -2541,7 +2630,7 @@
   " 800:	23fc      	movs	r3, #252	; 0xfc\n",
   " 802:	23fe      	movs	r3, #254	; 0xfe\n",
   " 804:	2300      	movs	r3, #0\n",
-  " 806:	0011      	movs	r1, r2\n",
+  " 806:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* CompareAndBranchMaxResults[] = {
@@ -2610,7 +2699,7 @@
   "  7c:	237a      	movs	r3, #122	; 0x7a\n",
   "  7e:	237c      	movs	r3, #124	; 0x7c\n",
   "  80:	237e      	movs	r3, #126	; 0x7e\n",
-  "  82:	0011      	movs	r1, r2\n",
+  "  82:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* CompareAndBranchRelocation16Results[] = {
@@ -2681,7 +2770,7 @@
   "  80:	237c      	movs	r3, #124	; 0x7c\n",
   "  82:	237e      	movs	r3, #126	; 0x7e\n",
   "  84:	2380      	movs	r3, #128	; 0x80\n",
-  "  86:	0011      	movs	r1, r2\n",
+  "  86:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* CompareAndBranchRelocation32Results[] = {
@@ -3712,7 +3801,7 @@
   " 802:	23fc      	movs	r3, #252	; 0xfc\n",
   " 804:	23fe      	movs	r3, #254	; 0xfe\n",
   " 806:	2300      	movs	r3, #0\n",
-  " 808:	0011      	movs	r1, r2\n",
+  " 808:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* MixedBranch32Results[] = {
@@ -4743,7 +4832,7 @@
   " 802:	23fe      	movs	r3, #254	; 0xfe\n",
   " 804:	2300      	movs	r3, #0\n",
   " 806:	f7ff bbfd 	b.w	4 <MixedBranch32+0x4>\n",
-  " 80a:	0011      	movs	r1, r2\n",
+  " 80a:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* ShiftsResults[] = {
@@ -4753,28 +4842,46 @@
   "   6:	4088      	lsls	r0, r1\n",
   "   8:	40c8      	lsrs	r0, r1\n",
   "   a:	4108      	asrs	r0, r1\n",
-  "   c:	ea4f 1841 	mov.w	r8, r1, lsl #5\n",
-  "  10:	ea4f 1058 	mov.w	r0, r8, lsr #5\n",
-  "  14:	ea4f 1861 	mov.w	r8, r1, asr #5\n",
-  "  18:	ea4f 1078 	mov.w	r0, r8, ror #5\n",
-  "  1c:	fa01 f002 	lsl.w	r0, r1, r2\n",
-  "  20:	fa21 f002 	lsr.w	r0, r1, r2\n",
-  "  24:	fa41 f002 	asr.w	r0, r1, r2\n",
-  "  28:	fa61 f002 	ror.w	r0, r1, r2\n",
-  "  2c:	fa01 f802 	lsl.w	r8, r1, r2\n",
-  "  30:	fa28 f002 	lsr.w	r0, r8, r2\n",
-  "  34:	fa41 f008 	asr.w	r0, r1, r8\n",
-  "  38:	ea5f 1841 	movs.w	r8, r1, lsl #5\n",
-  "  3c:	ea5f 1058 	movs.w	r0, r8, lsr #5\n",
-  "  40:	ea5f 1861 	movs.w	r8, r1, asr #5\n",
-  "  44:	ea5f 1078 	movs.w	r0, r8, ror #5\n",
-  "  48:	fa11 f002 	lsls.w	r0, r1, r2\n",
-  "  4c:	fa31 f002 	lsrs.w	r0, r1, r2\n",
-  "  50:	fa51 f002 	asrs.w	r0, r1, r2\n",
-  "  54:	fa71 f002 	rors.w	r0, r1, r2\n",
-  "  58:	fa11 f802 	lsls.w	r8, r1, r2\n",
-  "  5c:	fa38 f002 	lsrs.w	r0, r8, r2\n",
-  "  60:	fa51 f008 	asrs.w	r0, r1, r8\n",
+  "   c:	41c8      	rors	r0, r1\n",
+  "   e:	0148      	lsls	r0, r1, #5\n",
+  "  10:	0948      	lsrs	r0, r1, #5\n",
+  "  12:	1148      	asrs	r0, r1, #5\n",
+  "  14:	4088      	lsls	r0, r1\n",
+  "  16:	40c8      	lsrs	r0, r1\n",
+  "  18:	4108      	asrs	r0, r1\n",
+  "  1a:	41c8      	rors	r0, r1\n",
+  "  1c:	ea4f 1041 	mov.w	r0, r1, lsl #5\n",
+  "  20:	ea4f 1051 	mov.w	r0, r1, lsr #5\n",
+  "  24:	ea4f 1061 	mov.w	r0, r1, asr #5\n",
+  "  28:	fa00 f001 	lsl.w	r0, r0, r1\n",
+  "  2c:	fa20 f001 	lsr.w	r0, r0, r1\n",
+  "  30:	fa40 f001 	asr.w	r0, r0, r1\n",
+  "  34:	fa60 f001 	ror.w	r0, r0, r1\n",
+  "  38:	ea4f 1071 	mov.w	r0, r1, ror #5\n",
+  "  3c:	ea5f 1071 	movs.w	r0, r1, ror #5\n",
+  "  40:	ea4f 1071 	mov.w	r0, r1, ror #5\n",
+  "  44:	ea4f 1841 	mov.w	r8, r1, lsl #5\n",
+  "  48:	ea4f 1058 	mov.w	r0, r8, lsr #5\n",
+  "  4c:	ea4f 1861 	mov.w	r8, r1, asr #5\n",
+  "  50:	ea4f 1078 	mov.w	r0, r8, ror #5\n",
+  "  54:	fa01 f002 	lsl.w	r0, r1, r2\n",
+  "  58:	fa21 f002 	lsr.w	r0, r1, r2\n",
+  "  5c:	fa41 f002 	asr.w	r0, r1, r2\n",
+  "  60:	fa61 f002 	ror.w	r0, r1, r2\n",
+  "  64:	fa01 f802 	lsl.w	r8, r1, r2\n",
+  "  68:	fa28 f002 	lsr.w	r0, r8, r2\n",
+  "  6c:	fa41 f008 	asr.w	r0, r1, r8\n",
+  "  70:	ea5f 1841 	movs.w	r8, r1, lsl #5\n",
+  "  74:	ea5f 1058 	movs.w	r0, r8, lsr #5\n",
+  "  78:	ea5f 1861 	movs.w	r8, r1, asr #5\n",
+  "  7c:	ea5f 1078 	movs.w	r0, r8, ror #5\n",
+  "  80:	fa11 f002 	lsls.w	r0, r1, r2\n",
+  "  84:	fa31 f002 	lsrs.w	r0, r1, r2\n",
+  "  88:	fa51 f002 	asrs.w	r0, r1, r2\n",
+  "  8c:	fa71 f002 	rors.w	r0, r1, r2\n",
+  "  90:	fa11 f802 	lsls.w	r8, r1, r2\n",
+  "  94:	fa38 f002 	lsrs.w	r0, r8, r2\n",
+  "  98:	fa51 f008 	asrs.w	r0, r1, r8\n",
   nullptr
 };
 const char* LoadStoreRegOffsetResults[] = {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 99736e9..f9520be 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -309,6 +309,8 @@
   UsageError("      stripped using standard command line tools such as strip or objcopy.");
   UsageError("      (enabled by default in debug builds, disabled by default otherwise)");
   UsageError("");
+  UsageError("  --debuggable: Produce debuggable code. Implies --generate-debug-info.");
+  UsageError("");
   UsageError("  --no-generate-debug-info: Do not generate debug information for native debugging.");
   UsageError("");
   UsageError("  --runtime-arg <argument>: used to specify various arguments for the runtime,");
@@ -1207,6 +1209,14 @@
     oat_file_.reset();
   }
 
+  void Shutdown() {
+    ScopedObjectAccess soa(Thread::Current());
+    for (jobject dex_cache : dex_caches_) {
+      soa.Env()->DeleteLocalRef(dex_cache);
+    }
+    dex_caches_.clear();
+  }
+
   // Set up the environment for compilation. Includes starting the runtime and loading/opening the
   // boot class path.
   bool Setup() {
@@ -1320,8 +1330,9 @@
       compiled_methods_.reset(nullptr);  // By default compile everything.
     }
 
+    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
     if (boot_image_option_.empty()) {
-      dex_files_ = Runtime::Current()->GetClassLinker()->GetBootClassPath();
+      dex_files_ = class_linker->GetBootClassPath();
     } else {
       if (dex_filenames_.empty()) {
         ATRACE_BEGIN("Opening zip archive from file descriptor");
@@ -1374,11 +1385,15 @@
         }
       }
     }
-    // Ensure opened dex files are writable for dex-to-dex transformations.
+    // Ensure opened dex files are writable for dex-to-dex transformations. Also ensure that
+    // the dex caches stay live since we don't want class unloading to occur during compilation.
     for (const auto& dex_file : dex_files_) {
       if (!dex_file->EnableWrite()) {
         PLOG(ERROR) << "Failed to make .dex file writeable '" << dex_file->GetLocation() << "'\n";
       }
+      ScopedObjectAccess soa(self);
+      dex_caches_.push_back(soa.AddLocalReference<jobject>(
+          class_linker->RegisterDexFile(*dex_file)));
     }
 
     // If we use a swap file, ensure we are above the threshold to make it necessary.
@@ -1423,6 +1438,7 @@
     // Handle and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = nullptr;
     Thread* self = Thread::Current();
+
     if (!boot_image_option_.empty()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       OpenClassPathFiles(runtime_->GetClassPathString(), dex_files_, &class_path_files_);
@@ -1957,6 +1973,7 @@
   bool is_host_;
   std::string android_root_;
   std::vector<const DexFile*> dex_files_;
+  std::vector<jobject> dex_caches_;
   std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
   std::unique_ptr<CompilerDriver> driver_;
   std::vector<std::string> verbose_methods_;
@@ -2107,11 +2124,15 @@
     return EXIT_FAILURE;
   }
 
+  bool result;
   if (dex2oat.IsImage()) {
-    return CompileImage(dex2oat);
+    result = CompileImage(dex2oat);
   } else {
-    return CompileApp(dex2oat);
+    result = CompileApp(dex2oat);
   }
+
+  dex2oat.Shutdown();
+  return result;
 }
 }  // namespace art
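dex2oat now pins every registered dex cache with a JNI local reference for the whole compilation and drops the pins in Shutdown(), so the newly weakly-referenced caches cannot be unloaded mid-compile. The shape of that keep-alive pattern, sketched with standard C++ handles (weak_ptr standing in for the class linker's weak roots, shared_ptr for the pinning reference):

    #include <memory>
    #include <vector>

    struct DexCache { /* ... */ };

    std::vector<std::weak_ptr<DexCache>> registered;  // like ClassLinker::dex_caches_
    std::vector<std::shared_ptr<DexCache>> pinned;    // like dex2oat's dex_caches_

    void PinForCompilation() {
      for (auto& weak : registered) {
        if (auto strong = weak.lock()) {
          pinned.push_back(std::move(strong));  // stays live while compiling
        }
      }
    }

    void Shutdown() {
      pinned.clear();  // release the pins; caches may now be collected
    }

    int main() {
      auto cache = std::make_shared<DexCache>();
      registered.push_back(cache);
      PinForCompilation();
      cache.reset();  // original owner drops it, but the pin keeps it live
      Shutdown();
    }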
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index a2aa77e..c553a18 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -827,11 +827,15 @@
       DumpDexCode(vios->Stream(), dex_file, code_item);
     }
 
+    std::unique_ptr<StackHandleScope<1>> hs;
     std::unique_ptr<verifier::MethodVerifier> verifier;
     if (Runtime::Current() != nullptr) {
+      // The handle scope must outlive the verifier, since the verifier holds a
+      // handle to the dex cache allocated from hs.
+      hs.reset(new StackHandleScope<1>(Thread::Current()));
       vios->Stream() << "VERIFIER TYPE ANALYSIS:\n";
       ScopedIndentation indent2(vios);
-      verifier.reset(DumpVerifier(vios,
+      verifier.reset(DumpVerifier(vios, hs.get(),
                                   dex_method_idx, &dex_file, class_def, code_item,
                                   method_access_flags));
     }
@@ -1404,6 +1408,7 @@
   }
 
   verifier::MethodVerifier* DumpVerifier(VariableIndentationOutputStream* vios,
+                                         StackHandleScope<1>* hs,
                                          uint32_t dex_method_idx,
                                          const DexFile* dex_file,
                                          const DexFile::ClassDef& class_def,
@@ -1411,9 +1416,8 @@
                                          uint32_t method_access_flags) {
     if ((method_access_flags & kAccNative) == 0) {
       ScopedObjectAccess soa(Thread::Current());
-      StackHandleScope<1> hs(soa.Self());
       Handle<mirror::DexCache> dex_cache(
-          hs.NewHandle(Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file)));
+          hs->NewHandle(Runtime::Current()->GetClassLinker()->RegisterDexFile(*dex_file)));
       DCHECK(options_.class_loader_ != nullptr);
       return verifier::MethodVerifier::VerifyMethodAndDump(
           soa.Self(), vios, dex_method_idx, dex_file, dex_cache, *options_.class_loader_,
@@ -1614,10 +1618,13 @@
       dex_cache_arrays_.clear();
       {
         ReaderMutexLock mu(self, *class_linker->DexLock());
-        for (size_t i = 0; i < class_linker->GetDexCacheCount(); ++i) {
-          auto* dex_cache = class_linker->GetDexCache(i);
-          dex_cache_arrays_.insert(dex_cache->GetResolvedFields());
-          dex_cache_arrays_.insert(dex_cache->GetResolvedMethods());
+        for (jobject weak_root : class_linker->GetDexCaches()) {
+          mirror::DexCache* dex_cache =
+              down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+          if (dex_cache != nullptr) {
+            dex_cache_arrays_.insert(dex_cache->GetResolvedFields());
+            dex_cache_arrays_.insert(dex_cache->GetResolvedMethods());
+          }
         }
       }
       ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
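The oatdump change is purely a destruction-order fix: the StackHandleScope is hoisted into a unique_ptr declared before the verifier, so the scope (and the dex-cache handle the verifier borrows from it) outlives the verifier. The same idiom in miniature:

    #include <cstdio>
    #include <memory>

    struct Scope {
      ~Scope() { std::puts("scope destroyed"); }
    };

    struct Verifier {
      explicit Verifier(Scope* s) : scope_(s) {}  // borrows, does not own
      ~Verifier() { std::puts("verifier destroyed"); }
      Scope* scope_;
    };

    int main() {
      std::unique_ptr<Scope> hs;           // declared first => destroyed last
      std::unique_ptr<Verifier> verifier;
      hs.reset(new Scope);
      verifier.reset(new Verifier(hs.get()));
      // Locals are destroyed in reverse declaration order, so the verifier
      // goes away before the scope it borrowed from.
    }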
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index be9af98..1599025 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -167,7 +167,8 @@
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
-  qpoints->pDeoptimize = art_quick_deoptimize;
+  // Deoptimization from compiled code.
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index d6396c1..e45d828 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1141,6 +1141,17 @@
 END art_quick_deoptimize
 
     /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimizeFromCompiledCode
+ENTRY art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
+    mov    r0, r9                         @ Set up args.
+    blx    artDeoptimizeFromCompiledCode  @ artDeoptimizeFromCompiledCode(Thread*)
+END art_quick_deoptimize_from_compiled_code
+
+    /*
      * Signed 64-bit integer multiply.
      *
      * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
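Every architecture gains the same stub shape: materialize the save-all-callee-save frame so the runtime can walk and rewrite the stack, then call a noreturn C++ helper. As a rough mental model (schematic only; ART's "long jump" is its own stack unwinding, not literal setjmp/longjmp):

    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf upcall;

    // Stand-in for artDeoptimizeFromCompiledCode: never returns to the
    // compiled caller; control resurfaces at the upcall instead.
    [[noreturn]] void DeoptimizeFromCompiledCode() {
      std::longjmp(upcall, -1);  // -1 plays the "special exception" role
    }

    int main() {
      if (setjmp(upcall) != 0) {
        std::puts("back at the upcall after deopt");
        return 0;
      }
      DeoptimizeFromCompiledCode();
    }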
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 0f06727..e9c816f 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -150,8 +150,8 @@
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize;
+  // Deoptimization from compiled code.
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index bfef0fa..169bc38 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1739,6 +1739,18 @@
     brk 0
 END art_quick_deoptimize
 
+    /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimizeFromCompiledCode
+ENTRY art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    mov    x0, xSELF                      // Pass thread.
+    bl     artDeoptimizeFromCompiledCode  // artDeoptimizeFromCompiledCode(Thread*)
+    brk 0
+END art_quick_deoptimize_from_compiled_code
+
 
     /*
      * String's indexOf.
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 4e4b91f..6721e54 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -267,8 +267,8 @@
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
   static_assert(!IsDirectEntrypoint(kQuickThrowStackOverflow), "Non-direct C stub marked direct.");
 
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize;
+  // Deoptimization from compiled code.
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
   static_assert(!IsDirectEntrypoint(kQuickDeoptimize), "Non-direct C stub marked direct.");
 
   // Atomic 64-bit load/store
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index cb49cf5..ba58c3f 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1542,6 +1542,18 @@
 END art_quick_deoptimize
 
     /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimizeFromCompiledCode
+ENTRY art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    jal      artDeoptimizeFromCompiledCode  # artDeoptimizeFromCompiledCode(Thread*)
+                                            # Does not return; longjmps to the upcall.
+    move     $a0, rSELF                     # pass Thread::current (in delay slot)
+END art_quick_deoptimize_from_compiled_code
+
+    /*
      * Long integer shift.  This is different from the generic 32/64-bit
      * binary operations because vAA/vBB are 64-bit but vCC (the shift
      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index ec02d5a..9f1f0e0 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -176,8 +176,8 @@
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize;
+  // Deoptimization from compiled code.
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // TODO - use lld/scd instructions for Mips64
   // Atomic 64-bit load/store
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 4bc049c..1b50b2e 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1603,5 +1603,17 @@
     move     $a0, rSELF        # pass Thread::current
 END art_quick_deoptimize
 
+    /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimizeFromCompiledCode
+ENTRY art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    jal      artDeoptimizeFromCompiledCode    # artDeoptimizeFromCompiledCode(Thread*)
+                                              # Does not return; longjmps to the upcall.
+    move     $a0, rSELF                       # pass Thread::current (in delay slot)
+END art_quick_deoptimize_from_compiled_code
+
 UNIMPLEMENTED art_quick_indexof
 UNIMPLEMENTED art_quick_string_compareto
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index e2632c1..10fc281 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -140,7 +140,7 @@
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
   // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path;
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 9b2d59d..029a296 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1684,9 +1684,6 @@
      */
 DEFINE_FUNCTION art_quick_deoptimize
     PUSH ebx                      // Entry point for a jump. Fake that we were called.
-.globl SYMBOL(art_quick_deoptimize_from_compiled_slow_path)  // Entry point for real calls
-                                                             // from compiled slow paths.
-SYMBOL(art_quick_deoptimize_from_compiled_slow_path):
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
     subl LITERAL(12), %esp        // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
@@ -1697,6 +1694,20 @@
 END_FUNCTION art_quick_deoptimize
 
     /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
+    subl LITERAL(12), %esp                      // Align stack.
+    CFI_ADJUST_CFA_OFFSET(12)
+    pushl %fs:THREAD_SELF_OFFSET                // Pass Thread::Current().
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
+    UNREACHABLE
+END_FUNCTION art_quick_deoptimize_from_compiled_code
+
+    /*
      * String's compareTo.
      *
      * On entry:
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index ef1bb5f..5cc72e3 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -144,7 +144,7 @@
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
   // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path;
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 88270d9..1498a4b 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1728,9 +1728,6 @@
 DEFINE_FUNCTION art_quick_deoptimize
     pushq %rsi                     // Entry point for a jump. Fake that we were called.
                                    // Use hidden arg.
-.globl SYMBOL(art_quick_deoptimize_from_compiled_slow_path)  // Entry point for real calls
-                                                             // from compiled slow paths.
-SYMBOL(art_quick_deoptimize_from_compiled_slow_path):
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
                                    // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
@@ -1739,6 +1736,18 @@
 END_FUNCTION art_quick_deoptimize
 
     /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+                                                // Stack should be aligned now.
+    movq %gs:THREAD_SELF_OFFSET, %rdi           // Pass Thread.
+    call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
+    UNREACHABLE
+END_FUNCTION art_quick_deoptimize_from_compiled_code
+
+    /*
      * String's compareTo.
      *
      * On entry:
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 56f7b35..e46402d 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -427,9 +427,16 @@
         self->ClearException();
         ShadowFrame* shadow_frame =
             self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
-        result->SetJ(self->PopDeoptimizationReturnValue().GetJ());
+        mirror::Throwable* pending_exception = nullptr;
+        self->PopDeoptimizationContext(result, &pending_exception);
         self->SetTopOfStack(nullptr);
         self->SetTopOfShadowStack(shadow_frame);
+
+        // Restore the exception that was pending before deoptimization, then
+        // interpret the deoptimized frames.
+        if (pending_exception != nullptr) {
+          self->SetException(pending_exception);
+        }
         interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
       }
       if (kLogInvocationStartAndReturn) {
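PopDeoptimizationContext replaces the old return-value-only pop: the context now carries both the return value and whatever exception was pending when deoptimization was requested, and the exception is reinstated before interpreting the frames. A hedged sketch of that pairing (the Push side and the struct layout are assumptions; only the Pop name appears in this hunk):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct DeoptContext {
      int64_t return_value;
      const char* pending_exception;  // null when nothing was pending
    };

    static std::vector<DeoptContext> deopt_stack;

    void PushDeoptimizationContext(int64_t ret, const char* exception) {
      deopt_stack.push_back({ret, exception});
    }

    void PopDeoptimizationContext(int64_t* ret, const char** exception) {
      assert(!deopt_stack.empty());
      *ret = deopt_stack.back().return_value;
      *exception = deopt_stack.back().pending_exception;
      deopt_stack.pop_back();
    }

    int main() {
      PushDeoptimizationContext(42, "java.lang.ArithmeticException");
      int64_t ret = 0;
      const char* pending = nullptr;
      PopDeoptimizationContext(&ret, &pending);
      if (pending != nullptr) {
        // Mirrors self->SetException(pending_exception) above.
      }
    }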
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 084c88e..5c1922e 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -89,7 +89,7 @@
             art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 136
+#define THREAD_CARD_TABLE_OFFSET 128
 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
             art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
 
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index e5832e1..3a4bccd 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -32,32 +32,43 @@
 
 template <bool kCount>
 const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
-  "Misc       ",
-  "BasicBlock ",
-  "BBList     ",
-  "BBPreds    ",
-  "DfsPreOrd  ",
-  "DfsPostOrd ",
-  "DomPostOrd ",
-  "TopoOrd    ",
-  "Lowering   ",
-  "LIR        ",
-  "LIR masks  ",
-  "SwitchTbl  ",
-  "FillArray  ",
-  "SlowPaths  ",
-  "MIR        ",
-  "DataFlow   ",
-  "GrowList   ",
-  "GrowBitMap ",
-  "SSA2Dalvik ",
-  "Dalvik2SSA ",
-  "DebugInfo  ",
-  "Successor  ",
-  "RegAlloc   ",
-  "Data       ",
-  "Preds      ",
-  "STL        ",
+  "Misc         ",
+  "BBList       ",
+  "BBPreds      ",
+  "DfsPreOrd    ",
+  "DfsPostOrd   ",
+  "DomPostOrd   ",
+  "TopoOrd      ",
+  "Lowering     ",
+  "LIR          ",
+  "LIR masks    ",
+  "SwitchTbl    ",
+  "FillArray    ",
+  "SlowPaths    ",
+  "MIR          ",
+  "DataFlow     ",
+  "GrowList     ",
+  "GrowBitMap   ",
+  "SSA2Dalvik   ",
+  "Dalvik2SSA   ",
+  "DebugInfo    ",
+  "Successor    ",
+  "RegAlloc     ",
+  "Data         ",
+  "Preds        ",
+  "STL          ",
+  "Graph        ",
+  "BasicBlock   ",
+  "Instruction  ",
+  "LoopInfo     ",
+  "TryCatchInf  ",
+  "UseListNode  ",
+  "Environment  ",
+  "MoveOperands ",
+  "CodeBuffer   ",
+  "StackMaps    ",
+  "BaselineMaps ",
+  "Optimization ",
 };
 
 template <bool kCount>
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 05c66f0..af2bfbc 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -43,7 +43,6 @@
 // Type of allocation for memory tuning.
 enum ArenaAllocKind {
   kArenaAllocMisc,
-  kArenaAllocBB,
   kArenaAllocBBList,
   kArenaAllocBBPredecessors,
   kArenaAllocDfsPreOrder,
@@ -68,6 +67,18 @@
   kArenaAllocData,
   kArenaAllocPredecessors,
   kArenaAllocSTL,
+  kArenaAllocGraph,
+  kArenaAllocBasicBlock,
+  kArenaAllocInstruction,
+  kArenaAllocLoopInfo,
+  kArenaAllocTryCatchInfo,
+  kArenaAllocUseListNode,
+  kArenaAllocEnvironment,
+  kArenaAllocMoveOperands,
+  kArenaAllocCodeBuffer,
+  kArenaAllocStackMaps,
+  kArenaAllocBaselineMaps,
+  kArenaAllocOptimization,
   kNumArenaAllocKinds
 };
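With a dozen new kinds appended, kAllocNames and ArenaAllocKind must stay in lock-step; the usual guard is a compile-time size check next to the table. A sketch of that pattern (whether arena_allocator.cc already carries one is not visible in this hunk):

    #include <cstddef>

    enum ArenaAllocKind {
      kArenaAllocMisc,
      kArenaAllocSTL,
      kNumArenaAllocKinds
    };

    static const char* const kAllocNames[] = {
      "Misc         ",
      "STL          ",
    };

    // Fails to compile as soon as an enum value is added without a name.
    static_assert(sizeof(kAllocNames) / sizeof(kAllocNames[0]) ==
                      static_cast<std::size_t>(kNumArenaAllocKinds),
                  "kAllocNames must cover every ArenaAllocKind");

    int main() { return 0; }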
 
diff --git a/runtime/base/arena_containers.h b/runtime/base/arena_containers.h
index a7aafdf..810c1c4 100644
--- a/runtime/base/arena_containers.h
+++ b/runtime/base/arena_containers.h
@@ -76,6 +76,7 @@
 class ArenaAllocatorAdapterKindImpl {
  public:
   explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) : kind_(kind) { }
+  ArenaAllocatorAdapterKindImpl(const ArenaAllocatorAdapterKindImpl&) = default;
   ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl&) = default;
   ArenaAllocKind Kind() { return kind_; }
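The one-line copy-constructor addition is about warning hygiene: because the class user-declares a copy-assignment operator, implicit generation of the copy constructor is deprecated, and `= default` restores it explicitly. In miniature:

    struct Kind {
      explicit Kind(int k) : kind_(k) {}
      Kind(const Kind&) = default;             // now spelled out, not implicit
      Kind& operator=(const Kind&) = default;  // user-declared: deprecates the
                                               // implicitly generated copy ctor
      int kind_;
    };

    int main() {
      Kind a(1);
      Kind b(a);  // copy, warning-free under -Wdeprecated-copy
      b = a;
      return b.kind_;
    }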
 
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
index 4ef1f9e..6d2c5e0 100644
--- a/runtime/base/hash_set_test.cc
+++ b/runtime/base/hash_set_test.cc
@@ -17,9 +17,11 @@
 #include "hash_set.h"
 
+#include <forward_list>
 #include <map>
 #include <sstream>
 #include <string>
 #include <unordered_set>
+#include <vector>
 
 #include <gtest/gtest.h>
 #include "hash_map.h"
@@ -258,4 +260,59 @@
   ASSERT_EQ(it->second, 124);
 }
 
+struct IsEmptyFnVectorInt {
+  void MakeEmpty(std::vector<int>& item) const {
+    item.clear();
+  }
+  bool IsEmpty(const std::vector<int>& item) const {
+    return item.empty();
+  }
+};
+
+template <typename T>
+size_t HashIntSequence(T begin, T end) {
+  size_t hash = 0;
+  for (auto iter = begin; iter != end; ++iter) {
+    hash = hash * 2 + *iter;
+  }
+  return hash;
+}
+
+struct VectorIntHashEquals {
+  std::size_t operator()(const std::vector<int>& item) const {
+    return HashIntSequence(item.begin(), item.end());
+  }
+
+  std::size_t operator()(const std::forward_list<int>& item) const {
+    return HashIntSequence(item.begin(), item.end());
+  }
+
+  bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
+    return a == b;
+  }
+
+  bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
+    auto aiter = a.begin();
+    auto biter = b.begin();
+    while (aiter != a.end() && biter != b.end()) {
+      if (*aiter != *biter) {
+        return false;
+      }
+      aiter++;
+      biter++;
+    }
+    return (aiter == a.end() && biter == b.end());
+  }
+};
+
+TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
+  HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set;
+  hash_set.Insert(std::vector<int>({1, 2, 3, 4}));
+  hash_set.Insert(std::vector<int>({4, 2}));
+  ASSERT_EQ(hash_set.end(), hash_set.Find(std::vector<int>({1, 1, 1, 1})));
+  ASSERT_NE(hash_set.end(), hash_set.Find(std::vector<int>({1, 2, 3, 4})));
+  ASSERT_EQ(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 1, 1, 1})));
+  ASSERT_NE(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 2, 3, 4})));
+}
+
 }  // namespace art
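The new test exercises lookup with a key type different from the stored one; it works because HashSet's Find is evidently templated over the key and the functor overloads cover both vector and forward_list. The ordered-container analogue in the standard library is a transparent comparator:

    #include <cassert>
    #include <set>
    #include <string>

    int main() {
      // std::less<> is transparent, so find() accepts any type comparable
      // with the stored key - no temporary std::string is built for lookup.
      std::set<std::string, std::less<>> names{"alpha", "beta"};
      assert(names.find("alpha") != names.end());  // const char* key
      return 0;
    }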
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 848c904..6bf203c 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -64,6 +64,8 @@
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
   kTransactionLogLock,
+  kMarkSweepMarkStackLock,
+  kJniWeakGlobalsLock,
   kReferenceQueueSoftReferencesLock,
   kReferenceQueuePhantomReferencesLock,
   kReferenceQueueFinalizerReferencesLock,
@@ -79,7 +81,6 @@
   kArenaPoolLock,
   kDexFileMethodInlinerLock,
   kDexFileToMethodInlinerMapLock,
-  kMarkSweepMarkStackLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
   kTracingUniqueMethodsLock,
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 11901b3..d2dbff6 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -195,12 +195,6 @@
   return klass;
 }
 
-inline mirror::DexCache* ClassLinker::GetDexCache(size_t idx) {
-  dex_lock_.AssertSharedHeld(Thread::Current());
-  DCHECK(idx < dex_caches_.size());
-  return dex_caches_[idx].Read();
-}
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_LINKER_INL_H_
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index b074dec..287aca9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -273,7 +273,6 @@
       array_iftable_(nullptr),
       find_array_class_cache_next_victim_(0),
       init_done_(false),
-      log_new_dex_caches_roots_(false),
       log_new_class_table_roots_(false),
       intern_table_(intern_table),
       quick_resolution_trampoline_(nullptr),
@@ -332,6 +331,12 @@
   java_lang_Class->SetSuperClass(java_lang_Object.Get());
   mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusLoaded, self);
 
+  java_lang_Object->SetObjectSize(sizeof(mirror::Object));
+  runtime->SetSentinel(heap->AllocObject<true>(self,
+                                               java_lang_Object.Get(),
+                                               java_lang_Object->GetObjectSize(),
+                                               VoidFunctor()));
+
   // Object[] next to hold class roots.
   Handle<mirror::Class> object_array_class(hs.NewHandle(
       AllocClass(self, java_lang_Class.Get(),
@@ -1143,11 +1148,11 @@
   quick_imt_conflict_trampoline_ = oat_file.GetOatHeader().GetQuickImtConflictTrampoline();
   quick_generic_jni_trampoline_ = oat_file.GetOatHeader().GetQuickGenericJniTrampoline();
   quick_to_interpreter_bridge_trampoline_ = oat_file.GetOatHeader().GetQuickToInterpreterBridge();
+  StackHandleScope<2> hs(self);
   mirror::Object* dex_caches_object = space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-  mirror::ObjectArray<mirror::DexCache>* dex_caches =
-      dex_caches_object->AsObjectArray<mirror::DexCache>();
+  Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches(
+      hs.NewHandle(dex_caches_object->AsObjectArray<mirror::DexCache>()));
 
-  StackHandleScope<1> hs(self);
   Handle<mirror::ObjectArray<mirror::Class>> class_roots(hs.NewHandle(
           space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
           AsObjectArray<mirror::Class>()));
@@ -1157,6 +1162,13 @@
   // as being Strings or not
   mirror::String::SetClass(GetClassRoot(kJavaLangString));
 
+  mirror::Class* java_lang_Object = GetClassRoot(kJavaLangObject);
+  java_lang_Object->SetObjectSize(sizeof(mirror::Object));
+  Runtime::Current()->SetSentinel(Runtime::Current()->GetHeap()->AllocObject<true>(self,
+                                                          java_lang_Object,
+                                                          java_lang_Object->GetObjectSize(),
+                                                          VoidFunctor()));
+
   CHECK_EQ(oat_file.GetOatHeader().GetDexFileCount(),
            static_cast<uint32_t>(dex_caches->GetLength()));
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
@@ -1250,7 +1262,6 @@
 }
 
 bool ClassLinker::ClassInClassTable(mirror::Class* klass) {
-  ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   ClassTable* const class_table = ClassTableForClassLoader(klass->GetClassLoader());
   return class_table != nullptr && class_table->Contains(klass);
 }
@@ -1307,27 +1318,6 @@
 // mapped image.
 void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
   class_roots_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
-  Thread* const self = Thread::Current();
-  {
-    ReaderMutexLock mu(self, dex_lock_);
-    if ((flags & kVisitRootFlagAllRoots) != 0) {
-      for (GcRoot<mirror::DexCache>& dex_cache : dex_caches_) {
-        dex_cache.VisitRoot(visitor, RootInfo(kRootVMInternal));
-      }
-    } else if ((flags & kVisitRootFlagNewRoots) != 0) {
-      for (size_t index : new_dex_cache_roots_) {
-        dex_caches_[index].VisitRoot(visitor, RootInfo(kRootVMInternal));
-      }
-    }
-    if ((flags & kVisitRootFlagClearRootLog) != 0) {
-      new_dex_cache_roots_.clear();
-    }
-    if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
-      log_new_dex_caches_roots_ = true;
-    } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
-      log_new_dex_caches_roots_ = false;
-    }
-  }
   VisitClassRoots(visitor, flags);
   array_iftable_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   for (GcRoot<mirror::Class>& root : find_array_class_cache_) {
@@ -1702,7 +1692,6 @@
                 long_array->GetWithoutChecks(j)));
             const DexFile::ClassDef* dex_class_def = cp_dex_file->FindClassDef(descriptor, hash);
             if (dex_class_def != nullptr) {
-              RegisterDexFile(*cp_dex_file);
               mirror::Class* klass = DefineClass(self, descriptor, hash, class_loader,
                                                  *cp_dex_file, *dex_class_def);
               if (klass == nullptr) {
@@ -1845,11 +1834,15 @@
     klass.Assign(AllocClass(self, SizeOfClassWithoutEmbeddedTables(dex_file, dex_class_def)));
   }
   if (UNLIKELY(klass.Get() == nullptr)) {
-    CHECK(self->IsExceptionPending());  // Expect an OOME.
+    self->AssertPendingOOMException();
     return nullptr;
   }
-  klass->SetDexCache(FindDexCache(dex_file));
-
+  mirror::DexCache* dex_cache = RegisterDexFile(dex_file);
+  if (dex_cache == nullptr) {
+    self->AssertPendingOOMException();
+    return nullptr;
+  }
+  klass->SetDexCache(dex_cache);
   SetupClass(dex_file, dex_class_def, klass, class_loader.Get());
 
   // Mark the string class by setting its access flag.
@@ -2482,58 +2475,54 @@
   RegisterDexFile(dex_file, dex_cache);
 }
 
-bool ClassLinker::IsDexFileRegisteredLocked(const DexFile& dex_file) {
-  dex_lock_.AssertSharedHeld(Thread::Current());
-  for (GcRoot<mirror::DexCache>& root : dex_caches_) {
-    mirror::DexCache* dex_cache = root.Read();
-    if (dex_cache->GetDexFile() == &dex_file) {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool ClassLinker::IsDexFileRegistered(const DexFile& dex_file) {
-  ReaderMutexLock mu(Thread::Current(), dex_lock_);
-  return IsDexFileRegisteredLocked(dex_file);
-}
-
 void ClassLinker::RegisterDexFileLocked(const DexFile& dex_file,
                                         Handle<mirror::DexCache> dex_cache) {
-  dex_lock_.AssertExclusiveHeld(Thread::Current());
+  Thread* const self = Thread::Current();
+  dex_lock_.AssertExclusiveHeld(self);
   CHECK(dex_cache.Get() != nullptr) << dex_file.GetLocation();
   CHECK(dex_cache->GetLocation()->Equals(dex_file.GetLocation()))
       << dex_cache->GetLocation()->ToModifiedUtf8() << " " << dex_file.GetLocation();
-  dex_caches_.push_back(GcRoot<mirror::DexCache>(dex_cache.Get()));
-  dex_cache->SetDexFile(&dex_file);
-  if (log_new_dex_caches_roots_) {
-    // TODO: This is not safe if we can remove dex caches.
-    new_dex_cache_roots_.push_back(dex_caches_.size() - 1);
+  // Clean-up pass to remove null dex caches: entries can become null due to
+  // class unloading, and we remove the stale weak globals lazily.
+  JavaVMExt* const vm = self->GetJniEnv()->vm;
+  for (auto it = dex_caches_.begin(); it != dex_caches_.end();) {
+    mirror::Object* dex_cache_root = self->DecodeJObject(*it);
+    if (dex_cache_root == nullptr) {
+      vm->DeleteWeakGlobalRef(self, *it);
+      it = dex_caches_.erase(it);
+    } else {
+      ++it;
+    }
   }
+  dex_caches_.push_back(vm->AddWeakGlobalRef(self, dex_cache.Get()));
+  dex_cache->SetDexFile(&dex_file);
 }
 
-void ClassLinker::RegisterDexFile(const DexFile& dex_file) {
+mirror::DexCache* ClassLinker::RegisterDexFile(const DexFile& dex_file) {
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
-    if (IsDexFileRegisteredLocked(dex_file)) {
-      return;
+    mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
+    if (dex_cache != nullptr) {
+      return dex_cache;
     }
   }
   // Don't alloc while holding the lock, since allocation may need to
   // suspend all threads and another thread may need the dex_lock_ to
   // get to a suspend point.
   StackHandleScope<1> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(AllocDexCache(self, dex_file)));
-  CHECK(dex_cache.Get() != nullptr) << "Failed to allocate dex cache for "
-                                    << dex_file.GetLocation();
-  {
-    WriterMutexLock mu(self, dex_lock_);
-    if (IsDexFileRegisteredLocked(dex_file)) {
-      return;
-    }
-    RegisterDexFileLocked(dex_file, dex_cache);
+  Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(AllocDexCache(self, dex_file)));
+  WriterMutexLock mu(self, dex_lock_);
+  mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
+  if (dex_cache != nullptr) {
+    return dex_cache;
   }
+  if (h_dex_cache.Get() == nullptr) {
+    self->AssertPendingOOMException();
+    return nullptr;
+  }
+  RegisterDexFileLocked(dex_file, h_dex_cache);
+  return h_dex_cache.Get();
 }
 
 void ClassLinker::RegisterDexFile(const DexFile& dex_file,
@@ -2542,36 +2531,49 @@
   RegisterDexFileLocked(dex_file, dex_cache);
 }
 
-mirror::DexCache* ClassLinker::FindDexCache(const DexFile& dex_file) {
-  ReaderMutexLock mu(Thread::Current(), dex_lock_);
+mirror::DexCache* ClassLinker::FindDexCache(Thread* self,
+                                            const DexFile& dex_file,
+                                            bool allow_failure) {
+  ReaderMutexLock mu(self, dex_lock_);
+  return FindDexCacheLocked(self, dex_file, allow_failure);
+}
+
+mirror::DexCache* ClassLinker::FindDexCacheLocked(Thread* self,
+                                                  const DexFile& dex_file,
+                                                  bool allow_failure) {
   // Search assuming uniqueness of the dex file.
-  for (size_t i = 0; i != dex_caches_.size(); ++i) {
-    mirror::DexCache* dex_cache = GetDexCache(i);
-    if (dex_cache->GetDexFile() == &dex_file) {
+  JavaVMExt* const vm = self->GetJniEnv()->vm;
+  for (jobject weak_root : dex_caches_) {
+    DCHECK_EQ(GetIndirectRefKind(weak_root), kWeakGlobal);
+    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
+        vm->DecodeWeakGlobal(self, weak_root));
+    if (dex_cache != nullptr && dex_cache->GetDexFile() == &dex_file) {
       return dex_cache;
     }
   }
-  // Search matching by location name.
+  if (allow_failure) {
+    return nullptr;
+  }
   std::string location(dex_file.GetLocation());
-  for (size_t i = 0; i != dex_caches_.size(); ++i) {
-    mirror::DexCache* dex_cache = GetDexCache(i);
-    if (dex_cache->GetDexFile()->GetLocation() == location) {
-      return dex_cache;
-    }
-  }
   // Failure, dump diagnostic and abort.
-  for (size_t i = 0; i != dex_caches_.size(); ++i) {
-    mirror::DexCache* dex_cache = GetDexCache(i);
-    LOG(ERROR) << "Registered dex file " << i << " = " << dex_cache->GetDexFile()->GetLocation();
+  for (jobject weak_root : dex_caches_) {
+    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+    if (dex_cache != nullptr) {
+      LOG(ERROR) << "Registered dex file " << dex_cache->GetDexFile()->GetLocation();
+    }
   }
   LOG(FATAL) << "Failed to find DexCache for DexFile " << location;
   UNREACHABLE();
 }
 
 void ClassLinker::FixupDexCaches(ArtMethod* resolution_method) {
-  ReaderMutexLock mu(Thread::Current(), dex_lock_);
-  for (auto& dex_cache : dex_caches_) {
-    dex_cache.Read()->Fixup(resolution_method, image_pointer_size_);
+  Thread* const self = Thread::Current();
+  ReaderMutexLock mu(self, dex_lock_);
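+  // DecodeJObject returns null for dex caches whose weak global has been cleared.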
+  for (jobject weak_root : dex_caches_) {
+    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+    if (dex_cache != nullptr) {
+      dex_cache->Fixup(resolution_method, image_pointer_size_);
+    }
   }
 }
 
@@ -3407,11 +3409,13 @@
   DCHECK(proxy_class->IsProxyClass());
   DCHECK(proxy_method->IsProxyMethod());
   {
-    ReaderMutexLock mu(Thread::Current(), dex_lock_);
+    Thread* const self = Thread::Current();
+    ReaderMutexLock mu(self, dex_lock_);
     // Locate the dex cache of the original interface/Object
-    for (const GcRoot<mirror::DexCache>& root : dex_caches_) {
-      auto* dex_cache = root.Read();
-      if (proxy_method->HasSameDexCacheResolvedTypes(dex_cache->GetResolvedTypes())) {
+    for (jobject weak_root : dex_caches_) {
+      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+      if (dex_cache != nullptr &&
+          proxy_method->HasSameDexCacheResolvedTypes(dex_cache->GetResolvedTypes())) {
         ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
             proxy_method->GetDexMethodIndex(), image_pointer_size_);
         CHECK(resolved_method != nullptr);
@@ -5878,11 +5882,6 @@
   // We could move the jobject to the callers, but all call-sites do this...
   ScopedObjectAccessUnchecked soa(self);
 
-  // Register the dex files.
-  for (const DexFile* dex_file : dex_files) {
-    RegisterDexFile(*dex_file);
-  }
-
   // For now, create a libcore-level DexFile for each ART DexFile. This "explodes" multidex.
   StackHandleScope<10> hs(self);
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index fbf4035..2a7162b 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -278,7 +278,7 @@
   void RunRootClinits() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
-  void RegisterDexFile(const DexFile& dex_file)
+  mirror::DexCache* RegisterDexFile(const DexFile& dex_file)
       REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
   void RegisterDexFile(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -309,9 +309,9 @@
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
       REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  mirror::DexCache* FindDexCache(const DexFile& dex_file)
-      REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
-  bool IsDexFileRegistered(const DexFile& dex_file)
+  mirror::DexCache* FindDexCache(Thread* self,
+                                 const DexFile& dex_file,
+                                 bool allow_failure = false)
       REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupDexCaches(ArtMethod* resolution_method)
       REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -471,7 +471,7 @@
 
   // Used by image writer for checking.
   bool ClassInClassTable(mirror::Class* klass)
-      REQUIRES(!Locks::classlinker_classes_lock_)
+      REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* CreateRuntimeMethod();
@@ -561,8 +561,11 @@
 
   void RegisterDexFileLocked(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       REQUIRES(dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
-  bool IsDexFileRegisteredLocked(const DexFile& dex_file)
-      SHARED_REQUIRES(dex_lock_, Locks::mutator_lock_);
+  mirror::DexCache* FindDexCacheLocked(Thread* self,
+                                       const DexFile& dex_file,
+                                       bool allow_failure)
+      REQUIRES(dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool InitializeClass(Thread* self, Handle<mirror::Class> klass, bool can_run_clinit,
                        bool can_init_parents)
@@ -631,7 +634,9 @@
   size_t GetDexCacheCount() SHARED_REQUIRES(Locks::mutator_lock_, dex_lock_) {
     return dex_caches_.size();
   }
-  mirror::DexCache* GetDexCache(size_t idx) SHARED_REQUIRES(Locks::mutator_lock_, dex_lock_);
+  const std::list<jobject>& GetDexCaches() SHARED_REQUIRES(Locks::mutator_lock_, dex_lock_) {
+    return dex_caches_;
+  }
 
   const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location)
       REQUIRES(!dex_lock_);
@@ -702,8 +707,9 @@
   std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
 
   mutable ReaderWriterMutex dex_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::vector<size_t> new_dex_cache_roots_ GUARDED_BY(dex_lock_);
-  std::vector<GcRoot<mirror::DexCache>> dex_caches_ GUARDED_BY(dex_lock_);
+  // JNI weak globals to allow dex caches to get unloaded. We lazily delete weak globals when we
+  // register new dex files.
+  std::list<jobject> dex_caches_ GUARDED_BY(dex_lock_);
   std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_);
 
   // This contains the class loaders which have class tables. It is populated by
@@ -736,7 +742,6 @@
   size_t find_array_class_cache_next_victim_;
 
   bool init_done_;
-  bool log_new_dex_caches_roots_ GUARDED_BY(dex_lock_);
   bool log_new_class_table_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
   InternTable* intern_table_;
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 0d1c875..c3191fa 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -355,7 +355,7 @@
     TestRootVisitor visitor;
     class_linker_->VisitRoots(&visitor, kVisitRootFlagAllRoots);
     // Verify the dex cache has resolution methods in all resolved method slots
-    mirror::DexCache* dex_cache = class_linker_->FindDexCache(dex);
+    mirror::DexCache* dex_cache = class_linker_->FindDexCache(Thread::Current(), dex);
     auto* resolved_methods = dex_cache->GetResolvedMethods();
     for (size_t i = 0; i < static_cast<size_t>(resolved_methods->GetLength()); i++) {
       EXPECT_TRUE(resolved_methods->GetElementPtrSize<ArtMethod*>(i, sizeof(void*)) != nullptr)
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 5f9e413..56c5d1a 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -551,7 +551,8 @@
   }
 
   Thread* self = Thread::Current();
-  jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self,                                                                                   class_path);
+  jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self,
+                                                                                     class_path);
   self->SetClassLoaderOverride(class_loader);
   return class_loader;
 }
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 67099d7..8d34f5a 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1191,6 +1191,10 @@
       if (error != JDWP::ERR_NONE) {
         return error;
       }
+      // Check if the object's type is compatible with the array's type.
+      if (o != nullptr && !o->InstanceOf(oa->GetClass()->GetComponentType())) {
+        return JDWP::ERR_TYPE_MISMATCH;
+      }
       oa->Set<false>(offset + i, o);
     }
   }
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 213f25d..5526883 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -1132,7 +1132,7 @@
   return ProcessAnnotationSet(field_class, annotation_set, kDexVisibilityRuntime);
 }
 
-mirror::ObjectArray<mirror::Object>* DexFile::GetSignatureAnnotationForField(ArtField* field)
+mirror::ObjectArray<mirror::String>* DexFile::GetSignatureAnnotationForField(ArtField* field)
     const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForField(field);
   if (annotation_set == nullptr) {
@@ -1253,7 +1253,7 @@
   return ProcessAnnotationSet(method_class, annotation_set, kDexVisibilityRuntime);
 }
 
-mirror::ObjectArray<mirror::Object>* DexFile::GetExceptionTypesForMethod(ArtMethod* method) const {
+mirror::ObjectArray<mirror::Class>* DexFile::GetExceptionTypesForMethod(ArtMethod* method) const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
   if (annotation_set == nullptr) {
     return nullptr;
@@ -1289,7 +1289,7 @@
   Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
   const AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(
       method_class, annotation_set, kDexVisibilityRuntime, annotation_class);
-  return (annotation_item != nullptr);
+  return annotation_item != nullptr;
 }
 
 const DexFile::AnnotationSetItem* DexFile::FindAnnotationSetForClass(Handle<mirror::Class> klass)
@@ -1317,6 +1317,153 @@
   return ProcessAnnotationSet(klass, annotation_set, kDexVisibilityRuntime);
 }
 
+mirror::ObjectArray<mirror::Class>* DexFile::GetDeclaredClasses(Handle<mirror::Class> klass) const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  const AnnotationItem* annotation_item = SearchAnnotationSet(
+      annotation_set, "Ldalvik/annotation/MemberClasses;", kDexVisibilitySystem);
+  if (annotation_item == nullptr) {
+    return nullptr;
+  }
+  StackHandleScope<1> hs(Thread::Current());
+  mirror::Class* class_class = mirror::Class::GetJavaLangClass();
+  Handle<mirror::Class> class_array_class(hs.NewHandle(
+      Runtime::Current()->GetClassLinker()->FindArrayClass(hs.Self(), &class_class)));
+  if (class_array_class.Get() == nullptr) {
+    return nullptr;
+  }
+  mirror::Object* obj = GetAnnotationValue(
+      klass, annotation_item, "value", class_array_class, kDexAnnotationArray);
+  if (obj == nullptr) {
+    return nullptr;
+  }
+  return obj->AsObjectArray<mirror::Class>();
+}
+
+mirror::Class* DexFile::GetDeclaringClass(Handle<mirror::Class> klass) const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  const AnnotationItem* annotation_item = SearchAnnotationSet(
+      annotation_set, "Ldalvik/annotation/EnclosingClass;", kDexVisibilitySystem);
+  if (annotation_item == nullptr) {
+    return nullptr;
+  }
+  mirror::Object* obj = GetAnnotationValue(
+      klass, annotation_item, "value", NullHandle<mirror::Class>(), kDexAnnotationType);
+  if (obj == nullptr) {
+    return nullptr;
+  }
+  return obj->AsClass();
+}
+
+mirror::Class* DexFile::GetEnclosingClass(Handle<mirror::Class> klass) const {
+  mirror::Class* declaring_class = GetDeclaringClass(klass);
+  if (declaring_class != nullptr) {
+    return declaring_class;
+  }
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  const AnnotationItem* annotation_item = SearchAnnotationSet(
+      annotation_set, "Ldalvik/annotation/EnclosingMethod;", kDexVisibilitySystem);
+  if (annotation_item == nullptr) {
+    return nullptr;
+  }
+  const uint8_t* annotation = SearchEncodedAnnotation(annotation_item->annotation_, "value");
+  if (annotation == nullptr) {
+    return nullptr;
+  }
+  AnnotationValue annotation_value;
+  if (!ProcessAnnotationValue(
+      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllRaw)) {
+    return nullptr;
+  }
+  if (annotation_value.type_ != kDexAnnotationMethod) {
+    return nullptr;
+  }
+  StackHandleScope<2> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
+  ArtMethod* method = Runtime::Current()->GetClassLinker()->ResolveMethodWithoutInvokeType(
+      klass->GetDexFile(), annotation_value.value_.GetI(), dex_cache, class_loader);
+  if (method == nullptr) {
+    return nullptr;
+  }
+  return method->GetDeclaringClass();
+}
+
+mirror::Object* DexFile::GetEnclosingMethod(Handle<mirror::Class> klass) const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  const AnnotationItem* annotation_item = SearchAnnotationSet(
+      annotation_set, "Ldalvik/annotation/EnclosingMethod;", kDexVisibilitySystem);
+  if (annotation_item == nullptr) {
+    return nullptr;
+  }
+  return GetAnnotationValue(
+      klass, annotation_item, "value", NullHandle<mirror::Class>(), kDexAnnotationMethod);
+}
+
+bool DexFile::GetInnerClass(Handle<mirror::Class> klass, mirror::String** name) const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  if (annotation_set == nullptr) {
+    return false;
+  }
+  const AnnotationItem* annotation_item = SearchAnnotationSet(
+      annotation_set, "Ldalvik/annotation/InnerClass;", kDexVisibilitySystem);
+  if (annotation_item == nullptr) {
+    return false;
+  }
+  const uint8_t* annotation = SearchEncodedAnnotation(annotation_item->annotation_, "name");
+  if (annotation == nullptr) {
+    return false;
+  }
+  AnnotationValue annotation_value;
+  if (!ProcessAnnotationValue(
+      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllObjects)) {
+    return false;
+  }
+  if (annotation_value.type_ != kDexAnnotationNull &&
+      annotation_value.type_ != kDexAnnotationString) {
+    return false;
+  }
+  *name = down_cast<mirror::String*>(annotation_value.value_.GetL());
+  return true;
+}
+
+bool DexFile::GetInnerClassFlags(Handle<mirror::Class> klass, uint32_t* flags) const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  if (annotation_set == nullptr) {
+    return false;
+  }
+  const AnnotationItem* annotation_item = SearchAnnotationSet(
+      annotation_set, "Ldalvik/annotation/InnerClass;", kDexVisibilitySystem);
+  if (annotation_item == nullptr) {
+    return false;
+  }
+  const uint8_t* annotation = SearchEncodedAnnotation(annotation_item->annotation_, "accessFlags");
+  if (annotation == nullptr) {
+    return false;
+  }
+  AnnotationValue annotation_value;
+  if (!ProcessAnnotationValue(
+      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllRaw)) {
+    return false;
+  }
+  if (annotation_value.type_ != kDexAnnotationInt) {
+    return false;
+  }
+  *flags = annotation_value.value_.GetI();
+  return true;
+}
+
 bool DexFile::IsClassAnnotationPresent(Handle<mirror::Class> klass,
                                        Handle<mirror::Class> annotation_class) const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
@@ -1325,7 +1472,7 @@
   }
   const AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(
       klass, annotation_set, kDexVisibilityRuntime, annotation_class);
-  return (annotation_item != nullptr);
+  return annotation_item != nullptr;
 }
 
 mirror::Object* DexFile::CreateAnnotationMember(Handle<mirror::Class> klass,
@@ -1440,7 +1587,7 @@
   return annotation_value.value_.GetL();
 }
 
-mirror::ObjectArray<mirror::Object>* DexFile::GetSignatureValue(Handle<mirror::Class> klass,
+mirror::ObjectArray<mirror::String>* DexFile::GetSignatureValue(Handle<mirror::Class> klass,
     const AnnotationSetItem* annotation_set) const {
   StackHandleScope<1> hs(Thread::Current());
   const AnnotationItem* annotation_item =
@@ -1451,15 +1598,18 @@
   mirror::Class* string_class = mirror::String::GetJavaLangString();
   Handle<mirror::Class> string_array_class(hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindArrayClass(Thread::Current(), &string_class)));
+  if (string_array_class.Get() == nullptr) {
+    return nullptr;
+  }
   mirror::Object* obj =
       GetAnnotationValue(klass, annotation_item, "value", string_array_class, kDexAnnotationArray);
   if (obj == nullptr) {
     return nullptr;
   }
-  return obj->AsObjectArray<mirror::Object>();
+  return obj->AsObjectArray<mirror::String>();
 }
 
-mirror::ObjectArray<mirror::Object>* DexFile::GetThrowsValue(Handle<mirror::Class> klass,
+mirror::ObjectArray<mirror::Class>* DexFile::GetThrowsValue(Handle<mirror::Class> klass,
     const AnnotationSetItem* annotation_set) const {
   StackHandleScope<1> hs(Thread::Current());
   const AnnotationItem* annotation_item =
@@ -1470,12 +1620,15 @@
   mirror::Class* class_class = mirror::Class::GetJavaLangClass();
   Handle<mirror::Class> class_array_class(hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindArrayClass(Thread::Current(), &class_class)));
+  if (class_array_class.Get() == nullptr) {
+    return nullptr;
+  }
   mirror::Object* obj =
       GetAnnotationValue(klass, annotation_item, "value", class_array_class, kDexAnnotationArray);
   if (obj == nullptr) {
     return nullptr;
   }
-  return obj->AsObjectArray<mirror::Object>();
+  return obj->AsObjectArray<mirror::Class>();
 }
 
 mirror::ObjectArray<mirror::Object>* DexFile::ProcessAnnotationSet(Handle<mirror::Class> klass,
@@ -1507,6 +1660,8 @@
     if (annotation_obj != nullptr) {
       result->SetWithoutChecks<false>(dest_index, annotation_obj);
       ++dest_index;
+    } else if (self->IsExceptionPending()) {
+      return nullptr;
     }
   }
 
@@ -1516,6 +1671,10 @@
 
   mirror::ObjectArray<mirror::Object>* trimmed_result =
       mirror::ObjectArray<mirror::Object>::Alloc(self, annotation_array_class.Get(), dest_index);
+  if (trimmed_result == nullptr) {
+    return nullptr;
+  }
+
   for (uint32_t i = 0; i < dest_index; ++i) {
     mirror::Object* obj = result->GetWithoutChecks(i);
     trimmed_result->SetWithoutChecks<false>(i, obj);
@@ -1533,6 +1692,9 @@
       soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
   mirror::Class* annotation_array_array_class =
       Runtime::Current()->GetClassLinker()->FindArrayClass(self, &annotation_array_class);
+  if (annotation_array_array_class == nullptr) {
+    return nullptr;
+  }
   Handle<mirror::ObjectArray<mirror::Object>> annotation_array_array(hs.NewHandle(
       mirror::ObjectArray<mirror::Object>::Alloc(self, annotation_array_array_class, size)));
   if (annotation_array_array.Get() == nullptr) {
@@ -1625,9 +1787,15 @@
             klass->GetDexFile(), index, klass.Get());
         set_object = true;
         if (element_object == nullptr) {
-          self->ClearException();
-          const char* msg = StringByTypeIdx(index);
-          self->ThrowNewException("Ljava/lang/TypeNotPresentException;", msg);
+          CHECK(self->IsExceptionPending());
+          if (result_style == kAllObjects) {
+            const char* msg = StringByTypeIdx(index);
+            self->ThrowNewWrappedException("Ljava/lang/TypeNotPresentException;", msg);
+            element_object = self->GetException();
+            self->ClearException();
+          } else {
+            return false;
+          }
         }
       }
       break;
@@ -1831,8 +1999,10 @@
       soa.Decode<mirror::Class*>(WellKnownClasses::libcore_reflect_AnnotationMember);
   mirror::Class* annotation_member_array_class =
       class_linker->FindArrayClass(self, &annotation_member_class);
+  if (annotation_member_array_class == nullptr) {
+    return nullptr;
+  }
   mirror::ObjectArray<mirror::Object>* element_array = nullptr;
-
   if (size > 0) {
     element_array =
         mirror::ObjectArray<mirror::Object>::Alloc(self, annotation_member_array_class, size);
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 8928321..98d4e59 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -931,7 +931,7 @@
       const SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* GetAnnotationsForField(ArtField* field) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  mirror::ObjectArray<mirror::Object>* GetSignatureAnnotationForField(ArtField* field) const
+  mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForField(ArtField* field) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsFieldAnnotationPresent(ArtField* field, Handle<mirror::Class> annotation_class) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -946,7 +946,7 @@
       const SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* GetAnnotationsForMethod(ArtMethod* method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  mirror::ObjectArray<mirror::Object>* GetExceptionTypesForMethod(ArtMethod* method) const
+  mirror::ObjectArray<mirror::Class>* GetExceptionTypesForMethod(ArtMethod* method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* GetParameterAnnotations(ArtMethod* method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -960,6 +960,18 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* GetAnnotationsForClass(Handle<mirror::Class> klass) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::ObjectArray<mirror::Class>* GetDeclaredClasses(Handle<mirror::Class> klass) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::Class* GetDeclaringClass(Handle<mirror::Class> klass) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::Class* GetEnclosingClass(Handle<mirror::Class> klass) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::Object* GetEnclosingMethod(Handle<mirror::Class> klass) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool GetInnerClass(Handle<mirror::Class> klass, mirror::String** name) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool GetInnerClassFlags(Handle<mirror::Class> klass, uint32_t* flags) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsClassAnnotationPresent(Handle<mirror::Class> klass, Handle<mirror::Class> annotation_class)
       const SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -983,11 +995,11 @@
                                      Handle<mirror::Class> array_class,
                                      uint32_t expected_type) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  mirror::ObjectArray<mirror::Object>* GetSignatureValue(Handle<mirror::Class> klass,
+  mirror::ObjectArray<mirror::String>* GetSignatureValue(Handle<mirror::Class> klass,
                                                          const AnnotationSetItem* annotation_set)
       const SHARED_REQUIRES(Locks::mutator_lock_);
-  mirror::ObjectArray<mirror::Object>* GetThrowsValue(Handle<mirror::Class> klass,
-                                                      const AnnotationSetItem* annotation_set) const
+  mirror::ObjectArray<mirror::Class>* GetThrowsValue(Handle<mirror::Class> klass,
+                                                     const AnnotationSetItem* annotation_set) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* ProcessAnnotationSet(Handle<mirror::Class> klass,
                                                             const AnnotationSetItem* annotation_set,
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index a4feac1..d749664 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -28,17 +28,30 @@
 
 namespace art {
 
-extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-
+NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
   if (VLOG_IS_ON(deopt)) {
     LOG(INFO) << "Deopting:";
     self->Dump(LOG(INFO));
   }
 
-  self->PushAndClearDeoptimizationReturnValue();
+  self->AssertHasDeoptimizationContext();
   self->SetException(Thread::GetDeoptimizationException());
   self->QuickDeliverException();
 }
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  artDeoptimizeImpl(self);
+}
+
+extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  // Before deoptimizing to the interpreter, we must push the deoptimization context.
+  JValue return_value;
+  return_value.SetJ(0);  // We never deoptimize from compiled code with an invoke result.
+  self->PushDeoptimizationContext(return_value, false, self->GetException());
+  artDeoptimizeImpl(self);
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index ad5ee84..8e660a2 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -51,6 +51,9 @@
                                                               uint64_t gpr_result,
                                                               uint64_t fpr_result)
     SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Instrumentation exit stub must not be entered with a pending exception.
+  CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
+                                     << self->GetException()->Dump();
   // Compute address of return PC and sanity check that it currently holds 0.
   size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly);
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index aa35ec1..0c7caf3 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -688,8 +688,12 @@
     // Request a stack deoptimization if needed
     ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
     if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+      // Push the context onto the deoptimization stack so we can restore the return value and
+      // the exception before executing the deoptimized frames.
+      self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+
+      // Set special exception to cause deoptimization.
       self->SetException(Thread::GetDeoptimizationException());
-      self->SetDeoptimizationReturnValue(result, shorty[0] == 'L');
     }
 
     // No need to restore the args since the method has already been run by the interpreter.
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index 8209dc8..2842c5a 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -70,7 +70,8 @@
   return reinterpret_cast<const void*>(art_quick_instrumentation_entry);
 }
 
-extern "C" void art_quick_deoptimize_from_compiled_slow_path();
+// Stub to deoptimize from compiled code.
+extern "C" void art_quick_deoptimize_from_compiled_code();
 
 // The return_pc of instrumentation exit stub.
 extern "C" void art_quick_instrumentation_exit();
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index f7a3cd5..7db8888 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -72,15 +72,12 @@
     EXPECT_OFFSET_DIFFP(Thread, tls32_, throwing_OutOfMemoryError, no_thread_suspension, 4);
     EXPECT_OFFSET_DIFFP(Thread, tls32_, no_thread_suspension, thread_exit_check_count, 4);
     EXPECT_OFFSET_DIFFP(Thread, tls32_, thread_exit_check_count, handling_signal_, 4);
-    EXPECT_OFFSET_DIFFP(Thread, tls32_, handling_signal_,
-                        deoptimization_return_value_is_reference, 4);
 
     // TODO: Better connection. Take alignment into account.
     EXPECT_OFFSET_DIFF_GT3(Thread, tls32_.thread_exit_check_count, tls64_.trace_clock_base, 4,
                            thread_tls32_to_tls64);
 
-    EXPECT_OFFSET_DIFFP(Thread, tls64_, trace_clock_base, deoptimization_return_value, 8);
-    EXPECT_OFFSET_DIFFP(Thread, tls64_, deoptimization_return_value, stats, 8);
+    EXPECT_OFFSET_DIFFP(Thread, tls64_, trace_clock_base, stats, 8);
 
     // TODO: Better connection. Take alignment into account.
     EXPECT_OFFSET_DIFF_GT3(Thread, tls64_.stats, tlsPtr_.card_table, 8, thread_tls64_to_tlsptr);
@@ -108,8 +105,8 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, single_step_control, stacked_shadow_frame_record,
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, stacked_shadow_frame_record,
-                        deoptimization_return_value_stack, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_return_value_stack, name, sizeof(void*));
+                        deoptimization_context_stack, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_context_stack, name, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, name, pthread_self, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, pthread_self, last_no_thread_suspension_cause,
                         sizeof(void*));
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index de4b3f4..33d756e 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -108,8 +108,8 @@
   const DexFile* dex_;
 
   std::vector<uint8_t> fake_code_;
-  Leb128EncodingVector fake_mapping_data_;
-  Leb128EncodingVector fake_vmap_table_data_;
+  Leb128EncodingVector<> fake_mapping_data_;
+  Leb128EncodingVector<> fake_vmap_table_data_;
   std::vector<uint8_t> fake_gc_map_;
   std::vector<uint8_t> fake_header_code_and_maps_;
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index aec8d63..4bc44d3 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1900,6 +1900,11 @@
   CollectGarbageInternal(gc_plan_.back(), kGcCauseExplicit, clear_soft_references);
 }
 
+bool Heap::SupportHomogeneousSpaceCompactAndCollectorTransitions() const {
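+  // Requires a main space, a backup main space, and a CMS foreground collector.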
+  return main_space_backup_.get() != nullptr && main_space_ != nullptr &&
+      foreground_collector_type_ == kCollectorTypeCMS;
+}
+
 HomogeneousSpaceCompactResult Heap::PerformHomogeneousSpaceCompact() {
   Thread* self = Thread::Current();
   // Increment the count of requested homogeneous space compactions.
@@ -1919,7 +1924,10 @@
     // exit.
     if (disable_moving_gc_count_ != 0 || IsMovingGc(collector_type_) ||
         !main_space_->CanMoveObjects()) {
-      return HomogeneousSpaceCompactResult::kErrorReject;
+      return kErrorReject;
+    }
+    if (!SupportHomogeneousSpaceCompactAndCollectorTransitions()) {
+      return kErrorUnsupported;
     }
     collector_type_running_ = kCollectorTypeHomogeneousSpaceCompact;
   }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 85688ae..8bffe5e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -105,6 +105,8 @@
   kSuccess,
   // Reject due to disabled moving GC.
   kErrorReject,
+  // Unsupported due to the current configuration.
+  kErrorUnsupported,
   // System is shutting down.
   kErrorVMShuttingDown,
 };
@@ -753,6 +755,10 @@
 
   void DisableGCForShutdown() REQUIRES(!*gc_complete_lock_);
 
+  // Create a new alloc space and compact default alloc space to it.
+  HomogeneousSpaceCompactResult PerformHomogeneousSpaceCompact() REQUIRES(!*gc_complete_lock_);
+  bool SupportHomogeneousSpaceCompactAndCollectorTransitions() const;
+
  private:
   class ConcurrentGCTask;
   class CollectorTransitionTask;
@@ -905,9 +911,6 @@
   // Find a collector based on GC type.
   collector::GarbageCollector* FindCollectorByGcType(collector::GcType gc_type);
 
-  // Create a new alloc space and compact default alloc space to it.
-  HomogeneousSpaceCompactResult PerformHomogeneousSpaceCompact() REQUIRES(!*gc_complete_lock_);
-
   // Create the main free list malloc space, either a RosAlloc space or DlMalloc space.
   void CreateMainMallocSpace(MemMap* mem_map,
                              size_t initial_size,
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index ee6b020..e2094dc 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -765,8 +765,9 @@
       okay = !file_output.Errors();
 
       if (okay) {
-        // Check for expected size.
-        CHECK_EQ(file_output.SumLength(), overall_size);
+        // Check for expected size. The output is expected to be less than or equal to the first
+        // phase; see b/23521263.
+        DCHECK_LE(file_output.SumLength(), overall_size);
       }
       output_ = nullptr;
     }
@@ -810,8 +811,8 @@
     // Write the dump.
     ProcessHeap(true);
 
-    // Check for expected size.
-    CHECK_EQ(net_output.SumLength(), overall_size + kChunkHeaderSize);
+    // Check for expected size. See the comment in DumpToFile.
+    DCHECK_LE(net_output.SumLength(), overall_size + kChunkHeaderSize);
     output_ = nullptr;
 
     return true;
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index c398555..d13526b 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -290,9 +290,7 @@
 
   // Synchronized get which reads a reference, acquiring a lock if necessary.
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  mirror::Object* SynchronizedGet(Thread* /*self*/, ReaderWriterMutex* /*mutex*/,
-                                  IndirectRef iref) const
-      SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* SynchronizedGet(IndirectRef iref) const SHARED_REQUIRES(Locks::mutator_lock_) {
     return Get<kReadBarrierOption>(iref);
   }
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index e28d578..63c02ed 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1016,7 +1016,8 @@
                                 PrettyMethod(method).c_str(),
                                 return_value.GetJ()) << *self;
     }
-    self->SetDeoptimizationReturnValue(return_value, return_shorty == 'L');
+    self->PushDeoptimizationContext(return_value, return_shorty == 'L',
+                                    nullptr /* no pending exception */);
     return GetTwoWordSuccessValue(*return_pc,
                                   reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
   } else {
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 9d41018..8060e3d 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -373,9 +373,9 @@
       globals_(gGlobalsInitial, gGlobalsMax, kGlobal),
       libraries_(new Libraries),
       unchecked_functions_(&gJniInvokeInterface),
-      weak_globals_lock_("JNI weak global reference table lock"),
+      weak_globals_lock_("JNI weak global reference table lock", kJniWeakGlobalsLock),
       weak_globals_(kWeakGlobalsInitial, kWeakGlobalsMax, kWeakGlobal),
-      allow_new_weak_globals_(true),
+      allow_accessing_weak_globals_(true),
       weak_globals_add_condition_("weak globals add condition", weak_globals_lock_) {
   functions = unchecked_functions_;
   SetCheckJniEnabled(runtime_options.Exists(RuntimeArgumentMap::CheckJni));
@@ -473,8 +473,7 @@
     return nullptr;
   }
   MutexLock mu(self, weak_globals_lock_);
-  while (UNLIKELY((!kUseReadBarrier && !allow_new_weak_globals_) ||
-                  (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
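+  // Wait until the GC re-enables weak global access before adding a new weak global.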
+  while (UNLIKELY(!MayAccessWeakGlobals(self))) {
     weak_globals_add_condition_.WaitHoldingLocks(self);
   }
   IndirectRef ref = weak_globals_.Add(IRT_FIRST_SEGMENT, obj);
@@ -542,14 +541,19 @@
 }
 
 void JavaVMExt::DisallowNewWeakGlobals() {
-  MutexLock mu(Thread::Current(), weak_globals_lock_);
-  allow_new_weak_globals_ = false;
+  Thread* const self = Thread::Current();
+  MutexLock mu(self, weak_globals_lock_);
+  // DisallowNewWeakGlobals is only called by CMS during the pause. The mutator lock must be
+  // exclusively held so that no thread is in the middle of DecodeWeakGlobal.
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  allow_accessing_weak_globals_.StoreSequentiallyConsistent(false);
 }
 
 void JavaVMExt::AllowNewWeakGlobals() {
   Thread* self = Thread::Current();
   MutexLock mu(self, weak_globals_lock_);
-  allow_new_weak_globals_ = true;
+  allow_accessing_weak_globals_.StoreSequentiallyConsistent(true);
   weak_globals_add_condition_.Broadcast(self);
 }
 
@@ -557,7 +561,7 @@
   // Lock and unlock once to ensure that no threads are still in the
   // middle of adding new weak globals.
   MutexLock mu(Thread::Current(), weak_globals_lock_);
-  CHECK(!allow_new_weak_globals_);
+  CHECK(!allow_accessing_weak_globals_.LoadSequentiallyConsistent());
 }
 
 void JavaVMExt::BroadcastForNewWeakGlobals() {
@@ -567,8 +571,8 @@
   weak_globals_add_condition_.Broadcast(self);
 }
 
-mirror::Object* JavaVMExt::DecodeGlobal(Thread* self, IndirectRef ref) {
-  return globals_.SynchronizedGet(self, &globals_lock_, ref);
+mirror::Object* JavaVMExt::DecodeGlobal(IndirectRef ref) {
+  return globals_.SynchronizedGet(ref);
 }
 
 void JavaVMExt::UpdateGlobal(Thread* self, IndirectRef ref, mirror::Object* result) {
@@ -576,10 +580,34 @@
   globals_.Update(ref, result);
 }
 
+inline bool JavaVMExt::MayAccessWeakGlobals(Thread* self) const {
+  return MayAccessWeakGlobalsUnlocked(self);
+}
+
+inline bool JavaVMExt::MayAccessWeakGlobalsUnlocked(Thread* self) const {
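+  // CC tracks weak ref access per thread; other collectors use the global atomic flag.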
+  return kUseReadBarrier ? self->GetWeakRefAccessEnabled() :
+      allow_accessing_weak_globals_.LoadSequentiallyConsistent();
+}
+
 mirror::Object* JavaVMExt::DecodeWeakGlobal(Thread* self, IndirectRef ref) {
+  // It is safe to call GetWeakRefAccessEnabled without the lock since CC uses checkpoints to call
+  // SetWeakRefAccessEnabled, and the other collectors only modify allow_accessing_weak_globals_
+  // when the mutators are paused.
+  // This only applies in the case where MayAccessWeakGlobals goes from false to true. In the
+  // other case it may be racy; this is benign since DecodeWeakGlobalLocked behaves correctly
+  // when MayAccessWeakGlobals is false.
+  if (LIKELY(MayAccessWeakGlobalsUnlocked(self))) {
+    return weak_globals_.SynchronizedGet(ref);
+  }
   MutexLock mu(self, weak_globals_lock_);
-  while (UNLIKELY((!kUseReadBarrier && !allow_new_weak_globals_) ||
-                  (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
+  return DecodeWeakGlobalLocked(self, ref);
+}
+
+mirror::Object* JavaVMExt::DecodeWeakGlobalLocked(Thread* self, IndirectRef ref) {
+  if (kDebugLocking) {
+    weak_globals_lock_.AssertHeld(self);
+  }
+  while (UNLIKELY(!MayAccessWeakGlobals(self))) {
     weak_globals_add_condition_.WaitHoldingLocks(self);
   }
   return weak_globals_.Get(ref);
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index d70fc47..d68a85f 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -126,14 +126,23 @@
   void SweepJniWeakGlobals(IsMarkedVisitor* visitor)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
 
-  mirror::Object* DecodeGlobal(Thread* self, IndirectRef ref)
+  mirror::Object* DecodeGlobal(IndirectRef ref)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void UpdateGlobal(Thread* self, IndirectRef ref, mirror::Object* result)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!globals_lock_);
 
   mirror::Object* DecodeWeakGlobal(Thread* self, IndirectRef ref)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!weak_globals_lock_);
+
+  mirror::Object* DecodeWeakGlobalLocked(Thread* self, IndirectRef ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(weak_globals_lock_);
+
+  Mutex& WeakGlobalsLock() RETURN_CAPABILITY(weak_globals_lock_) {
+    return weak_globals_lock_;
+  }
 
   void UpdateWeakGlobal(Thread* self, IndirectRef ref, mirror::Object* result)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
@@ -146,6 +155,12 @@
       REQUIRES(!globals_lock_);
 
  private:
+  // Return true if self can currently access weak globals.
+  bool MayAccessWeakGlobalsUnlocked(Thread* self) const SHARED_REQUIRES(Locks::mutator_lock_);
+  bool MayAccessWeakGlobals(Thread* self) const
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(weak_globals_lock_);
+
   Runtime* const runtime_;
 
   // Used for testing. By default, we'll LOG(FATAL) the reason.
@@ -175,8 +190,10 @@
   // Since weak_globals_ contain weak roots, be careful not to
   // directly access the object references in it. Use Get() with the
   // read barrier enabled.
-  IndirectReferenceTable weak_globals_ GUARDED_BY(weak_globals_lock_);
-  bool allow_new_weak_globals_ GUARDED_BY(weak_globals_lock_);
+  // Not guarded by weak_globals_lock_ since we may use SynchronizedGet in DecodeWeakGlobal.
+  IndirectReferenceTable weak_globals_;
+  // Not guarded by weak_globals_lock_ since we may use SynchronizedGet in DecodeWeakGlobal.
+  Atomic<bool> allow_accessing_weak_globals_;
   ConditionVariable weak_globals_add_condition_ GUARDED_BY(weak_globals_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(JavaVMExt);
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 5a9a0f5..1139a1e 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -126,9 +126,12 @@
  * Write a packet of "length" bytes. Grabs a mutex to assure atomicity.
  */
 ssize_t JdwpNetStateBase::WritePacket(ExpandBuf* pReply, size_t length) {
-  MutexLock mu(Thread::Current(), socket_lock_);
-  DCHECK(IsConnected()) << "Connection with debugger is closed";
   DCHECK_LE(length, expandBufGetLength(pReply));
+  if (!IsConnected()) {
+    LOG(WARNING) << "Connection with debugger is closed";
+    return -1;
+  }
+  MutexLock mu(Thread::Current(), socket_lock_);
   return TEMP_FAILURE_RETRY(write(clientSock, expandBufGetBuffer(pReply), length));
 }
 
diff --git a/runtime/leb128.h b/runtime/leb128.h
index 976936d..baf9da2 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h
@@ -178,10 +178,11 @@
   dest->push_back(out);
 }
 
-// An encoder that pushed uint32_t data onto the given std::vector.
+// An encoder that pushes int32_t/uint32_t data onto the given std::vector.
+template <typename Allocator = std::allocator<uint8_t>>
 class Leb128Encoder {
  public:
-  explicit Leb128Encoder(std::vector<uint8_t>* data) : data_(data) {
+  explicit Leb128Encoder(std::vector<uint8_t, Allocator>* data) : data_(data) {
     DCHECK(data != nullptr);
   }
 
@@ -211,22 +212,27 @@
     }
   }
 
-  const std::vector<uint8_t>& GetData() const {
+  const std::vector<uint8_t, Allocator>& GetData() const {
     return *data_;
   }
 
  protected:
-  std::vector<uint8_t>* const data_;
+  std::vector<uint8_t, Allocator>* const data_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
 };
 
 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
-class Leb128EncodingVector FINAL : private std::vector<uint8_t>, public Leb128Encoder {
+template <typename Allocator = std::allocator<uint8_t>>
+class Leb128EncodingVector FINAL : private std::vector<uint8_t, Allocator>,
+                                   public Leb128Encoder<Allocator> {
  public:
-  Leb128EncodingVector() : Leb128Encoder(this) {
-  }
+  Leb128EncodingVector() : Leb128Encoder<Allocator>(this) { }
+
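+  // Allows the underlying vector to be constructed with a stateful custom allocator.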
+  explicit Leb128EncodingVector(const Allocator& alloc)
+    : std::vector<uint8_t, Allocator>(alloc),
+      Leb128Encoder<Allocator>(this) { }
 
  private:
   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
diff --git a/runtime/leb128_test.cc b/runtime/leb128_test.cc
index 1bb493d..09f7ecc 100644
--- a/runtime/leb128_test.cc
+++ b/runtime/leb128_test.cc
@@ -94,7 +94,7 @@
 TEST(Leb128Test, UnsignedSinglesVector) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
-    Leb128EncodingVector builder;
+    Leb128EncodingVector<> builder;
     builder.PushBackUnsigned(uleb128_tests[i].decoded);
     EXPECT_EQ(UnsignedLeb128Size(uleb128_tests[i].decoded), builder.GetData().size());
     const uint8_t* data_ptr = &uleb128_tests[i].leb128_data[0];
@@ -131,7 +131,7 @@
 
 TEST(Leb128Test, UnsignedStreamVector) {
   // Encode a number of entries.
-  Leb128EncodingVector builder;
+  Leb128EncodingVector<> builder;
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
     builder.PushBackUnsigned(uleb128_tests[i].decoded);
   }
@@ -175,7 +175,7 @@
 TEST(Leb128Test, SignedSinglesVector) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
-    Leb128EncodingVector builder;
+    Leb128EncodingVector<> builder;
     builder.PushBackSigned(sleb128_tests[i].decoded);
     EXPECT_EQ(SignedLeb128Size(sleb128_tests[i].decoded), builder.GetData().size());
     const uint8_t* data_ptr = &sleb128_tests[i].leb128_data[0];
@@ -212,7 +212,7 @@
 
 TEST(Leb128Test, SignedStreamVector) {
   // Encode a number of entries.
-  Leb128EncodingVector builder;
+  Leb128EncodingVector<> builder;
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
     builder.PushBackSigned(sleb128_tests[i].decoded);
   }
@@ -275,7 +275,7 @@
 TEST(Leb128Test, Speed) {
   std::unique_ptr<Histogram<uint64_t>> enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
   std::unique_ptr<Histogram<uint64_t>> dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
-  Leb128EncodingVector builder;
+  Leb128EncodingVector<> builder;
   // Push back 1024 chunks of 1024 values measuring encoding speed.
   uint64_t last_time = NanoTime();
   for (size_t i = 0; i < 1024; i++) {
diff --git a/runtime/length_prefixed_array.h b/runtime/length_prefixed_array.h
index d9bc656..0ff6d7a 100644
--- a/runtime/length_prefixed_array.h
+++ b/runtime/length_prefixed_array.h
@@ -19,7 +19,6 @@
 
 #include <stddef.h>  // for offsetof()
 
-#include "linear_alloc.h"
 #include "stride_iterator.h"
 #include "base/bit_utils.h"
 #include "base/casts.h"
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index eda6c9b..28a830d 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -146,8 +146,8 @@
 inline size_t String::SizeOf() {
   size_t size = sizeof(String) + (sizeof(uint16_t) * GetLength<kVerifyFlags>());
   // String.equals() intrinsics assume zero-padding up to kObjectAlignment,
-  // so make sure the padding is actually zero-initialized if the allocator
-  // chooses to clear, or GC compaction chooses to copy, only SizeOf() bytes.
+  // so make sure the zero-padding is actually copied around if GC compaction
+  // chooses to copy only SizeOf() bytes.
   // http://b/23528461
   return RoundUp(size, kObjectAlignment);
 }
@@ -155,21 +155,35 @@
 template <bool kIsInstrumented, typename PreFenceVisitor>
 inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorType allocator_type,
                              const PreFenceVisitor& pre_fence_visitor) {
-  size_t header_size = sizeof(String);
-  size_t data_size = sizeof(uint16_t) * utf16_length;
+  constexpr size_t header_size = sizeof(String);
+  static_assert(sizeof(utf16_length) <= sizeof(size_t),
+                "static_cast<size_t>(utf16_length) must not lose bits.");
+  size_t length = static_cast<size_t>(utf16_length);
+  size_t data_size = sizeof(uint16_t) * length;
   size_t size = header_size + data_size;
+  // String.equals() intrinsics assume zero-padding up to kObjectAlignment,
+  // so make sure the allocator clears the padding as well.
+  // http://b/23528461
+  size_t alloc_size = RoundUp(size, kObjectAlignment);
   Class* string_class = GetJavaLangString();
 
   // Check for overflow and throw OutOfMemoryError if this was an unreasonable request.
-  if (UNLIKELY(size < data_size)) {
+  // Do this by comparing with the maximum length that will _not_ cause an overflow.
+  constexpr size_t overflow_length = (-header_size) / sizeof(uint16_t);  // Unsigned arithmetic.
+  constexpr size_t max_alloc_length = overflow_length - 1u;
+  static_assert(IsAligned<sizeof(uint16_t)>(kObjectAlignment),
+                "kObjectAlignment must be at least as big as Java char alignment");
+  constexpr size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / sizeof(uint16_t));
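+  // Rounding down leaves headroom so that computing alloc_size above cannot wrap around.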
+  if (UNLIKELY(length > max_length)) {
     self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
                                              PrettyDescriptor(string_class).c_str(),
                                              utf16_length).c_str());
     return nullptr;
   }
+
   gc::Heap* heap = Runtime::Current()->GetHeap();
   return down_cast<String*>(
-      heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, string_class, size,
+      heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, string_class, alloc_size,
                                                             allocator_type, pre_fence_visitor));
 }
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 4f97d20..3b84bfa 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -171,7 +171,7 @@
     if (array == nullptr) {
       ScopedObjectAccess soa(env);
       for (auto& dex_file : dex_files) {
-        if (Runtime::Current()->GetClassLinker()->IsDexFileRegistered(*dex_file)) {
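+        // Release ownership of registered dex files so they are not deleted while still in use.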
+        if (linker->FindDexCache(soa.Self(), *dex_file, true) != nullptr) {
           dex_file.release();
         }
       }
@@ -208,8 +208,9 @@
   //
   // TODO: The Runtime should support unloading of classes and freeing of the
   // dex files for those unloaded classes rather than leaking dex files here.
-  for (auto& dex_file : *dex_files) {
-    if (!Runtime::Current()->GetClassLinker()->IsDexFileRegistered(*dex_file)) {
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  for (const DexFile* dex_file : *dex_files) {
+    if (class_linker->FindDexCache(soa.Self(), *dex_file, true) == nullptr) {
       delete dex_file;
     }
   }
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 9ea339a..4f95723 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -425,14 +425,17 @@
 static void PreloadDexCachesStatsFilled(DexCacheStats* filled)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (!kPreloadDexCachesCollectStats) {
-      return;
+    return;
   }
-  ClassLinker* linker = Runtime::Current()->GetClassLinker();
-  const std::vector<const DexFile*>& boot_class_path = linker->GetBootClassPath();
-  for (size_t i = 0; i< boot_class_path.size(); i++) {
-    const DexFile* dex_file = boot_class_path[i];
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  Thread* const self = Thread::Current();
+  for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
     CHECK(dex_file != nullptr);
-    mirror::DexCache* dex_cache = linker->FindDexCache(*dex_file);
+    mirror::DexCache* const dex_cache = class_linker->FindDexCache(self, *dex_file, true);
+    // If the dex cache was deallocated, just continue.
+    if (dex_cache == nullptr) {
+      continue;
+    }
     for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
       mirror::String* string = dex_cache->GetResolvedString(j);
       if (string != nullptr) {
@@ -446,7 +449,7 @@
       }
     }
     for (size_t j = 0; j < dex_cache->NumResolvedFields(); j++) {
-      ArtField* field = linker->GetResolvedField(j, dex_cache);
+      ArtField* field = class_linker->GetResolvedField(j, dex_cache);
       if (field != nullptr) {
         filled->num_fields++;
       }
@@ -490,11 +493,11 @@
   }
 
   const std::vector<const DexFile*>& boot_class_path = linker->GetBootClassPath();
-  for (size_t i = 0; i< boot_class_path.size(); i++) {
+  for (size_t i = 0; i < boot_class_path.size(); i++) {
     const DexFile* dex_file = boot_class_path[i];
     CHECK(dex_file != nullptr);
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(*dex_file)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->RegisterDexFile(*dex_file)));
 
     if (kPreloadDexCachesStrings) {
       for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 7e464e9..8fd6849 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -430,7 +430,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass()) {
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
   Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
@@ -442,7 +442,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass()) {
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
     // Return an empty array instead of a null pointer.
     mirror::Class* annotation_array_class =
         soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
@@ -453,18 +453,141 @@
   return soa.AddLocalReference<jobjectArray>(klass->GetDexFile().GetAnnotationsForClass(klass));
 }
 
+static jobjectArray Class_getDeclaredClasses(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  mirror::ObjectArray<mirror::Class>* classes = nullptr;
+  if (!klass->IsProxyClass() && klass->GetDexCache() != nullptr) {
+    classes = klass->GetDexFile().GetDeclaredClasses(klass);
+  }
+  if (classes == nullptr) {
+    // Return an empty array instead of a null pointer.
+    mirror::Class* class_class = mirror::Class::GetJavaLangClass();
+    mirror::Class* class_array_class =
+        Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
+    if (class_array_class == nullptr) {
+      return nullptr;
+    }
+    mirror::ObjectArray<mirror::Class>* empty_array =
+        mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class, 0);
+    return soa.AddLocalReference<jobjectArray>(empty_array);
+  }
+  return soa.AddLocalReference<jobjectArray>(classes);
+}
+
+static jclass Class_getEnclosingClass(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return nullptr;
+  }
+  return soa.AddLocalReference<jclass>(klass->GetDexFile().GetEnclosingClass(klass));
+}
+
+static jobject Class_getEnclosingConstructorNative(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return nullptr;
+  }
+  mirror::Object* method = klass->GetDexFile().GetEnclosingMethod(klass);
+  if (method != nullptr) {
+    if (method->GetClass() ==
+        soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_reflect_Constructor)) {
+      return soa.AddLocalReference<jobject>(method);
+    }
+  }
+  return nullptr;
+}
+
+static jobject Class_getEnclosingMethodNative(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return nullptr;
+  }
+  mirror::Object* method = klass->GetDexFile().GetEnclosingMethod(klass);
+  if (method != nullptr) {
+    if (method->GetClass() ==
+        soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_reflect_Method)) {
+      return soa.AddLocalReference<jobject>(method);
+    }
+  }
+  return nullptr;
+}
+
+static jint Class_getInnerClassFlags(JNIEnv* env, jobject javaThis, jint defaultValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return defaultValue;
+  }
+  uint32_t flags;
+  if (!klass->GetDexFile().GetInnerClassFlags(klass, &flags)) {
+    return defaultValue;
+  }
+  return flags;
+}
+
+static jstring Class_getInnerClassName(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return nullptr;
+  }
+  mirror::String* class_name = nullptr;
+  if (!klass->GetDexFile().GetInnerClass(klass, &class_name)) {
+    return nullptr;
+  }
+  return soa.AddLocalReference<jstring>(class_name);
+}
+
+static jboolean Class_isAnonymousClass(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return false;
+  }
+  mirror::String* class_name = nullptr;
+  if (!klass->GetDexFile().GetInnerClass(klass, &class_name)) {
+    return false;
+  }
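+  // An anonymous class has an InnerClass annotation whose name is null.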
+  return class_name == nullptr;
+}
+
 static jboolean Class_isDeclaredAnnotationPresent(JNIEnv* env, jobject javaThis,
                                                   jclass annotationType) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass()) {
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
     return false;
   }
   Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
   return klass->GetDexFile().IsClassAnnotationPresent(klass, annotation_class);
 }
 
+static jclass Class_getDeclaringClass(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return nullptr;
+  }
+  // Return null for anonymous classes.
+  if (Class_isAnonymousClass(env, javaThis)) {
+    return nullptr;
+  }
+  return soa.AddLocalReference<jclass>(klass->GetDexFile().GetDeclaringClass(klass));
+}
+
 static jobject Class_newInstance(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<4> hs(soa.Self());
@@ -550,6 +673,7 @@
   NATIVE_METHOD(Class, getDeclaredAnnotation,
                 "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Class, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
+  NATIVE_METHOD(Class, getDeclaredClasses, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Class, getDeclaredConstructorInternal,
                 "!([Ljava/lang/Class;)Ljava/lang/reflect/Constructor;"),
   NATIVE_METHOD(Class, getDeclaredConstructorsInternal, "!(Z)[Ljava/lang/reflect/Constructor;"),
@@ -561,9 +685,16 @@
                 "!(Ljava/lang/String;[Ljava/lang/Class;)Ljava/lang/reflect/Method;"),
   NATIVE_METHOD(Class, getDeclaredMethodsUnchecked,
                 "!(Z)[Ljava/lang/reflect/Method;"),
+  NATIVE_METHOD(Class, getDeclaringClass, "!()Ljava/lang/Class;"),
+  NATIVE_METHOD(Class, getEnclosingClass, "!()Ljava/lang/Class;"),
+  NATIVE_METHOD(Class, getEnclosingConstructorNative, "!()Ljava/lang/reflect/Constructor;"),
+  NATIVE_METHOD(Class, getEnclosingMethodNative, "!()Ljava/lang/reflect/Method;"),
+  NATIVE_METHOD(Class, getInnerClassFlags, "!(I)I"),
+  NATIVE_METHOD(Class, getInnerClassName, "!()Ljava/lang/String;"),
   NATIVE_METHOD(Class, getNameNative, "!()Ljava/lang/String;"),
   NATIVE_METHOD(Class, getProxyInterfaces, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Class, getPublicDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
+  NATIVE_METHOD(Class, isAnonymousClass, "!()Z"),
   NATIVE_METHOD(Class, isDeclaredAnnotationPresent, "!(Ljava/lang/Class;)Z"),
   NATIVE_METHOD(Class, newInstance, "!()Ljava/lang/Object;"),
 };
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index b4b77e7..e1e9ceb 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -48,15 +48,18 @@
 static jobjectArray Constructor_getExceptionTypes(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  mirror::ObjectArray<mirror::Object>* result_array =
+  mirror::ObjectArray<mirror::Class>* result_array =
       method->GetDexFile()->GetExceptionTypesForMethod(method);
   if (result_array == nullptr) {
     // Return an empty array instead of a null pointer.
     mirror::Class* class_class = mirror::Class::GetJavaLangClass();
     mirror::Class* class_array_class =
         Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
-    mirror::ObjectArray<mirror::Object>* empty_array =
-        mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), class_array_class, 0);
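+    // FindArrayClass can fail and return null; in that case return null to the caller.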
+    if (class_array_class == nullptr) {
+      return nullptr;
+    }
+    mirror::ObjectArray<mirror::Class>* empty_array =
+        mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class, 0);
     return soa.AddLocalReference<jobjectArray>(empty_array);
   } else {
     return soa.AddLocalReference<jobjectArray>(result_array);
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index 1219f85..caacba6 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -82,15 +82,18 @@
     mirror::ObjectArray<mirror::Class>* declared_exceptions = klass->GetThrows()->Get(throws_index);
     return soa.AddLocalReference<jobjectArray>(declared_exceptions->Clone(soa.Self()));
   } else {
-    mirror::ObjectArray<mirror::Object>* result_array =
+    mirror::ObjectArray<mirror::Class>* result_array =
         method->GetDexFile()->GetExceptionTypesForMethod(method);
     if (result_array == nullptr) {
       // Return an empty array instead of a null pointer
       mirror::Class* class_class = mirror::Class::GetJavaLangClass();
       mirror::Class* class_array_class =
           Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
-      mirror::ObjectArray<mirror::Object>* empty_array =
-          mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), class_array_class, 0);
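+      // FindArrayClass can fail and return null; in that case return null to the caller.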
+      if (class_array_class == nullptr) {
+        return nullptr;
+      }
+      mirror::ObjectArray<mirror::Class>* empty_array =
+          mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class, 0);
       return soa.AddLocalReference<jobjectArray>(empty_array);
     } else {
       return soa.AddLocalReference<jobjectArray>(result_array);
diff --git a/runtime/oat.h b/runtime/oat.h
index 29dd76c..1520a9b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '6', '8', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '6', '9', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index 380e72b..bfa8c54 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -20,6 +20,7 @@
 #include "runtime.h"
 
 #include "art_method.h"
+#include "class_linker.h"
 #include "read_barrier-inl.h"
 
 namespace art {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1912314..25bb827 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -20,6 +20,7 @@
 #include <sys/mount.h>
 #ifdef __linux__
 #include <linux/fs.h>
+#include <sys/prctl.h>
 #endif
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
@@ -235,6 +236,9 @@
     self->GetJniEnv()->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
                                             WellKnownClasses::java_lang_Daemons_stop);
   }
+
+  Trace::Shutdown();
+
   if (attach_shutdown_thread) {
     DetachCurrentThread();
     self = nullptr;
@@ -245,8 +249,6 @@
     BackgroundMethodSamplingProfiler::Shutdown();
   }
 
-  Trace::Shutdown();
-
   // Make sure to let the GC complete if it is running.
   heap_->WaitForGcToComplete(gc::kGcCauseBackground, self);
   heap_->DeleteThreadPool();
@@ -492,6 +494,14 @@
 
   CHECK(!no_sig_chain_) << "A started runtime should have sig chain enabled";
 
+  // On debug host builds, disable the ptrace restriction so debuggers and the test timeout
+  // thread dump can attach. 64-bit only: prctl() may fail in 32-bit userspace on a 64-bit kernel.
+#if defined(__linux__) && !defined(__ANDROID__) && defined(__x86_64__)
+  if (kIsDebugBuild) {
+    CHECK_EQ(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), 0);
+  }
+#endif
+
   // Restore main thread state to kNative as expected by native code.
   Thread* self = Thread::Current();
 
@@ -791,6 +801,12 @@
   return failure_count;
 }
 
+void Runtime::SetSentinel(mirror::Object* sentinel) {
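+  // The sentinel is set at most once, by the class linker during startup.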
+  CHECK(sentinel_.Read() == nullptr);
+  CHECK(sentinel != nullptr);
+  sentinel_ = GcRoot<mirror::Object>(sentinel);
+}
+
 bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) {
   ATRACE_BEGIN("Runtime::Init");
   CHECK_EQ(sysconf(_SC_PAGE_SIZE), kPageSize);
@@ -1054,10 +1070,6 @@
 
   CHECK(class_linker_ != nullptr);
 
-  // Initialize the special sentinel_ value early.
-  sentinel_ = GcRoot<mirror::Object>(class_linker_->AllocObject(self));
-  CHECK(sentinel_.Read() != nullptr);
-
   verifier::MethodVerifier::Init();
 
   if (runtime_options.Exists(Opt::MethodTrace)) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 4577b75..bd21db1 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -568,6 +568,9 @@
     return fingerprint_;
   }
 
+  // Called from class linker.
+  void SetSentinel(mirror::Object* sentinel) SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   static void InitPlatformSignalHandlers();
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index a33e150..9929487 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -162,27 +162,41 @@
   ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints);
 }
 
-class DeoptimizationReturnValueRecord {
+class DeoptimizationContextRecord {
  public:
-  DeoptimizationReturnValueRecord(const JValue& ret_val,
-                                  bool is_reference,
-                                  DeoptimizationReturnValueRecord* link)
-      : ret_val_(ret_val), is_reference_(is_reference), link_(link) {}
+  DeoptimizationContextRecord(const JValue& ret_val, bool is_reference,
+                              mirror::Throwable* pending_exception,
+                              DeoptimizationContextRecord* link)
+      : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception),
+        link_(link) {}
 
   JValue GetReturnValue() const { return ret_val_; }
   bool IsReference() const { return is_reference_; }
-  DeoptimizationReturnValueRecord* GetLink() const { return link_; }
-  mirror::Object** GetGCRoot() {
+  mirror::Throwable* GetPendingException() const { return pending_exception_; }
+  DeoptimizationContextRecord* GetLink() const { return link_; }
+  mirror::Object** GetReturnValueAsGCRoot() {
     DCHECK(is_reference_);
     return ret_val_.GetGCRoot();
   }
+  mirror::Object** GetPendingExceptionAsGCRoot() {
+    return reinterpret_cast<mirror::Object**>(&pending_exception_);
+  }
 
  private:
+  // The value returned by the method at the top of the stack before deoptimization.
   JValue ret_val_;
-  const bool is_reference_;
-  DeoptimizationReturnValueRecord* const link_;
 
-  DISALLOW_COPY_AND_ASSIGN(DeoptimizationReturnValueRecord);
+  // Indicates whether the returned value is a reference. If so, the GC will visit it.
+  const bool is_reference_;
+
+  // The exception that was pending before deoptimization (or null if there was no pending
+  // exception).
+  mirror::Throwable* pending_exception_;
+
+  // A link to the previous DeoptimizationContextRecord.
+  DeoptimizationContextRecord* const link_;
+
+  DISALLOW_COPY_AND_ASSIGN(DeoptimizationContextRecord);
 };
 
 class StackedShadowFrameRecord {
@@ -206,22 +220,28 @@
   DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord);
 };
 
-void Thread::PushAndClearDeoptimizationReturnValue() {
-  DeoptimizationReturnValueRecord* record = new DeoptimizationReturnValueRecord(
-      tls64_.deoptimization_return_value,
-      tls32_.deoptimization_return_value_is_reference,
-      tlsPtr_.deoptimization_return_value_stack);
-  tlsPtr_.deoptimization_return_value_stack = record;
-  ClearDeoptimizationReturnValue();
+void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+                                       mirror::Throwable* exception) {
+  DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
+      return_value,
+      is_reference,
+      exception,
+      tlsPtr_.deoptimization_context_stack);
+  tlsPtr_.deoptimization_context_stack = record;
 }
 
-JValue Thread::PopDeoptimizationReturnValue() {
-  DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack;
-  DCHECK(record != nullptr);
-  tlsPtr_.deoptimization_return_value_stack = record->GetLink();
-  JValue ret_val(record->GetReturnValue());
+void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) {
+  AssertHasDeoptimizationContext();
+  DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
+  tlsPtr_.deoptimization_context_stack = record->GetLink();
+  result->SetJ(record->GetReturnValue().GetJ());
+  *exception = record->GetPendingException();
   delete record;
-  return ret_val;
+}
+
+void Thread::AssertHasDeoptimizationContext() {
+  CHECK(tlsPtr_.deoptimization_context_stack != nullptr)
+      << "No deoptimization context for thread " << *this;
 }
 
 void Thread::PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type) {
@@ -1575,6 +1595,9 @@
   CHECK(tlsPtr_.flip_function == nullptr);
   CHECK_EQ(tls32_.suspended_at_suspend_check, false);
 
+  // Make sure we processed all deoptimization requests.
+  CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization";
+
   // We may be deleting a still born thread.
   SetStateUnsafe(kTerminated);
 
@@ -1705,7 +1728,7 @@
       result = nullptr;
     }
   } else if (kind == kGlobal) {
-    result = tlsPtr_.jni_env->vm->DecodeGlobal(const_cast<Thread*>(this), ref);
+    result = tlsPtr_.jni_env->vm->DecodeGlobal(ref);
   } else {
     DCHECK_EQ(kind, kWeakGlobal);
     result = tlsPtr_.jni_env->vm->DecodeWeakGlobal(const_cast<Thread*>(this), ref);
@@ -2593,7 +2616,7 @@
   visitor->VisitRootIfNonNull(&tlsPtr_.opeer, RootInfo(kRootThreadObject, thread_id));
   if (tlsPtr_.exception != nullptr && tlsPtr_.exception != GetDeoptimizationException()) {
     visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception),
-                   RootInfo(kRootNativeStack, thread_id));
+                       RootInfo(kRootNativeStack, thread_id));
   }
   visitor->VisitRootIfNonNull(&tlsPtr_.monitor_enter_object, RootInfo(kRootNativeStack, thread_id));
   tlsPtr_.jni_env->locals.VisitRoots(visitor, RootInfo(kRootJNILocal, thread_id));
@@ -2602,6 +2625,7 @@
   if (tlsPtr_.debug_invoke_req != nullptr) {
     tlsPtr_.debug_invoke_req->VisitRoots(visitor, RootInfo(kRootDebugger, thread_id));
   }
+  // Visit roots for deoptimization.
   if (tlsPtr_.stacked_shadow_frame_record != nullptr) {
     RootCallbackVisitor visitor_to_callback(visitor, thread_id);
     ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback);
@@ -2615,14 +2639,16 @@
       }
     }
   }
-  if (tlsPtr_.deoptimization_return_value_stack != nullptr) {
-    for (DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack;
+  if (tlsPtr_.deoptimization_context_stack != nullptr) {
+    for (DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
          record != nullptr;
          record = record->GetLink()) {
       if (record->IsReference()) {
-        visitor->VisitRootIfNonNull(record->GetGCRoot(),
-            RootInfo(kRootThreadObject, thread_id));
+        visitor->VisitRootIfNonNull(record->GetReturnValueAsGCRoot(),
+                                    RootInfo(kRootThreadObject, thread_id));
       }
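+      // The pending exception saved in the record is also a GC root.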
+      visitor->VisitRootIfNonNull(record->GetPendingExceptionAsGCRoot(),
+                                  RootInfo(kRootThreadObject, thread_id));
     }
   }
   for (auto* verifier = tlsPtr_.method_verifier; verifier != nullptr; verifier = verifier->link_) {
diff --git a/runtime/thread.h b/runtime/thread.h
index 9bb57bf..2d450f5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -77,7 +77,7 @@
 class Closure;
 class Context;
 struct DebugInvokeReq;
-class DeoptimizationReturnValueRecord;
+class DeoptimizationContextRecord;
 class DexFile;
 class JavaVMExt;
 struct JNIEnvExt;
@@ -830,19 +830,13 @@
   // and execute Java code, so there might be nested deoptimizations happening.
-  // We need to save the ongoing deoptimization shadow frames and return
-  // values on stacks.
+  // We need to save the ongoing deoptimization shadow frames, return values and pending
+  // exceptions on stacks.
-  void SetDeoptimizationReturnValue(const JValue& ret_val, bool is_reference) {
-    tls64_.deoptimization_return_value.SetJ(ret_val.GetJ());
-    tls32_.deoptimization_return_value_is_reference = is_reference;
-  }
-  bool IsDeoptimizationReturnValueReference() {
-    return tls32_.deoptimization_return_value_is_reference;
-  }
-  void ClearDeoptimizationReturnValue() {
-    tls64_.deoptimization_return_value.SetJ(0);
-    tls32_.deoptimization_return_value_is_reference = false;
-  }
-  void PushAndClearDeoptimizationReturnValue();
-  JValue PopDeoptimizationReturnValue();
+  void PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+                                 mirror::Throwable* exception)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  void AssertHasDeoptimizationContext()
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type);
   ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type);
 
@@ -1102,9 +1096,8 @@
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false),
-      deoptimization_return_value_is_reference(false), suspended_at_suspend_check(false),
-      ready_for_debug_invoke(false), debug_method_entry_(false), is_gc_marking(false),
-      weak_ref_access_enabled(true) {
+      suspended_at_suspend_check(false), ready_for_debug_invoke(false),
+      debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true) {
     }
 
     union StateAndFlags state_and_flags;
@@ -1144,10 +1137,6 @@
     // True if signal is being handled by this thread.
     bool32_t handling_signal_;
 
-    // True if the return value for interpreter after deoptimization is a reference.
-    // For gc purpose.
-    bool32_t deoptimization_return_value_is_reference;
-
     // True if the thread is suspended in FullSuspendCheck(). This is
     // used to distinguish runnable threads that are suspended due to
     // a normal suspend check from other threads.
@@ -1178,15 +1167,12 @@
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
-    tls_64bit_sized_values() : trace_clock_base(0), deoptimization_return_value() {
+    tls_64bit_sized_values() : trace_clock_base(0) {
     }
 
     // The clock base used for tracing.
     uint64_t trace_clock_base;
 
-    // Return value used by deoptimization.
-    JValue deoptimization_return_value;
-
     RuntimeStats stats;
   } tls64_;
 
@@ -1197,7 +1183,7 @@
       stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr),
       top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
       instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
-      stacked_shadow_frame_record(nullptr), deoptimization_return_value_stack(nullptr),
+      stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
       thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
@@ -1282,7 +1268,7 @@
     StackedShadowFrameRecord* stacked_shadow_frame_record;
 
-    // Deoptimization return value record stack.
+    // Stack of deoptimization context records.
-    DeoptimizationReturnValueRecord* deoptimization_return_value_stack;
+    DeoptimizationContextRecord* deoptimization_context_stack;
 
     // A cached copy of the java.lang.Thread's name.
     std::string* name;
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 4393430..4ab5c0e 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -638,9 +638,11 @@
     const std::map<const DexFile*, DexIndexBitSet*>& seen_methods,
     std::set<ArtMethod*>* visited_methods) SHARED_REQUIRES(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Thread* const self = Thread::Current();
   for (auto& e : seen_methods) {
     DexIndexBitSet* bit_set = e.second;
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(*e.first);
+    // TODO: Visit trace methods as roots.
+    mirror::DexCache* dex_cache = class_linker->FindDexCache(self, *e.first, false);
     for (uint32_t i = 0; i < bit_set->size(); ++i) {
       if ((*bit_set)[i]) {
         visited_methods->insert(dex_cache->GetResolvedMethod(i, sizeof(void*)));
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 8aa1189..27dacea 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1441,14 +1441,6 @@
   return true;
 }
 
-void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* dst) {
-  Leb128Encoder(dst).PushBackUnsigned(data);
-}
-
-void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* dst) {
-  Leb128Encoder(dst).PushBackSigned(data);
-}
-
 std::string PrettyDescriptor(Primitive::Type type) {
   return PrettyDescriptor(Primitive::Descriptor(type));
 }
diff --git a/runtime/utils.h b/runtime/utils.h
index d1be51a..16835c2 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -294,9 +294,6 @@
   buf->push_back((data >> 24) & 0xff);
 }
 
-void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* buf);
-void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* buf);
-
 // Deleter using free() for use with std::unique_ptr<>. See also UniqueCPtr<> below.
 struct FreeDelete {
   // NOTE: Deleting a const object is valid but free() takes a non-const pointer.
diff --git a/test/005-annotations/expected.txt b/test/005-annotations/expected.txt
index 36b3868..e1c3dad 100644
--- a/test/005-annotations/expected.txt
+++ b/test/005-annotations/expected.txt
@@ -101,3 +101,10 @@
 Package declared annotations:
       @android.test.anno.AnnoSimplePackage()
         interface android.test.anno.AnnoSimplePackage
+
+Inner Classes:
+Canonical:android.test.anno.ClassWithInnerClasses.InnerClass Simple:InnerClass
+Canonical:null Simple:
+
+Get annotation with missing class should not throw
+Got expected TypeNotPresentException
diff --git a/test/005-annotations/src/android/test/anno/AnnoMissingClass.java b/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
new file mode 100644
index 0000000..c32e9a2
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Retention(RetentionPolicy.RUNTIME)
+public @interface AnnoMissingClass {
+    Class value();
+}
diff --git a/test/005-annotations/src/android/test/anno/ClassWithInnerClasses.java b/test/005-annotations/src/android/test/anno/ClassWithInnerClasses.java
new file mode 100644
index 0000000..e151f1a
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/ClassWithInnerClasses.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+public class ClassWithInnerClasses {
+  public class InnerClass {
+    public String toString() {
+      return "Canonical:" + getClass().getCanonicalName() + " Simple:" + getClass().getSimpleName();
+    }
+  }
+  Object anonymousClass = new Object() {
+    public String toString() {
+      return "Canonical:" + getClass().getCanonicalName() + " Simple:" + getClass().getSimpleName();
+    }
+  };
+
+  public void print() {
+    System.out.println(new InnerClass());
+    System.out.println(anonymousClass);
+  }
+}
diff --git a/test/005-annotations/src/android/test/anno/ClassWithMissingAnnotation.java b/test/005-annotations/src/android/test/anno/ClassWithMissingAnnotation.java
new file mode 100644
index 0000000..8cfdd8c
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/ClassWithMissingAnnotation.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+// Annotate with a type that is missing at runtime, to cause TypeNotPresentException.
+@AnnoMissingClass(MissingAnnotation.class)
+public class ClassWithMissingAnnotation {
+}
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index 1deff33..7b74a73 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -180,5 +180,24 @@
         printAnnotationArray("    ", TestAnnotations.class.getPackage().getAnnotations());
         System.out.println("Package declared annotations:");
         printAnnotationArray("    ", TestAnnotations.class.getPackage().getDeclaredAnnotations());
+
+        System.out.println();
+
+        // Test inner classes.
+        System.out.println("Inner Classes:");
+        new ClassWithInnerClasses().print();
+
+        System.out.println();
+
+        // Test TypeNotPresentException.
+        try {
+            AnnoMissingClass missingAnno =
+                ClassWithMissingAnnotation.class.getAnnotation(AnnoMissingClass.class);
+            System.out.println("Get annotation with missing class should not throw");
+            System.out.println(missingAnno.value());
+            System.out.println("Getting value of missing annotaton should have thrown");
+        } catch (TypeNotPresentException expected) {
+            System.out.println("Got expected TypeNotPresentException");
+        }
     }
 }
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index bd606a6..08ccf0e 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -594,6 +594,54 @@
     Assert.assertEquals(Math.ceil(-2.5), -2.0d, 0.0);
     Assert.assertEquals(Math.ceil(-2.9), -2.0d, 0.0);
     Assert.assertEquals(Math.ceil(-3.0), -3.0d, 0.0);
+    // 2^52 - 1.5
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x432FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0x432FFFFFFFFFFFFEl), 0.0);
+    // 2^52 - 0.5
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x432FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^52
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x4330000000000000l)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^53 - 1
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x433FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x433FFFFFFFFFFFFFl), 0.0);
+    // 2^53
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x4340000000000000l)),
+                        Double.longBitsToDouble(0x4340000000000000l), 0.0);
+    // 2^63 - 2^10
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl), 0.0);
+    // 2^63
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x43E0000000000000l)),
+                        Double.longBitsToDouble(0x43E0000000000000l), 0.0);
+    // 2^64
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Double.longBitsToDouble(0x43F0000000000000l), 0.0);
+    // -(2^52 - 1.5)
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC32FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0xC32FFFFFFFFFFFFCl), 0.0);
+    // -(2^52 - 0.5)
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC32FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC32FFFFFFFFFFFFEl), 0.0);
+    // -2^52
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC330000000000000l)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -(2^53 - 1)
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl), 0.0);
+    // -2^53
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC340000000000000l)),
+                        Double.longBitsToDouble(0xC340000000000000l), 0.0);
+    // -(2^63 - 2^10)
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl), 0.0);
+    // -2^63
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC3E0000000000000l)),
+                        Double.longBitsToDouble(0xC3E0000000000000l), 0.0);
+    // -2^64
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Double.longBitsToDouble(0xC3F0000000000000l), 0.0);
     Assert.assertEquals(Math.ceil(Double.NaN), Double.NaN, 0.0);
     Assert.assertEquals(Math.ceil(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0);
     Assert.assertEquals(Math.ceil(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0);
@@ -613,6 +661,54 @@
     Assert.assertEquals(Math.floor(-2.5), -3.0d, 0.0);
     Assert.assertEquals(Math.floor(-2.9), -3.0d, 0.0);
     Assert.assertEquals(Math.floor(-3.0), -3.0d, 0.0);
+    // 2^52 - 1.5
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x432FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0x432FFFFFFFFFFFFCl), 0.0);
+    // 2^52 - 0.5
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x432FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x432FFFFFFFFFFFFEl), 0.0);
+    // 2^52
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x4330000000000000l)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^53 - 1
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x433FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x433FFFFFFFFFFFFFl), 0.0);
+    // 2^53
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x4340000000000000l)),
+                        Double.longBitsToDouble(0x4340000000000000l), 0.0);
+    // 2^63 - 2^10
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl), 0.0);
+    // 2^63
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x43E0000000000000l)),
+                        Double.longBitsToDouble(0x43E0000000000000l), 0.0);
+    // 2^64
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Double.longBitsToDouble(0x43F0000000000000l), 0.0);
+    // -(2^52 - 1.5)
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC32FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0xC32FFFFFFFFFFFFEl), 0.0);
+    // -(2^52 - 0.5)
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC32FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -2^52
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC330000000000000l)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -(2^53 - 1)
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl), 0.0);
+    // -2^53
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC340000000000000l)),
+                        Double.longBitsToDouble(0xC340000000000000l), 0.0);
+    // -(2^63 - 2^10)
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl), 0.0);
+    // -2^63
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC3E0000000000000l)),
+                        Double.longBitsToDouble(0xC3E0000000000000l), 0.0);
+    // -2^64
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Double.longBitsToDouble(0xC3F0000000000000l), 0.0);
     Assert.assertEquals(Math.floor(Double.NaN), Double.NaN, 0.0);
     Assert.assertEquals(Math.floor(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0);
     Assert.assertEquals(Math.floor(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0);
@@ -632,6 +728,54 @@
     Assert.assertEquals(Math.rint(-2.5), -2.0d, 0.0);
     Assert.assertEquals(Math.rint(-2.9), -3.0d, 0.0);
     Assert.assertEquals(Math.rint(-3.0), -3.0d, 0.0);
+    // 2^52 - 1.5
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x432FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0x432FFFFFFFFFFFFCl), 0.0);
+    // 2^52 - 0.5
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x432FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^52
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x4330000000000000l)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^53 - 1
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x433FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x433FFFFFFFFFFFFFl), 0.0);
+    // 2^53
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x4340000000000000l)),
+                        Double.longBitsToDouble(0x4340000000000000l), 0.0);
+    // 2^63 - 2^10
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl), 0.0);
+    // 2^63
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x43E0000000000000l)),
+                        Double.longBitsToDouble(0x43E0000000000000l), 0.0);
+    // 2^64
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Double.longBitsToDouble(0x43F0000000000000l), 0.0);
+    // -(2^52 - 1.5)
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC32FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0xC32FFFFFFFFFFFFCl), 0.0);
+    // -(2^52 - 0.5)
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC32FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -2^52
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC330000000000000l)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -(2^53 - 1)
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl), 0.0);
+    // -2^53
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC340000000000000l)),
+                        Double.longBitsToDouble(0xC340000000000000l), 0.0);
+    // -(2^63 - 2^10)
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl), 0.0);
+    // -2^63
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC3E0000000000000l)),
+                        Double.longBitsToDouble(0xC3E0000000000000l), 0.0);
+    // -2^64
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Double.longBitsToDouble(0xC3F0000000000000l), 0.0);
     Assert.assertEquals(Math.rint(Double.NaN), Double.NaN, 0.0);
     Assert.assertEquals(Math.rint(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0);
     Assert.assertEquals(Math.rint(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0);
diff --git a/test/1337-gc-coverage/check b/test/1337-gc-coverage/check
new file mode 100755
index 0000000..842bdc6
--- /dev/null
+++ b/test/1337-gc-coverage/check
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Check that the string "error" isn't present
+if grep error "$2"; then
+    exit 1
+else
+    exit 0
+fi
diff --git a/test/1337-gc-coverage/expected.txt b/test/1337-gc-coverage/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/1337-gc-coverage/expected.txt
diff --git a/test/1337-gc-coverage/gc_coverage.cc b/test/1337-gc-coverage/gc_coverage.cc
new file mode 100644
index 0000000..7cf30bd
--- /dev/null
+++ b/test/1337-gc-coverage/gc_coverage.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gc/heap.h"
+#include "jni.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_performHomogeneousSpaceCompact(JNIEnv*, jclass) {
+  return Runtime::Current()->GetHeap()->PerformHomogeneousSpaceCompact() == gc::kSuccess ?
+      JNI_TRUE : JNI_FALSE;
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_supportHomogeneousSpaceCompact(JNIEnv*, jclass) {
+  return Runtime::Current()->GetHeap()->SupportHomogeneousSpaceCompactAndCollectorTransitions() ?
+      JNI_TRUE : JNI_FALSE;
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_incrementDisableMovingGC(JNIEnv*, jclass) {
+  Runtime::Current()->GetHeap()->IncrementDisableMovingGC(Thread::Current());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_decrementDisableMovingGC(JNIEnv*, jclass) {
+  Runtime::Current()->GetHeap()->DecrementDisableMovingGC(Thread::Current());
+}
+
+extern "C" JNIEXPORT jlong JNICALL Java_Main_objectAddress(JNIEnv* env, jclass, jobject object) {
+  ScopedObjectAccess soa(env);
+  return reinterpret_cast<jlong>(soa.Decode<mirror::Object*>(object));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_supportCollectorTransition(JNIEnv*, jclass) {
+  // Same as supportHomogeneousSpaceCompact for now.
+  return Runtime::Current()->GetHeap()->SupportHomogeneousSpaceCompactAndCollectorTransitions() ?
+      JNI_TRUE : JNI_FALSE;
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_transitionToSS(JNIEnv*, jclass) {
+  Runtime::Current()->GetHeap()->TransitionCollector(gc::kCollectorTypeSS);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_transitionToCMS(JNIEnv*, jclass) {
+  Runtime::Current()->GetHeap()->TransitionCollector(gc::kCollectorTypeCMS);
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/1337-gc-coverage/info.txt b/test/1337-gc-coverage/info.txt
new file mode 100644
index 0000000..7e3acd3
--- /dev/null
+++ b/test/1337-gc-coverage/info.txt
@@ -0,0 +1 @@
+Tests internal GC functions which are not exposed through normal APIs.
\ No newline at end of file
diff --git a/test/1337-gc-coverage/src/Main.java b/test/1337-gc-coverage/src/Main.java
new file mode 100644
index 0000000..7875eb1
--- /dev/null
+++ b/test/1337-gc-coverage/src/Main.java
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.TreeMap;
+
+public class Main {
+  private static TreeMap<Integer, Object> treeMap = new TreeMap<Integer, Object>();
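+  // Keeps allocated objects reachable so the heap stays populated across compactions.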
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    testHomogeneousCompaction();
+    testCollectorTransitions();
+    System.out.println("Done.");
+  }
+
+  private static void allocateStuff() {
+    for (int i = 0; i < 1000; ++i) {
+      Object o = new Object();
+      treeMap.put(o.hashCode(), o);
+    }
+  }
+
+  public static void testHomogeneousCompaction() {
+    System.out.println("Attempting homogeneous compaction");
+    final boolean supportHSC = supportHomogeneousSpaceCompact();
+    Object o = new Object();
+    long addressBefore = objectAddress(o);
+    long addressAfter;
+    allocateStuff();
+    final boolean success = performHomogeneousSpaceCompact();
+    allocateStuff();
+    System.out.println("Homogeneous compaction support=" + supportHSC + " success=" + success);
+    if (supportHSC != success) {
+      System.out.println("error: Expected " + supportHSC + " but got " + success);
+    }
+    if (success) {
+      allocateStuff();
+      addressAfter = objectAddress(o);
+      // This relies on the compaction copying objects from one space to another, with no
+      // overlap between the two spaces.
+      if (addressBefore == addressAfter) {
+        System.out.println("error: Expected different adddress " + addressBefore + " vs " +
+            addressAfter);
+      }
+    }
+    if (supportHSC) {
+      incrementDisableMovingGC();
+      if (performHomogeneousSpaceCompact()) {
+        System.out.println("error: Compaction succeeded when moving GC is disabled");
+      }
+      decrementDisableMovingGC();
+      if (!performHomogeneousSpaceCompact()) {
+        System.out.println("error: Compaction failed when moving GC is enabled");
+      }
+    }
+  }
+
+  private static void testCollectorTransitions() {
+    if (supportCollectorTransition()) {
+      Object o = new Object();
+      // Transition to semi-space collector.
+      allocateStuff();
+      transitionToSS();
+      allocateStuff();
+      long addressBefore = objectAddress(o);
+      Runtime.getRuntime().gc();
+      long addressAfter = objectAddress(o);
+      if (addressBefore == addressAfter) {
+        System.out.println("error: Expected different adddress " + addressBefore + " vs " +
+            addressAfter);
+      }
+      // Transition back to CMS.
+      transitionToCMS();
+      allocateStuff();
+      addressBefore = objectAddress(o);
+      Runtime.getRuntime().gc();
+      addressAfter = objectAddress(o);
+      if (addressBefore != addressAfter) {
+        System.out.println("error: Expected same adddress " + addressBefore + " vs " +
+            addressAfter);
+      }
+    }
+  }
+
+  // Methods to get access to ART internals.
+  private static native boolean supportHomogeneousSpaceCompact();
+  private static native boolean performHomogeneousSpaceCompact();
+  private static native void incrementDisableMovingGC();
+  private static native void decrementDisableMovingGC();
+  private static native long objectAddress(Object object);
+  private static native boolean supportCollectorTransition();
+  private static native void transitionToSS();
+  private static native void transitionToCMS();
+}
diff --git a/test/474-fp-sub-neg/expected.txt b/test/474-fp-sub-neg/expected.txt
index 1c15abb..1c7ded3 100644
--- a/test/474-fp-sub-neg/expected.txt
+++ b/test/474-fp-sub-neg/expected.txt
@@ -1,6 +1,13 @@
 -0.0
-0.0
-0.0
 -0.0
 0.0
 0.0
+0.0
+0.0
+-0.0
+-0.0
+0.0
+0.0
+0.0
+0.0
+d 0.0
diff --git a/test/474-fp-sub-neg/src/Main.java b/test/474-fp-sub-neg/src/Main.java
index c190e8e..796d56c 100644
--- a/test/474-fp-sub-neg/src/Main.java
+++ b/test/474-fp-sub-neg/src/Main.java
@@ -17,33 +17,58 @@
 public class Main {
     public static void floatTest() {
       float f = 0;
+      float nf = -0.0f;
       float fc = 1f;
       for (int i = 0; i < 2; i++) {
         f -= fc;
         f = -f;
+        nf -= fc;
+        nf = -nf;
       }
 
       System.out.println(f);
+      System.out.println(nf);
       System.out.println(f + 0f);
       System.out.println(f - (-0f));
+      System.out.println(-f - (-nf));
+      System.out.println(-f + (-nf));
     }
 
     public static void doubleTest() {
       double d = 0;
+      double nd = -0.0;
       double dc = 1f;
       for (int i = 0; i < 2; i++) {
         d -= dc;
         d = -d;
+        nd -= dc;
+        nd = -nd;
       }
 
       System.out.println(d);
+      System.out.println(nd);
       System.out.println(d + 0f);
       System.out.println(d - (-0f));
+      System.out.println(-d - (-nd));
+      System.out.println(-d + (-nd));
+    }
+
+    public static void bug_1() {
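+      // Regression test: with f == -0.0f and a == 0.0f, d = -f + (-a) must be +0.0f.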
+      int i4=18, i3=-48959;
+      float d;
+      float f=-0.0f;
+      float a=0.0f;
+
+      d = -f + (-a);
+      f += i4 * i3;
+
+      System.out.println("d " + d);
     }
 
     public static void main(String[] args) {
         doubleTest();
         floatTest();
+        bug_1();
     }
 
 }
diff --git a/test/999-jni-perf/check b/test/999-jni-perf/check
new file mode 100755
index 0000000..ffbb8cf
--- /dev/null
+++ b/test/999-jni-perf/check
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Only compare the last line.
+tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
diff --git a/test/999-jni-perf/expected.txt b/test/999-jni-perf/expected.txt
new file mode 100644
index 0000000..a965a70
--- /dev/null
+++ b/test/999-jni-perf/expected.txt
@@ -0,0 +1 @@
+Done
diff --git a/test/999-jni-perf/info.txt b/test/999-jni-perf/info.txt
new file mode 100644
index 0000000..010b57b
--- /dev/null
+++ b/test/999-jni-perf/info.txt
@@ -0,0 +1 @@
+Tests for measuring performance of JNI state changes.
diff --git a/test/999-jni-perf/perf-jni.cc b/test/999-jni-perf/perf-jni.cc
new file mode 100644
index 0000000..51eeb83
--- /dev/null
+++ b/test/999-jni-perf/perf-jni.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+
+#include "jni.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+
+namespace art {
+
+namespace {
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_perfJniEmptyCall(JNIEnv*, jobject) {
+  return 0;
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_perfSOACall(JNIEnv*, jobject) {
+  ScopedObjectAccess soa(Thread::Current());
+  return 0;
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_perfSOAUncheckedCall(JNIEnv*, jobject) {
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  return 0;
+}
+
+}  // namespace
+
+}  // namespace art
diff --git a/test/999-jni-perf/src/Main.java b/test/999-jni-perf/src/Main.java
new file mode 100644
index 0000000..032e700
--- /dev/null
+++ b/test/999-jni-perf/src/Main.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public Main() {
+  }
+
+  private static final String MSG = "ABCDE";
+
+  native int perfJniEmptyCall();
+  native int perfSOACall();
+  native int perfSOAUncheckedCall();
+
+  int runPerfTest(long N) {
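+    // Times N iterations of each call type and prints the average time per call in nanoseconds.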
+    long start = System.nanoTime();
+    for (long i = 0; i < N; i++) {
+      char c = MSG.charAt(2);
+    }
+    long elapse = System.nanoTime() - start;
+    System.out.println("Fast JNI (charAt): " + (double)elapse / N);
+
+    start = System.nanoTime();
+    for (long i = 0; i < N; i++) {
+      perfJniEmptyCall();
+    }
+    elapse = System.nanoTime() - start;
+    System.out.println("Empty call: " + (double)elapse / N);
+
+    start = System.nanoTime();
+    for (long i = 0; i < N; i++) {
+      perfSOACall();
+    }
+    elapse = System.nanoTime() - start;
+    System.out.println("SOA call: " + (double)elapse / N);
+
+    start = System.nanoTime();
+    for (long i = 0; i < N; i++) {
+      perfSOAUncheckedCall();
+    }
+    elapse = System.nanoTime() - start;
+    System.out.println("SOA unchecked call: " + (double)elapse / N);
+
+    return 0;
+  }
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    long iterations = 1000000;
+    if (args.length > 1) {
+      iterations = Long.parseLong(args[1], 10);
+    }
+    Main m = new Main();
+    m.runPerfTest(iterations);
+    System.out.println("Done");
+  }
+}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 82f8c79..90bf5b5 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -29,6 +29,7 @@
   116-nodex2oat/nodex2oat.cc \
   117-nopatchoat/nopatchoat.cc \
   118-noimage-dex2oat/noimage-dex2oat.cc \
+  1337-gc-coverage/gc_coverage.cc \
   137-cfi/cfi.cc \
   139-register-natives/regnative.cc \
   454-get-vreg/get_vreg_jni.cc \
@@ -36,7 +37,8 @@
   457-regs/regs_jni.cc \
   461-get-reference-vreg/get_reference_vreg_jni.cc \
   466-get-live-vreg/get_live_vreg_jni.cc \
-  497-inlining-and-class-loader/clear_dex_cache.cc
+  497-inlining-and-class-loader/clear_dex_cache.cc \
+  999-jni-perf/perf-jni.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 39dc030..6b57f2b 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -37,9 +37,9 @@
 QUIET="n"
 RELOCATE="y"
 SECONDARY_DEX=""
-TIME_OUT="y"
-# Value in minutes.
-TIME_OUT_VALUE=10
+TIME_OUT="gdb"  # "n" (disabled), "timeout" (use timeout), "gdb" (use gdb)
+# Value in seconds.
+TIME_OUT_VALUE=600  # 10 minutes.
 USE_GDB="n"
 USE_JVM="n"
 VERIFY="y" # y=yes,n=no,s=softfail
@@ -459,15 +459,32 @@
 
     cmdline="$dalvikvm_cmdline"
 
-    if [ "$TIME_OUT" = "y" ]; then
+    if [ "$TIME_OUT" = "gdb" ]; then
+      if [ `uname` = "Darwin" ]; then
+        # Fall back to timeout on Mac.
+        TIME_OUT="timeout"
+      elif [ "$ISA" = "x86" ]; then
+        # The prctl call may fail in 32-bit userspace on an older (3.2) 64-bit Linux kernel. Fall back to timeout.
+        TIME_OUT="timeout"
+      else
+        # Check if gdb is available.
+        gdb --eval-command="quit" > /dev/null 2>&1
+        if [ $? != 0 ]; then
+          # gdb isn't available. Fall back to timeout.
+          TIME_OUT="timeout"
+        fi
+      fi
+    fi
+
+    if [ "$TIME_OUT" = "timeout" ]; then
       # Add timeout command if time out is desired.
       #
       # Note: We use nested timeouts. The inner timeout sends SIGRTMIN+2 (usually 36) to ART, which
       #       will induce a full thread dump before abort. However, dumping threads might deadlock,
       #       so the outer timeout sends the regular SIGTERM after an additional minute to ensure
       #       termination (without dumping all threads).
-      TIME_PLUS_ONE=$(($TIME_OUT_VALUE + 1))
-      cmdline="timeout ${TIME_PLUS_ONE}m timeout -s SIGRTMIN+2 ${TIME_OUT_VALUE}m $cmdline"
+      TIME_PLUS_ONE=$(($TIME_OUT_VALUE + 60))
+      cmdline="timeout ${TIME_PLUS_ONE}s timeout -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
@@ -502,12 +519,37 @@
       # When running under gdb, we cannot do piping and grepping...
       $cmdline "$@"
     else
-      trap 'kill -INT -$pid' INT
-      $cmdline "$@" 2>&1 & pid=$!
-      wait $pid
-      # Add extra detail if time out is enabled.
-      if [ ${PIPESTATUS[0]} = 124 ] && [ "$TIME_OUT" = "y" ]; then
-        echo -e "\e[91mTEST TIMED OUT!\e[0m" >&2
+      if [ "$TIME_OUT" != "gdb" ]; then
+        trap 'kill -INT -$pid' INT
+        $cmdline "$@" 2>&1 & pid=$!
+        wait $pid
+        # Add extra detail if time out is enabled.
+        if [ ${PIPESTATUS[0]} = 124 ] && [ "$TIME_OUT" = "timeout" ]; then
+          echo -e "\e[91mTEST TIMED OUT!\e[0m" >&2
+        fi
+      else
+        # Run the test with a watcher that uses gdb to take a thread dump on timeout.
+        trap 'kill -INT -$pid' INT
+        $cmdline "$@" 2>&1 & pid=$!
+        # Spawn a watcher process.
+        ( sleep $TIME_OUT_VALUE && \
+          echo "##### Thread dump using gdb on test timeout" && \
+          ( gdb -q -p $pid --eval-command="info thread" --eval-command="thread apply all bt" \
+                           --eval-command="call exit(124)" --eval-command=quit || \
+            kill $pid )) 2> /dev/null & watcher=$!
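+        # Note: exiting with 124 matches timeout(1), so timeouts are reported uniformly below.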
+        wait $pid
+        test_exit_status=$?
+        pkill -P $watcher 2> /dev/null # kill the sleep which will in turn end the watcher as well
+        if [ $test_exit_status = 0 ]; then
+          # The test finished normally.
+          exit 0
+        else
+          # The test failed or timed out.
+          if [ $test_exit_status = 124 ]; then
+            # The test timed out.
+            echo -e "\e[91mTEST TIMED OUT!\e[0m" >&2
+          fi
+        fi
       fi
     fi
 fi
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 1dced32..2ee87e5 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -27,9 +27,9 @@
 showcommands=
 make_command=
 
-if [[ "$TARGET_PRODUCT" == "armv8" ]]; then
-  linker="linker64"
-fi
+case "$TARGET_PRODUCT" in
+  (armv8|mips64r6) linker="linker64";;
+esac
 
 if [[ "$ART_TEST_ANDROID_ROOT" != "" ]]; then
   android_root="$ART_TEST_ANDROID_ROOT"