Merge "Add a new kReservedCapacity to decide when to start GC code."
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index dc53853..02bce41 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -118,8 +118,7 @@
 ART_TARGET_CLANG_arm := false
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
-# b/25928358, illegal instruction on mips64r6 with -O0
-ART_TARGET_CLANG_mips64 := false
+ART_TARGET_CLANG_mips64 :=
 ART_TARGET_CLANG_x86 :=
 ART_TARGET_CLANG_x86_64 :=
 
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index ab70367..c9af1c6 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -205,7 +205,7 @@
     LOCAL_DEX_PREOPT_IMAGE_LOCATION := $(TARGET_CORE_IMG_OUT)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
       LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
-      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp -D jack.dex.output.multidex.legacy=true
+      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_JAVA_LIBRARY)
     $(5) := $$(LOCAL_INSTALLED_MODULE)
@@ -221,7 +221,7 @@
     LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_LOCATION)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
       LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
-      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp -D jack.dex.output.multidex.legacy=true
+      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
     $(6) := $$(LOCAL_INSTALLED_MODULE)
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index dc2c9c9..81b854e 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -291,6 +291,13 @@
   }
 
   {
+    const char* log_args = "-verbose:collector";
+    LogVerbosity log_verbosity = LogVerbosity();
+    log_verbosity.collector = true;
+    EXPECT_SINGLE_PARSE_VALUE(log_verbosity, log_args, M::Verbose);
+  }
+
+  {
     const char* log_args = "-verbose:oat";
     LogVerbosity log_verbosity = LogVerbosity();
     log_verbosity.oat = true;
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 740199d..c0a00cc 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -584,6 +584,8 @@
     for (size_t j = 0; j < verbose_options.size(); ++j) {
       if (verbose_options[j] == "class") {
         log_verbosity.class_linker = true;
+      } else if (verbose_options[j] == "collector") {
+        log_verbosity.collector = true;
       } else if (verbose_options[j] == "compiler") {
         log_verbosity.compiler = true;
       } else if (verbose_options[j] == "deopt") {
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 0d65bc7..3cb63e7 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -186,13 +186,7 @@
       } else {
         // Search dex file for localized ssb index, may fail if member's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
-        std::string temp;
-        const DexFile::TypeId* type_id =
-           dex_file->FindTypeId(resolved_member->GetDeclaringClass()->GetDescriptor(&temp));
-        if (type_id != nullptr) {
-          // medium path, needs check of static storage base being initialized
-          storage_idx = dex_file->GetIndexForTypeId(*type_id);
-        }
+        storage_idx = resolved_member->GetDeclaringClass()->FindTypeIndexInOtherDexFile(*dex_file);
       }
       if (storage_idx != DexFile::kDexNoIndex) {
         *storage_index = storage_idx;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 670fe94..a51dd32 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -378,7 +378,6 @@
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info) {
   DCHECK(compiler_options_ != nullptr);
-  DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
 
   compiler_->Init();
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 5e35cbb..d8f23f7 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -138,6 +138,7 @@
       REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_);
 
   VerificationResults* GetVerificationResults() const {
+    DCHECK(Runtime::Current()->IsAotCompiler());
     return verification_results_;
   }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 73574ba..d50528e 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -124,7 +124,10 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
-    ComputeLazyFieldsForImageClasses();  // Add useful information
+    if (!compile_app_image_) {
+      // Avoid for app image since this may increase RAM and image size.
+      ComputeLazyFieldsForImageClasses();  // Add useful information
+    }
   }
   heap->CollectGarbage(false);  // Remove garbage.
 
@@ -735,20 +738,20 @@
   return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
 }
 
-bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) {
+bool ImageWriter::PruneAppImageClass(mirror::Class* klass) {
   bool early_exit = false;
   std::unordered_set<mirror::Class*> visited;
-  return ContainsBootClassLoaderNonImageClassInternal(klass, &early_exit, &visited);
+  return PruneAppImageClassInternal(klass, &early_exit, &visited);
 }
 
-bool ImageWriter::ContainsBootClassLoaderNonImageClassInternal(
+bool ImageWriter::PruneAppImageClassInternal(
     mirror::Class* klass,
     bool* early_exit,
     std::unordered_set<mirror::Class*>* visited) {
   DCHECK(early_exit != nullptr);
   DCHECK(visited != nullptr);
   DCHECK(compile_app_image_);
-  if (klass == nullptr) {
+  if (klass == nullptr || IsInBootImage(klass)) {
     return false;
   }
   auto found = prune_class_memo_.find(klass);
@@ -762,7 +765,11 @@
     return false;
   }
   visited->emplace(klass);
-  bool result = IsBootClassLoaderNonImageClass(klass);
+  bool result = IsBootClassLoaderClass(klass);
+  std::string temp;
+  // Prune if not an image class; this handles any broken sets of image classes such as having a
+  // class in the set but not its superclass.
+  result = result || !compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
   bool my_early_exit = false;  // Only for ourselves, ignore caller.
   // Remove classes that failed to verify since we don't want to have java.lang.VerifyError in the
   // app image.
@@ -775,17 +782,15 @@
     // Check interfaces since these wont be visited through VisitReferences.)
     mirror::IfTable* if_table = klass->GetIfTable();
     for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
-      result = result || ContainsBootClassLoaderNonImageClassInternal(
-          if_table->GetInterface(i),
-          &my_early_exit,
-          visited);
+      result = result || PruneAppImageClassInternal(if_table->GetInterface(i),
+                                                    &my_early_exit,
+                                                    visited);
     }
   }
   if (klass->IsObjectArrayClass()) {
-    result = result || ContainsBootClassLoaderNonImageClassInternal(
-        klass->GetComponentType(),
-        &my_early_exit,
-        visited);
+    result = result || PruneAppImageClassInternal(klass->GetComponentType(),
+                                                  &my_early_exit,
+                                                  visited);
   }
   // Check static fields and their classes.
   size_t num_static_fields = klass->NumReferenceStaticFields();
@@ -798,27 +803,22 @@
       mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset);
       if (ref != nullptr) {
         if (ref->IsClass()) {
-          result = result ||
-                   ContainsBootClassLoaderNonImageClassInternal(
-                       ref->AsClass(),
-                       &my_early_exit,
-                       visited);
+          result = result || PruneAppImageClassInternal(ref->AsClass(),
+                                                        &my_early_exit,
+                                                        visited);
+        } else {
+          result = result || PruneAppImageClassInternal(ref->GetClass(),
+                                                        &my_early_exit,
+                                                        visited);
         }
-        result = result ||
-                 ContainsBootClassLoaderNonImageClassInternal(
-                     ref->GetClass(),
-                     &my_early_exit,
-                     visited);
       }
       field_offset = MemberOffset(field_offset.Uint32Value() +
                                   sizeof(mirror::HeapReference<mirror::Object>));
     }
   }
-  result = result ||
-           ContainsBootClassLoaderNonImageClassInternal(
-               klass->GetSuperClass(),
-               &my_early_exit,
-               visited);
+  result = result || PruneAppImageClassInternal(klass->GetSuperClass(),
+                                                &my_early_exit,
+                                                visited);
   // Erase the element we stored earlier since we are exiting the function.
   auto it = visited->find(klass);
   DCHECK(it != visited->end());
@@ -837,15 +837,21 @@
   if (klass == nullptr) {
     return false;
   }
+  if (compile_app_image_ && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+    // Already in boot image, return true.
+    return true;
+  }
+  std::string temp;
+  if (!compiler_driver_.IsImageClass(klass->GetDescriptor(&temp))) {
+    return false;
+  }
   if (compile_app_image_) {
     // For app images, we need to prune boot loader classes that are not in the boot image since
     // these may have already been loaded when the app image is loaded.
     // Keep classes in the boot image space since we don't want to re-resolve these.
-    return Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass) ||
-        !ContainsBootClassLoaderNonImageClass(klass);
+    return !PruneAppImageClass(klass);
   }
-  std::string temp;
-  return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
+  return true;
 }
 
 class NonImageClassesVisitor : public ClassVisitor {
@@ -873,6 +879,7 @@
   class_linker->VisitClasses(&visitor);
 
   // Remove the undesired classes from the class roots.
+  VLOG(compiler) << "Pruning " << visitor.classes_to_prune_.size() << " classes";
   for (mirror::Class* klass : visitor.classes_to_prune_) {
     std::string temp;
     const char* name = klass->GetDescriptor(&temp);
@@ -891,10 +898,10 @@
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);  // For ClassInClassTable
   ReaderMutexLock mu2(self, *class_linker->DexLock());
   for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
-    if (dex_cache == nullptr) {
+    if (self->IsJWeakCleared(data.weak_root)) {
       continue;
     }
+    mirror::DexCache* dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
       if (klass != nullptr && !KeepClass(klass)) {
@@ -907,10 +914,10 @@
           mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
       DCHECK(method != nullptr) << "Expected resolution method instead of null method";
       mirror::Class* declaring_class = method->GetDeclaringClass();
-      // Miranda methods may be held live by a class which was not an image class but have a
+      // Copied methods may be held live by a class which was not an image class but have a
       // declaring class which is an image class. Set it to the resolution method to be safe and
       // prevent dangling pointers.
-      if (method->IsMiranda() || !KeepClass(declaring_class)) {
+      if (method->MightBeCopied() || !KeepClass(declaring_class)) {
         mirror::DexCache::SetElementPtrSize(resolved_methods,
                                             i,
                                             resolution_method,
@@ -1820,12 +1827,16 @@
 }
 
 template <typename T>
-T* ImageWriter::NativeLocationInImage(T* obj, const char* oat_filename) {
+T* ImageWriter::NativeLocationInImage(T* obj) {
   if (obj == nullptr || IsInBootImage(obj)) {
     return obj;
   } else {
-    ImageInfo& image_info = GetImageInfo(oat_filename);
-    return reinterpret_cast<T*>(image_info.image_begin_ + NativeOffsetInImage(obj));
+    auto it = native_object_relocations_.find(obj);
+    CHECK(it != native_object_relocations_.end()) << obj << " spaces "
+        << Runtime::Current()->GetHeap()->DumpSpaces();
+    const NativeObjectRelocation& relocation = it->second;
+    ImageInfo& image_info = GetImageInfo(relocation.oat_filename);
+    return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset);
   }
 }
 
@@ -1842,33 +1853,19 @@
 
 class NativeLocationVisitor {
  public:
-  explicit NativeLocationVisitor(ImageWriter* image_writer, const char* oat_filename)
-      : image_writer_(image_writer), oat_filename_(oat_filename) {}
+  explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
   template <typename T>
   T* operator()(T* ptr) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return image_writer_->NativeLocationInImage(ptr, oat_filename_);
-  }
-
-  ArtMethod* operator()(ArtMethod* method) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    const char* oat_filename = method->IsRuntimeMethod() ? image_writer_->GetDefaultOatFilename() :
-        image_writer_->GetOatFilenameForDexCache(method->GetDexCache());
-    return image_writer_->NativeLocationInImage(method, oat_filename);
-  }
-
-  ArtField* operator()(ArtField* field) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    const char* oat_filename = image_writer_->GetOatFilenameForDexCache(field->GetDexCache());
-    return image_writer_->NativeLocationInImage(field, oat_filename);
+    return image_writer_->NativeLocationInImage(ptr);
   }
 
  private:
   ImageWriter* const image_writer_;
-  const char* oat_filename_;
 };
 
 void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
-  const char* oat_filename = GetOatFilename(orig);
-  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this, oat_filename));
+  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
 
@@ -1952,11 +1949,10 @@
   // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
   // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
   //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
-  const char* oat_filename = GetOatFilenameForDexCache(orig_dex_cache);
   GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings();
   if (orig_strings != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(),
-                                               NativeLocationInImage(orig_strings, oat_filename),
+                                               NativeLocationInImage(orig_strings),
                                                /*pointer size*/8u);
     orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache),
                                  ImageAddressVisitor(this));
@@ -1964,7 +1960,7 @@
   GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes();
   if (orig_types != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(),
-                                               NativeLocationInImage(orig_types, oat_filename),
+                                               NativeLocationInImage(orig_types),
                                                /*pointer size*/8u);
     orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache),
                                        ImageAddressVisitor(this));
@@ -1972,32 +1968,25 @@
   ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
   if (orig_methods != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedMethodsOffset(),
-                                               NativeLocationInImage(orig_methods, oat_filename),
+                                               NativeLocationInImage(orig_methods),
                                                /*pointer size*/8u);
     ArtMethod** copy_methods = NativeCopyLocation(orig_methods, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_);
-      const char* method_oat_filename;
-      if (orig == nullptr || orig->IsRuntimeMethod()) {
-        method_oat_filename = default_oat_filename_;
-      } else {
-        method_oat_filename = GetOatFilenameForDexCache(orig->GetDexCache());
-      }
-      ArtMethod* copy = NativeLocationInImage(orig, method_oat_filename);
+      // NativeLocationInImage also handles runtime methods since these have relocation info.
+      ArtMethod* copy = NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_);
     }
   }
   ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
   if (orig_fields != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(),
-                                               NativeLocationInImage(orig_fields, oat_filename),
+                                               NativeLocationInImage(orig_fields),
                                                /*pointer size*/8u);
     ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
       ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
-      const char* field_oat_filename =
-          orig == nullptr ? default_oat_filename_ : GetOatFilenameForDexCache(orig->GetDexCache());
-      ArtField* copy = NativeLocationInImage(orig, field_oat_filename);
+      ArtField* copy = NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
@@ -2089,20 +2078,10 @@
 
   copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
 
-  const char* oat_filename;
-  if (orig->IsRuntimeMethod() || compile_app_image_) {
-    oat_filename = default_oat_filename_;
-  } else {
-    auto it = dex_file_oat_filename_map_.find(orig->GetDexFile());
-    DCHECK(it != dex_file_oat_filename_map_.end()) << orig->GetDexFile()->GetLocation();
-    oat_filename = it->second;
-  }
   ArtMethod** orig_resolved_methods = orig->GetDexCacheResolvedMethods(target_ptr_size_);
-  copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods, oat_filename),
-                                   target_ptr_size_);
+  copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_);
   GcRoot<mirror::Class>* orig_resolved_types = orig->GetDexCacheResolvedTypes(target_ptr_size_);
-  copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types, oat_filename),
-                                 target_ptr_size_);
+  copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types), target_ptr_size_);
 
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
   // oat_begin_
@@ -2324,6 +2303,8 @@
     image_info_map_.emplace(oat_filename, ImageInfo());
   }
   std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+  CHECK_EQ(compile_app_image, !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty())
+      << "Compiling a boot image should occur iff there are no boot image spaces loaded";
 }
 
 ImageWriter::ImageInfo::ImageInfo()
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 9371d9f..ee204c5 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -410,16 +410,18 @@
   // Return true if klass is loaded by the boot class loader but not in the boot image.
   bool IsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Return true if klass depends on a boot class loader non image class live. We want to prune
-  // these classes since we do not want any boot class loader classes in the image. This means that
+  // Return true if klass depends on a boot class loader non image class. We want to prune these
+  // classes since we do not want any boot class loader classes in the image. This means that
   // we also cannot have any classes which refer to these boot class loader non image classes.
-  bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass)
+  // PruneAppImageClass also prunes if klass depends on a non-image class according to the compiler
+  // driver.
+  bool PruneAppImageClass(mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // early_exit is true if we had a cyclic dependency anywhere down the chain.
-  bool ContainsBootClassLoaderNonImageClassInternal(mirror::Class* klass,
-                                                    bool* early_exit,
-                                                    std::unordered_set<mirror::Class*>* visited)
+  bool PruneAppImageClassInternal(mirror::Class* klass,
+                                  bool* early_exit,
+                                  std::unordered_set<mirror::Class*>* visited)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type);
@@ -428,7 +430,7 @@
 
   // Location of where the object will be when the image is loaded at runtime.
   template <typename T>
-  T* NativeLocationInImage(T* obj, const char* oat_filename) SHARED_REQUIRES(Locks::mutator_lock_);
+  T* NativeLocationInImage(T* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Location of where the temporary copy of the object currently is.
   template <typename T>
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3fe7861..909d682 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -23,10 +23,7 @@
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
 #include "base/unix_file/fd_file.h"
-#include "compiler_callbacks.h"
 #include "debug/elf_debug_writer.h"
-#include "dex/pass_manager.h"
-#include "dex/quick_compiler_callbacks.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "jit/debugger_interface.h"
@@ -36,7 +33,6 @@
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
 #include "thread_list.h"
-#include "verifier/method_verifier-inl.h"
 
 namespace art {
 namespace jit {
@@ -45,11 +41,10 @@
   return new JitCompiler();
 }
 
-extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) {
+extern "C" void* jit_load(bool* generate_debug_info) {
   VLOG(jit) << "loading jit compiler";
   auto* const jit_compiler = JitCompiler::Create();
   CHECK(jit_compiler != nullptr);
-  *callbacks = jit_compiler->GetCompilerCallbacks();
   *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo();
   VLOG(jit) << "Done loading jit compiler";
   return jit_compiler;
@@ -151,14 +146,10 @@
     instruction_set_features_.reset(InstructionSetFeatures::FromCppDefines());
   }
   cumulative_logger_.reset(new CumulativeLogger("jit times"));
-  verification_results_.reset(new VerificationResults(compiler_options_.get()));
   method_inliner_map_.reset(new DexFileToMethodInlinerMap);
-  callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(),
-                                              method_inliner_map_.get(),
-                                              CompilerCallbacks::CallbackMode::kCompileApp));
   compiler_driver_.reset(new CompilerDriver(
       compiler_options_.get(),
-      verification_results_.get(),
+      /* verification_results */ nullptr,
       method_inliner_map_.get(),
       Compiler::kOptimizing,
       instruction_set,
@@ -251,9 +242,5 @@
   return success;
 }
 
-CompilerCallbacks* JitCompiler::GetCompilerCallbacks() const {
-  return callbacks_.get();
-}
-
 }  // namespace jit
 }  // namespace art
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 894d29e..d3b404a 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -415,7 +415,9 @@
     size_t visited_virtuals = 0;
     // TODO We should also check copied methods in this test.
     for (auto& m : klass->GetDeclaredVirtualMethods(pointer_size)) {
-      EXPECT_FALSE(m.IsMiranda());
+      if (!klass->IsInterface()) {
+        EXPECT_FALSE(m.MightBeCopied());
+      }
       CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
       ++method_index;
       ++visited_virtuals;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a53a6be..f3c40b1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6511,8 +6511,8 @@
   if (value == 0) {
     // Clears upper bits too.
     __ xorl(dest, dest);
-  } else if (value > 0 && IsInt<32>(value)) {
-    // We can use a 32 bit move, as it will zero-extend and is one byte shorter.
+  } else if (IsUint<32>(value)) {
+    // We can use a 32 bit move, as it will zero-extend and is shorter.
     __ movl(dest, Immediate(static_cast<int32_t>(value)));
   } else {
     __ movq(dest, Immediate(value));
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 014353d..7ddabde 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -18,8 +18,28 @@
 
 namespace art {
 
-// This visitor tries to simplify operations that yield a constant. For example
-// `input * 0` is replaced by a null constant.
+// Visitor that replaces unary/binary operations and type conversions by constants when
+// they can be statically evaluated, and removes HDivZeroCheck on non-zero constant inputs.
+class HConstantFoldingVisitor : public HGraphDelegateVisitor {
+ public:
+  explicit HConstantFoldingVisitor(HGraph* graph)
+      : HGraphDelegateVisitor(graph) {}
+
+ private:
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+
+  void VisitUnaryOperation(HUnaryOperation* inst) OVERRIDE;
+  void VisitBinaryOperation(HBinaryOperation* inst) OVERRIDE;
+
+  void VisitTypeConversion(HTypeConversion* inst) OVERRIDE;
+  void VisitDivZeroCheck(HDivZeroCheck* inst) OVERRIDE;
+
+  DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor);
+};
+
+// This visitor tries to simplify operations with an absorbing input,
+// yielding a constant. For example `input * 0` is replaced by a
+// zero constant.
 class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
  public:
   explicit InstructionWithAbsorbingInputSimplifier(HGraph* graph) : HGraphVisitor(graph) {}
@@ -44,59 +64,69 @@
   void VisitXor(HXor* instruction) OVERRIDE;
 };
 
+
 void HConstantFolding::Run() {
-  InstructionWithAbsorbingInputSimplifier simplifier(graph_);
+  HConstantFoldingVisitor visitor(graph_);
   // Process basic blocks in reverse post-order in the dominator tree,
   // so that an instruction turned into a constant, used as input of
   // another instruction, may possibly be used to turn that second
   // instruction into a constant as well.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    // Traverse this block's instructions in (forward) order and
-    // replace the ones that can be statically evaluated by a
-    // compile-time counterpart.
-    for (HInstructionIterator inst_it(block->GetInstructions());
-         !inst_it.Done(); inst_it.Advance()) {
-      HInstruction* inst = inst_it.Current();
-      if (inst->IsBinaryOperation()) {
-        // Constant folding: replace `op(a, b)' with a constant at
-        // compile time if `a' and `b' are both constants.
-        HConstant* constant = inst->AsBinaryOperation()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        } else {
-          inst->Accept(&simplifier);
-        }
-      } else if (inst->IsUnaryOperation()) {
-        // Constant folding: replace `op(a)' with a constant at compile
-        // time if `a' is a constant.
-        HConstant* constant = inst->AsUnaryOperation()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        }
-      } else if (inst->IsTypeConversion()) {
-        // Constant folding: replace `TypeConversion(a)' with a constant at
-        // compile time if `a' is a constant.
-        HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        }
-      } else if (inst->IsDivZeroCheck()) {
-        // We can safely remove the check if the input is a non-null constant.
-        HDivZeroCheck* check = inst->AsDivZeroCheck();
-        HInstruction* check_input = check->InputAt(0);
-        if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) {
-          check->ReplaceWith(check_input);
-          check->GetBlock()->RemoveInstruction(check);
-        }
-      }
-    }
+  visitor.VisitReversePostOrder();
+}
+
+
+void HConstantFoldingVisitor::VisitBasicBlock(HBasicBlock* block) {
+  // Visit the block's instructions (phis are skipped) in forward order via Accept().
+  HInstructionIterator inst_it(block->GetInstructions());
+  while (!inst_it.Done()) {
+    inst_it.Current()->Accept(this);
+    inst_it.Advance();
+  }
+}
 
+void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) {
+  // Fold `op(a)' into a compile-time constant when `a' is itself a constant.
+  HConstant* folded = inst->TryStaticEvaluation();
+  if (folded == nullptr) {
+    return;  // No static value available; leave the instruction untouched.
+  }
+  inst->ReplaceWith(folded);
+  inst->GetBlock()->RemoveInstruction(inst);
+}
+
+void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
+  // Fold `op(a, b)' into a compile-time constant when both inputs are constants.
+  HConstant* folded = inst->TryStaticEvaluation();
+  if (folded == nullptr) {
+    // No static value; an absorbing input (e.g. `x * 0') may still yield a constant.
+    InstructionWithAbsorbingInputSimplifier absorbing_simplifier(GetGraph());
+    inst->Accept(&absorbing_simplifier);
+    return;
+  }
+  inst->ReplaceWith(folded);
+  inst->GetBlock()->RemoveInstruction(inst);
+}
+
+void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
+  // Constant folding: replace `TypeConversion(a)' with a constant at compile time if
+  // `a' is a constant. `inst' is already typed, so no AsTypeConversion() is needed.
+  HConstant* constant = inst->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) {
+  // We can safely remove the check if the input is a non-zero constant.
+  HInstruction* check_input = inst->InputAt(0);
+  if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) {
+    inst->ReplaceWith(check_input);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+
 void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
   HInstruction* left = instruction->GetLeft();
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index 2698b2d..e10b1d6 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -26,13 +26,20 @@
  * Optimization pass performing a simple constant-expression
  * evaluation on the SSA form.
  *
+ * Note that graph simplifications producing a constant should be
+ * implemented in art::HConstantFolding, while graph simplifications
+ * not producing constants should be implemented in
+ * art::InstructionSimplifier.  (This convention is a choice that was
+ * made during the development of these parts of the compiler and is
+ * not bound by any technical requirement.)
+ *
  * This class is named art::HConstantFolding to avoid name
  * clashes with the art::ConstantPropagation class defined in
  * compiler/dex/post_opt_passes.h.
  */
 class HConstantFolding : public HOptimization {
  public:
-  explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
+  HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
       : HOptimization(graph, name) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index e6e9177..4a49c83 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -593,8 +593,9 @@
       HBasicBlock* predecessor = loop_header->GetPredecessors()[i];
       if (!loop_information->IsBackEdge(*predecessor)) {
         AddError(StringPrintf(
-            "Loop header %d has multiple incoming (non back edge) blocks.",
-            id));
+            "Loop header %d has multiple incoming (non back edge) blocks: %d.",
+            id,
+            predecessor->GetBlockId()));
       }
     }
   }
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a5acab8..02a1acc 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -190,28 +190,34 @@
   }
 }
 
-static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file)
+static uint32_t FindClassIndexIn(mirror::Class* cls,
+                                 const DexFile& dex_file,
+                                 Handle<mirror::DexCache> dex_cache)
     SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t index = DexFile::kDexNoIndex;
   if (cls->GetDexCache() == nullptr) {
-    DCHECK(cls->IsArrayClass());
-    // TODO: find the class in `dex_file`.
-    return DexFile::kDexNoIndex;
+    DCHECK(cls->IsArrayClass()) << PrettyClass(cls);
+    index = cls->FindTypeIndexInOtherDexFile(dex_file);
   } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+    DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
     // TODO: deal with proxy classes.
-    return DexFile::kDexNoIndex;
   } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    index = cls->GetDexTypeIndex();
+  } else {
+    index = cls->FindTypeIndexInOtherDexFile(dex_file);
+  }
+
+  if (index != DexFile::kDexNoIndex) {
     // Update the dex cache to ensure the class is in. The generated code will
     // consider it is. We make it safe by updating the dex cache, as other
     // dex files might also load the class, and there is no guarantee the dex
     // cache of the dex file of the class will be updated.
-    if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) {
-      cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls);
+    if (dex_cache->GetResolvedType(index) == nullptr) {
+      dex_cache->SetResolvedType(index, cls);
     }
-    return cls->GetDexTypeIndex();
-  } else {
-    // TODO: find the class in `dex_file`.
-    return DexFile::kDexNoIndex;
   }
+
+  return index;
 }
 
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
@@ -303,7 +309,7 @@
                                                    uint32_t dex_pc) const {
   ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
   DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
-  return new (graph_->GetArena()) HInstanceFieldGet(
+  HInstanceFieldGet* result = new (graph_->GetArena()) HInstanceFieldGet(
       receiver,
       Primitive::kPrimNot,
       field->GetOffset(),
@@ -313,6 +319,9 @@
       *field->GetDexFile(),
       handles_->NewHandle(field->GetDexCache()),
       dex_pc);
+  // The class field of an object is effectively final, and does not have any memory dependencies.
+  result->SetSideEffects(SideEffects::None());
+  return result;
 }
 
 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
@@ -322,7 +331,8 @@
       << invoke_instruction->DebugName();
 
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
+  uint32_t class_index = FindClassIndexIn(
+      ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache());
   if (class_index == DexFile::kDexNoIndex) {
     VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
                    << " from inline cache is not inlined because its class is not"
@@ -350,32 +360,15 @@
   }
 
   // We successfully inlined, now add a guard.
-  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
-      class_linker, receiver, invoke_instruction->GetDexPc());
-
   bool is_referrer =
       (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
-  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
-                                                               class_index,
-                                                               caller_dex_file,
-                                                               is_referrer,
-                                                               invoke_instruction->GetDexPc(),
-                                                               /* needs_access_check */ false,
-                                                               /* is_in_dex_cache */ true);
-
-  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
-  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-      compare, invoke_instruction->GetDexPc());
-  // TODO: Extend reference type propagation to understand the guard.
-  if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
-  } else {
-    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
-  }
-  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
-  bb_cursor->InsertInstructionAfter(compare, load_class);
-  bb_cursor->InsertInstructionAfter(deoptimize, compare);
-  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  AddTypeGuard(receiver,
+               cursor,
+               bb_cursor,
+               class_index,
+               is_referrer,
+               invoke_instruction,
+               /* with_deoptimization */ true);
 
   // Run type propagation to get the guard typed, and eventually propagate the
   // type of the receiver.
@@ -386,11 +379,219 @@
   return true;
 }
 
+HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
+                                     HInstruction* cursor,
+                                     HBasicBlock* bb_cursor,
+                                     uint32_t class_index,
+                                     bool is_referrer,
+                                     HInstruction* invoke_instruction,
+                                     bool with_deoptimization) {
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
+
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  // Note that we will just compare the classes, so we don't need Java semantics access checks.
+  // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache.
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
+  bb_cursor->InsertInstructionAfter(compare, load_class);
+  if (with_deoptimization) {
+    HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+        compare, invoke_instruction->GetDexPc());
+    bb_cursor->InsertInstructionAfter(deoptimize, compare);
+    deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  }
+  return compare;
+}
+
 bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         const InlineCache& ic) {
   DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
       << invoke_instruction->DebugName();
+
+  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, ic)) {
+    return true;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+
+  bool all_targets_inlined = true;
+  bool one_target_inlined = false;
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (ic.GetTypeAt(i) == nullptr) {
+      break;
+    }
+    ArtMethod* method = nullptr;
+    if (invoke_instruction->IsInvokeInterface()) {
+      method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
+          resolved_method, pointer_size);
+    } else {
+      DCHECK(invoke_instruction->IsInvokeVirtual());
+      method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
+          resolved_method, pointer_size);
+    }
+
+    HInstruction* receiver = invoke_instruction->InputAt(0);
+    HInstruction* cursor = invoke_instruction->GetPrevious();
+    HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+    uint32_t class_index = FindClassIndexIn(
+        ic.GetTypeAt(i), caller_dex_file, caller_compilation_unit_.GetDexCache());
+    HInstruction* return_replacement = nullptr;
+    if (class_index == DexFile::kDexNoIndex ||
+        !TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+      all_targets_inlined = false;
+    } else {
+      one_target_inlined = true;
+      bool is_referrer = (ic.GetTypeAt(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+
+      // If we have inlined all targets before, and this receiver is the last seen,
+      // we deoptimize instead of keeping the original invoke instruction.
+      bool deoptimize = all_targets_inlined &&
+          (i != InlineCache::kIndividualCacheSize - 1) &&
+          (ic.GetTypeAt(i + 1) == nullptr);
+      HInstruction* compare = AddTypeGuard(
+          receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize);
+      if (deoptimize) {
+        if (return_replacement != nullptr) {
+          invoke_instruction->ReplaceWith(return_replacement);
+        }
+        invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+        // Because the inline cache data can be populated concurrently, we force the end of the
+        // iteration. Otherwise, we could see a new receiver type.
+        break;
+      } else {
+        CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
+      }
+    }
+  }
+
+  if (!one_target_inlined) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because none"
+                   << " of its targets could be inlined";
+    return false;
+  }
+  MaybeRecordStat(kInlinedPolymorphicCall);
+
+  // Run type propagation to get the guards typed.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
+  rtp_fixup.Run();
+  return true;
+}
+
+void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
+                                                        HInstruction* return_replacement,
+                                                        HInstruction* invoke_instruction) {
+  uint32_t dex_pc = invoke_instruction->GetDexPc();
+  HBasicBlock* cursor_block = compare->GetBlock();
+  HBasicBlock* original_invoke_block = invoke_instruction->GetBlock();
+  ArenaAllocator* allocator = graph_->GetArena();
+
+  // Split the block after the compare: `cursor_block` will now be the start of the diamond,
+  // and the returned block is the start of the then branch (that could contain multiple blocks).
+  HBasicBlock* then = cursor_block->SplitAfterForInlining(compare);
+
+  // Split the block containing the invoke before and after the invoke. The returned block
+  // of the split before will contain the invoke and will be the otherwise branch of
+  // the diamond. The returned block of the split after will be the merge block
+  // of the diamond.
+  HBasicBlock* end_then = invoke_instruction->GetBlock();
+  HBasicBlock* otherwise = end_then->SplitBeforeForInlining(invoke_instruction);
+  HBasicBlock* merge = otherwise->SplitAfterForInlining(invoke_instruction);
+
+  // If the methods we are inlining return a value, we create a phi in the merge block
+  // that will have the `invoke_instruction` and the `return_replacement` as inputs.
+  if (return_replacement != nullptr) {
+    HPhi* phi = new (allocator) HPhi(
+        allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke_instruction->GetType()), dex_pc);
+    merge->AddPhi(phi);
+    invoke_instruction->ReplaceWith(phi);
+    phi->AddInput(return_replacement);
+    phi->AddInput(invoke_instruction);
+  }
+
+  // Add the control flow instructions.
+  otherwise->AddInstruction(new (allocator) HGoto(dex_pc));
+  end_then->AddInstruction(new (allocator) HGoto(dex_pc));
+  cursor_block->AddInstruction(new (allocator) HIf(compare, dex_pc));
+
+  // Add the newly created blocks to the graph.
+  graph_->AddBlock(then);
+  graph_->AddBlock(otherwise);
+  graph_->AddBlock(merge);
+
+  // Set up successor (and implicitly predecessor) relations.
+  cursor_block->AddSuccessor(otherwise);
+  cursor_block->AddSuccessor(then);
+  end_then->AddSuccessor(merge);
+  otherwise->AddSuccessor(merge);
+
+  // Set up dominance information.
+  then->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(then);
+  otherwise->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(otherwise);
+  merge->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(merge);
+
+  // Update the reverse post order.
+  size_t index = IndexOfElement(graph_->reverse_post_order_, cursor_block);
+  MakeRoomFor(&graph_->reverse_post_order_, 1, index);
+  graph_->reverse_post_order_[++index] = then;
+  index = IndexOfElement(graph_->reverse_post_order_, end_then);
+  MakeRoomFor(&graph_->reverse_post_order_, 2, index);
+  graph_->reverse_post_order_[++index] = otherwise;
+  graph_->reverse_post_order_[++index] = merge;
+
+  // Set the loop information of the newly created blocks.
+  HLoopInformation* loop_info = cursor_block->GetLoopInformation();
+  if (loop_info != nullptr) {
+    then->SetLoopInformation(cursor_block->GetLoopInformation());
+    merge->SetLoopInformation(cursor_block->GetLoopInformation());
+    otherwise->SetLoopInformation(cursor_block->GetLoopInformation());
+    for (HLoopInformationOutwardIterator loop_it(*cursor_block);
+         !loop_it.Done();
+         loop_it.Advance()) {
+      loop_it.Current()->Add(then);
+      loop_it.Current()->Add(merge);
+      loop_it.Current()->Add(otherwise);
+    }
+    // In case the original invoke location was a back edge, we need to update
+    // the loop to now have the merge block as a back edge.
+    if (loop_info->IsBackEdge(*original_invoke_block)) {
+      loop_info->RemoveBackEdge(original_invoke_block);
+      loop_info->AddBackEdge(merge);
+    }
+  }
+
+  // Set the try/catch information of the newly created blocks.
+  then->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+  merge->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+  otherwise->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+}
+
+bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
+                                                    ArtMethod* resolved_method,
+                                                    const InlineCache& ic) {
   // This optimization only works under JIT for now.
   DCHECK(Runtime::Current()->UseJit());
   if (graph_->GetInstructionSet() == kMips64) {
@@ -557,8 +758,9 @@
 
   if (!method->GetDeclaringClass()->IsVerified()) {
     uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
-    if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-          method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
+    if (Runtime::Current()->UseJit() ||
+        !compiler_driver_->IsMethodVerifiedWithoutFailures(
+            method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -781,16 +983,16 @@
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
   DexCompilationUnit dex_compilation_unit(
-    nullptr,
-    caller_compilation_unit_.GetClassLoader(),
-    class_linker,
-    callee_dex_file,
-    code_item,
-    resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
-    method_index,
-    resolved_method->GetAccessFlags(),
-    compiler_driver_->GetVerifiedMethod(&callee_dex_file, method_index),
-    dex_cache);
+      nullptr,
+      caller_compilation_unit_.GetClassLoader(),
+      class_linker,
+      callee_dex_file,
+      code_item,
+      resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
+      method_index,
+      resolved_method->GetAccessFlags(),
+      /* verified_method */ nullptr,
+      dex_cache);
 
   bool requires_ctor_barrier = false;
 
@@ -883,7 +1085,7 @@
   HConstantFolding fold(callee_graph);
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
-  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
+  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
 
   HOptimization* optimizations[] = {
     &intrinsics,
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 9dd9bf5..cdb2167 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -101,12 +101,18 @@
                                 const InlineCache& ic)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Try to inline targets of a polymorphic call. Currently unimplemented.
+  // Try to inline targets of a polymorphic call.
   bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                 ArtMethod* resolved_method,
                                 const InlineCache& ic)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
+                                            ArtMethod* resolved_method,
+                                            const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+
   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                            HInstruction* receiver,
                                            uint32_t dex_pc) const
@@ -118,6 +124,57 @@
                                 bool do_rtp)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Add a type guard on the given `receiver`. This will add to the graph:
+  // i0 = HInstanceFieldGet(receiver, klass)
+  // i1 = HLoadClass(class_index, is_referrer)
+  // i2 = HNotEqual(i1, i0)
+  //
+  // And if `with_deoptimization` is true:
+  // HDeoptimize(i2)
+  //
+  // The method returns the `HNotEqual`, that will be used for polymorphic inlining.
+  HInstruction* AddTypeGuard(HInstruction* receiver,
+                             HInstruction* cursor,
+                             HBasicBlock* bb_cursor,
+                             uint32_t class_index,
+                             bool is_referrer,
+                             HInstruction* invoke_instruction,
+                             bool with_deoptimization)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  /*
+   * Ad-hoc implementation for implementing a diamond pattern in the graph for
+   * polymorphic inlining:
+   * 1) `compare` becomes the input of the new `HIf`.
+   * 2) Everything up until `invoke_instruction` is in the then branch (could
+   *    contain multiple blocks).
+   * 3) `invoke_instruction` is moved to the otherwise block.
+   * 4) If `return_replacement` is not null, the merge block will have
+   *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
+   *
+   * Before:
+   *             Block1
+   *             compare
+   *              ...
+   *         invoke_instruction
+   *
+   * After:
+   *            Block1
+   *            compare
+   *              if
+   *          /        \
+   *         /          \
+   *   Then block    Otherwise block
+   *      ...       invoke_instruction
+   *       \              /
+   *        \            /
+   *          Merge block
+   *  phi(return_replacement, invoke_instruction)
+   */
+  void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
+                                                HInstruction* return_replacement,
+                                                HInstruction* invoke_instruction);
+
   HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index cc4b6f6..7905104 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -25,6 +25,13 @@
 
 /**
  * Implements optimizations specific to each instruction.
+ *
+ * Note that graph simplifications producing a constant should be
+ * implemented in art::HConstantFolding, while graph simplifications
+ * not producing constants should be implemented in
+ * art::InstructionSimplifier.  (This convention is a choice that was
+ * made during the development of these parts of the compiler and is
+ * not bound by any technical requirement.)
  */
 class InstructionSimplifier : public HOptimization {
  public:
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index db39bc8..316e86b 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -570,6 +570,7 @@
                                    NeedsEnvironmentOrCache(intrinsic),
                                    GetSideEffects(intrinsic),
                                    GetExceptions(intrinsic));
+              MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized);
             }
           }
         }
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 3bf3f7f..2ab50bb 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -33,8 +33,8 @@
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
  public:
-  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver)
-      : HOptimization(graph, kIntrinsicsRecognizerPassName),
+  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kIntrinsicsRecognizerPassName, stats),
         driver_(driver) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index a6b4078..33bb2e8 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -141,6 +141,7 @@
             DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
+          MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved);
         } else if (instruction->CanThrow()) {
           // If `instruction` can throw, we cannot move further instructions
           // that can throw as well.
diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h
index 0b5a0f1..bf56f53 100644
--- a/compiler/optimizing/licm.h
+++ b/compiler/optimizing/licm.h
@@ -26,8 +26,9 @@
 
 class LICM : public HOptimization {
  public:
-  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects)
-      : HOptimization(graph, kLoopInvariantCodeMotionPassName), side_effects_(side_effects) {}
+  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kLoopInvariantCodeMotionPassName, stats),
+        side_effects_(side_effects) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 9fb32f4..d446539 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -79,7 +79,7 @@
     graph_->BuildDominatorTree();
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
-    LICM(graph_, side_effects).Run();
+    LICM(graph_, side_effects, nullptr).Run();
   }
 
   // General building fields.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index b26ce0a..f36dc6e 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1420,7 +1420,38 @@
   }
 }
 
-HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) {
+HBasicBlock* HBasicBlock::SplitBeforeForInlining(HInstruction* cursor) {
+  DCHECK_EQ(cursor->GetBlock(), this);
+
+  HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(),
+                                                                    cursor->GetDexPc());
+  new_block->instructions_.first_instruction_ = cursor;
+  new_block->instructions_.last_instruction_ = instructions_.last_instruction_;
+  instructions_.last_instruction_ = cursor->previous_;
+  if (cursor->previous_ == nullptr) {
+    instructions_.first_instruction_ = nullptr;
+  } else {
+    cursor->previous_->next_ = nullptr;
+    cursor->previous_ = nullptr;
+  }
+
+  new_block->instructions_.SetBlockOfInstructions(new_block);
+
+  for (HBasicBlock* successor : GetSuccessors()) {
+    new_block->successors_.push_back(successor);
+    successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
+  }
+  successors_.clear();
+
+  for (HBasicBlock* dominated : GetDominatedBlocks()) {
+    dominated->dominator_ = new_block;
+    new_block->dominated_blocks_.push_back(dominated);
+  }
+  dominated_blocks_.clear();
+  return new_block;
+}
+
+HBasicBlock* HBasicBlock::SplitAfterForInlining(HInstruction* cursor) {
   DCHECK(!cursor->IsControlFlow());
   DCHECK_NE(instructions_.last_instruction_, cursor);
   DCHECK_EQ(cursor->GetBlock(), this);
@@ -1573,6 +1604,20 @@
   }
 }
 
+void HInstructionList::AddBefore(HInstruction* cursor, const HInstructionList& instruction_list) {
+  DCHECK(Contains(cursor));
+  if (!instruction_list.IsEmpty()) {
+    if (cursor == first_instruction_) {
+      first_instruction_ = instruction_list.first_instruction_;
+    } else {
+      cursor->previous_->next_ = instruction_list.first_instruction_;
+    }
+    instruction_list.last_instruction_->next_ = cursor;
+    instruction_list.first_instruction_->previous_ = cursor->previous_;
+    cursor->previous_ = instruction_list.last_instruction_;
+  }
+}
+
 void HInstructionList::Add(const HInstructionList& instruction_list) {
   if (IsEmpty()) {
     first_instruction_ = instruction_list.first_instruction_;
@@ -1815,18 +1860,6 @@
   graph_ = nullptr;
 }
 
-// Create space in `blocks` for adding `number_of_new_blocks` entries
-// starting at location `at`. Blocks after `at` are moved accordingly.
-static void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
-                        size_t number_of_new_blocks,
-                        size_t after) {
-  DCHECK_LT(after, blocks->size());
-  size_t old_size = blocks->size();
-  size_t new_size = old_size + number_of_new_blocks;
-  blocks->resize(new_size);
-  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
-}
-
 void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) {
   DCHECK_EQ(block->GetGraph(), this);
   DCHECK(block->GetSuccessors().empty());
@@ -1880,7 +1913,8 @@
     DCHECK(!body->IsInLoop());
     HInstruction* last = body->GetLastInstruction();
 
-    invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions());
+    // Note that we add instructions before the invoke only to simplify polymorphic inlining.
+    invoke->GetBlock()->instructions_.AddBefore(invoke, body->GetInstructions());
     body->GetInstructions().SetBlockOfInstructions(invoke->GetBlock());
 
     // Replace the invoke with the return value of the inlined graph.
@@ -1898,7 +1932,8 @@
     // with the second half.
     ArenaAllocator* allocator = outer_graph->GetArena();
     HBasicBlock* at = invoke->GetBlock();
-    HBasicBlock* to = at->SplitAfter(invoke);
+    // Note that we split before the invoke only to simplify polymorphic inlining.
+    HBasicBlock* to = at->SplitBeforeForInlining(invoke);
 
     HBasicBlock* first = entry_block_->GetSuccessors()[0];
     DCHECK(!first->IsInLoop());
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 01ba704..399afab 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -131,6 +131,7 @@
   void SetBlockOfInstructions(HBasicBlock* block) const;
 
   void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list);
+  void AddBefore(HInstruction* cursor, const HInstructionList& instruction_list);
   void Add(const HInstructionList& instruction_list);
 
   // Return the number of instructions in the list. This is an expensive operation.
@@ -618,6 +619,7 @@
 
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
+  friend class HInliner;             // For the reverse post order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
@@ -972,12 +974,15 @@
   // loop and try/catch information.
   HBasicBlock* SplitBefore(HInstruction* cursor);
 
-  // Split the block into two blocks just after `cursor`. Returns the newly
+  // Split the block into two blocks just before `cursor`. Returns the newly
   // created block. Note that this method just updates raw block information,
   // like predecessors, successors, dominators, and instruction list. It does not
   // update the graph, reverse post order, loop information, nor make sure the
   // blocks are consistent (for example ending with a control flow instruction).
-  HBasicBlock* SplitAfter(HInstruction* cursor);
+  HBasicBlock* SplitBeforeForInlining(HInstruction* cursor);
+
+  // Similar to `SplitBeforeForInlining` but does it after `cursor`.
+  HBasicBlock* SplitAfterForInlining(HInstruction* cursor);
 
   // Split catch block into two blocks after the original move-exception bytecode
   // instruction, or at the beginning if not present. Returns the newly created,
@@ -2063,6 +2068,7 @@
   }
 
   SideEffects GetSideEffects() const { return side_effects_; }
+  void SetSideEffects(SideEffects other) { side_effects_ = other; }
   void AddSideEffects(SideEffects other) { side_effects_.Add(other); }
 
   size_t GetLifetimePosition() const { return lifetime_position_; }
@@ -2101,7 +2107,6 @@
  protected:
   virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
   virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
-  void SetSideEffects(SideEffects other) { side_effects_ = other; }
 
  private:
   void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); }
@@ -6370,6 +6375,18 @@
   DISALLOW_COPY_AND_ASSIGN(SwitchTable);
 };
 
+// Create space in `blocks` for adding `number_of_new_blocks` entries right
+// after index `after`. Blocks following `after` are moved to the end accordingly.
+inline void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
+                        size_t number_of_new_blocks,
+                        size_t after) {
+  DCHECK_LT(after, blocks->size());
+  size_t old_size = blocks->size();
+  size_t new_size = old_size + number_of_new_blocks;
+  blocks->resize(new_size);
+  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 12b748b..b1891c9 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -505,12 +505,12 @@
       graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
-  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph);
+  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
   HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
-  LICM* licm = new (arena) LICM(graph, *side_effects);
+  LICM* licm = new (arena) LICM(graph, *side_effects, stats);
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
@@ -519,7 +519,7 @@
       graph, stats, "instruction_simplifier_after_bce");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_before_codegen");
-  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
+  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -651,7 +651,7 @@
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, Runtime::Current()->GetClassLinker(), dex_file, code_item,
     class_def_idx, method_idx, access_flags,
-    compiler_driver->GetVerifiedMethod(&dex_file, method_idx), dex_cache);
+    nullptr, dex_cache);
 
   bool requires_barrier = dex_compilation_unit.IsConstructor()
       && compiler_driver->RequiresConstructorBarrier(Thread::Current(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 52a7b10..179004b 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -56,6 +56,10 @@
   kMonomorphicCall,
   kPolymorphicCall,
   kMegamorphicCall,
+  kBooleanSimplified,
+  kIntrinsicRecognized,
+  kLoopInvariantMoved,
+  kSelectGenerated,
   kLastStat
 };
 
@@ -124,7 +128,11 @@
       case kInlinedPolymorphicCall: name = "InlinedPolymorphicCall"; break;
       case kMonomorphicCall: name = "MonomorphicCall"; break;
       case kPolymorphicCall: name = "PolymorphicCall"; break;
-      case kMegamorphicCall: name = "kMegamorphicCall"; break;
+      case kMegamorphicCall: name = "MegamorphicCall"; break;
+      case kBooleanSimplified : name = "BooleanSimplified"; break;
+      case kIntrinsicRecognized : name = "IntrinsicRecognized"; break;
+      case kLoopInvariantMoved : name = "LoopInvariantMoved"; break;
+      case kSelectGenerated : name = "SelectGenerated"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 105b30a..e52476e 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -141,6 +141,8 @@
       block->MergeWith(merge_block);
     }
 
+    MaybeRecordStat(MethodCompilationStat::kSelectGenerated);
+
     // No need to update dominance information, as we are simplifying
     // a simple diamond shape, where the join block is merged with the
     // entry block. Any following blocks would have had the join block
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index f9d6d4d..c6dca58 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -47,8 +47,8 @@
 
 class HSelectGenerator : public HOptimization {
  public:
-  explicit HSelectGenerator(HGraph* graph)
-    : HOptimization(graph, kSelectGeneratorPassName) {}
+  HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats)
+    : HOptimization(graph, kSelectGeneratorPassName, stats) {}
 
   void Run() OVERRIDE;
 
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index 77f8d6c..dfc379f 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -55,20 +55,42 @@
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain,art/compiler,target,debug,$(dex2oat_target_arch)))
 endif
 
+# Note: the order is important because of static linking resolution.
+DEX2OAT_STATIC_DEPENDENCIES := \
+  libziparchive-host \
+  libnativehelper \
+  libnativebridge \
+  libnativeloader \
+  libsigchain_dummy \
+  libvixl \
+  liblog \
+  libz \
+  libbacktrace \
+  libLLVMObject \
+  libLLVMBitReader \
+  libLLVMMC \
+  libLLVMMCParser \
+  libLLVMCore \
+  libLLVMSupport \
+  libcutils \
+  libunwindbacktrace \
+  libutils \
+  libbase \
+  liblz4 \
+  liblzma
+
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,ndebug,$(dex2oat_host_arch),static))
   endif
 endif
 
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host liblz4,art/compiler,host,debug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase liblz4,art/compiler,host,debug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,debug,$(dex2oat_host_arch),static))
   endif
 endif
 
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 078a978..f3e8d6b 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -132,6 +132,11 @@
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
+  // Returns true if this method might be copied from another class.
+  bool MightBeCopied() {
+    return IsMiranda() || IsDefault() || IsDefaultConflicting();
+  }
+
   bool IsMiranda() {
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index de46b0c..8aaeaac 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -37,6 +37,7 @@
 // and the "-verbose:" command line argument.
 struct LogVerbosity {
   bool class_linker;  // Enabled with "-verbose:class".
+  bool collector;
   bool compiler;
   bool deopt;
   bool gc;
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 5278d1b..936c988 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -759,7 +759,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (m->IsRuntimeMethod()) {
     CHECK(m->GetDeclaringClass() == nullptr) << PrettyMethod(m);
-  } else if (m->IsMiranda()) {
+  } else if (m->MightBeCopied()) {
     CHECK(m->GetDeclaringClass() != nullptr) << PrettyMethod(m);
   } else if (expected_class != nullptr) {
     CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << PrettyMethod(m);
@@ -1137,18 +1137,18 @@
 
   virtual void Visit(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
     GcRoot<mirror::Class>* resolved_types = method->GetDexCacheResolvedTypes(sizeof(void*));
-    const bool is_miranda = method->IsMiranda();
+    const bool maybe_copied = method->MightBeCopied();
     if (resolved_types != nullptr) {
       bool in_image_space = false;
-      if (kIsDebugBuild || is_miranda) {
+      if (kIsDebugBuild || maybe_copied) {
         in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
             reinterpret_cast<const uint8_t*>(resolved_types) - header_.GetImageBegin());
       }
       // Must be in image space for non-miranda method.
-      DCHECK(is_miranda || in_image_space)
+      DCHECK(maybe_copied || in_image_space)
           << resolved_types << " is not in image starting at "
           << reinterpret_cast<void*>(header_.GetImageBegin());
-      if (!is_miranda || in_image_space) {
+      if (!maybe_copied || in_image_space) {
         // Go through the array so that we don't need to do a slow map lookup.
         method->SetDexCacheResolvedTypes(*reinterpret_cast<GcRoot<mirror::Class>**>(resolved_types),
                                          sizeof(void*));
@@ -1157,15 +1157,15 @@
     ArtMethod** resolved_methods = method->GetDexCacheResolvedMethods(sizeof(void*));
     if (resolved_methods != nullptr) {
       bool in_image_space = false;
-      if (kIsDebugBuild || is_miranda) {
+      if (kIsDebugBuild || maybe_copied) {
         in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
               reinterpret_cast<const uint8_t*>(resolved_methods) - header_.GetImageBegin());
       }
       // Must be in image space for non-miranda method.
-      DCHECK(is_miranda || in_image_space)
+      DCHECK(maybe_copied || in_image_space)
           << resolved_methods << " is not in image starting at "
           << reinterpret_cast<void*>(header_.GetImageBegin());
-      if (!is_miranda || in_image_space) {
+      if (!maybe_copied || in_image_space) {
         // Go through the array so that we don't need to do a slow map lookup.
         method->SetDexCacheResolvedMethods(*reinterpret_cast<ArtMethod***>(resolved_methods),
                                            sizeof(void*));
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 3a0f3e5..5c3029a 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -263,7 +263,7 @@
     for (ArtMethod& method : klass->GetCopiedMethods(sizeof(void*))) {
       AssertMethod(&method);
       EXPECT_FALSE(method.IsDirect());
-      EXPECT_TRUE(method.IsMiranda() || method.IsDefault() || method.IsDefaultConflicting());
+      EXPECT_TRUE(method.MightBeCopied());
       EXPECT_TRUE(method.GetDeclaringClass()->IsInterface())
           << "declaring class: " << PrettyClass(method.GetDeclaringClass());
       EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()))
@@ -1225,12 +1225,12 @@
   dex_cache->SetLocation(location.Get());
   const DexFile* old_dex_file = dex_cache->GetDexFile();
 
-  DexFile* dex_file = new DexFile(old_dex_file->Begin(),
-                                  old_dex_file->Size(),
-                                  location->ToModifiedUtf8(),
-                                  0u,
-                                  nullptr,
-                                  nullptr);
+  std::unique_ptr<DexFile> dex_file(new DexFile(old_dex_file->Begin(),
+                                                old_dex_file->Size(),
+                                                location->ToModifiedUtf8(),
+                                                0u,
+                                                nullptr,
+                                                nullptr));
   {
     WriterMutexLock mu(soa.Self(), *class_linker->DexLock());
     // Check that inserting with a UTF16 name works.
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 369e408..83e5bad 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -34,11 +34,7 @@
 
 const char* AllocRecord::GetClassDescriptor(std::string* storage) const {
   // klass_ could contain null only if we implement class unloading.
-  if (UNLIKELY(klass_.IsNull())) {
-    return "null";
-  } else {
-    return klass_.Read()->GetDescriptor(storage);
-  }
+  return klass_.IsNull() ? "null" : klass_.Read()->GetDescriptor(storage);
 }
 
 void AllocRecordObjectMap::SetProperties() {
@@ -105,8 +101,19 @@
   size_t count = recent_record_max_;
   // Only visit the last recent_record_max_ number of allocation records in entries_ and mark the
   // klass_ fields as strong roots.
-  for (auto it = entries_.rbegin(), end = entries_.rend(); count > 0 && it != end; count--, ++it) {
-    buffered_visitor.VisitRootIfNonNull(it->second->GetClassGcRoot());
+  for (auto it = entries_.rbegin(), end = entries_.rend(); it != end; ++it) {
+    AllocRecord* record = it->second;
+    if (count > 0) {
+      buffered_visitor.VisitRootIfNonNull(record->GetClassGcRoot());
+      --count;
+    }
+    // Visit all of the stack frames to make sure no methods in the stack traces get unloaded by
+    // class unloading.
+    for (size_t i = 0, depth = record->GetDepth(); i < depth; ++i) {
+      const AllocRecordStackTraceElement& element = record->StackElement(i);
+      DCHECK(element.GetMethod() != nullptr);
+      element.GetMethod()->VisitRoots(buffered_visitor, sizeof(void*));
+    }
   }
 }
 
@@ -131,12 +138,7 @@
   VLOG(heap) << "Start SweepAllocationRecords()";
   size_t count_deleted = 0, count_moved = 0, count = 0;
   // Only the first (size - recent_record_max_) number of records can be deleted.
-  size_t delete_bound;
-  if (entries_.size() <= recent_record_max_) {
-    delete_bound = 0;
-  } else {
-    delete_bound = entries_.size() - recent_record_max_;
-  }
+  const size_t delete_bound = std::max(entries_.size(), recent_record_max_) - recent_record_max_;
   for (auto it = entries_.begin(), end = entries_.end(); it != end;) {
     ++count;
     // This does not need a read barrier because this is called by GC.
@@ -187,7 +189,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         trace(trace_in),
-        depth(0),
         max_depth(max) {}
 
   // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
@@ -209,7 +210,7 @@
   }
 
   AllocRecordStackTrace* trace;
-  size_t depth;
+  size_t depth = 0u;
   const size_t max_depth;
 };
 
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 8b125dd..2c487fe 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -58,10 +58,16 @@
       page_release_mode_(page_release_mode),
       page_release_size_threshold_(page_release_size_threshold),
       is_running_on_memory_tool_(running_on_memory_tool) {
+  DCHECK_ALIGNED(base, kPageSize);
   DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
   DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
   CHECK_LE(capacity, max_capacity);
   CHECK_ALIGNED(page_release_size_threshold_, kPageSize);
+  // Zero the memory explicitly (don't rely on the mem map being zero-initialized).
+  if (!kMadviseZeroes) {
+    memset(base_, 0, max_capacity);
+  }
+  CHECK_EQ(madvise(base_, max_capacity, MADV_DONTNEED), 0);
   if (!initialized_) {
     Initialize();
   }
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index a472a8b..b12cb5b 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -192,6 +192,7 @@
         Verify();
       }
       DCHECK(slot != nullptr);
+      DCHECK(slot->Next() == nullptr);
       Slot** headp = reinterpret_cast<Slot**>(&head_);
       Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
       Slot* old_head = *headp;
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 8e1b7f4..d393f0b 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1622,7 +1622,9 @@
 inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   ConcurrentCopyingRefFieldsVisitor visitor(this);
-  to_ref->VisitReferences(visitor, visitor);
+  // Disable the read barrier for performance reasons.
+  to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
+      visitor, visitor);
 }
 
 // Process a field.
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index b60426d..c9ac435 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -66,6 +66,10 @@
     return end_;
   }
 
+  size_t Size() const {
+    return size_;
+  }
+
  private:
   void UpdateSize() {
     size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_);
diff --git a/runtime/gc/collector/immune_spaces.cc b/runtime/gc/collector/immune_spaces.cc
index 8f9a9e2..26da4ca 100644
--- a/runtime/gc/collector/immune_spaces.cc
+++ b/runtime/gc/collector/immune_spaces.cc
@@ -18,6 +18,7 @@
 
 #include "gc/space/space-inl.h"
 #include "mirror/object.h"
+#include "oat_file.h"
 
 namespace art {
 namespace gc {
@@ -45,11 +46,16 @@
       space::ImageSpace* image_space = space->AsImageSpace();
       // Update the end to include the other non-heap sections.
       space_end = RoundUp(reinterpret_cast<uintptr_t>(image_space->GetImageEnd()), kPageSize);
-      uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_space->GetOatFileBegin());
-      uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_space->GetOatFileEnd());
-      if (space_end == oat_begin) {
-        DCHECK_GE(oat_end, oat_begin);
-        space_end = oat_end;
+      // For the app image case, GetOatFileBegin is where the oat file was mapped during image
+      // creation, the actual oat file could be somewhere else.
+      const OatFile* const image_oat_file = image_space->GetOatFile();
+      if (image_oat_file != nullptr) {
+        uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_oat_file->Begin());
+        uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_oat_file->End());
+        if (space_end == oat_begin) {
+          DCHECK_GE(oat_end, oat_begin);
+          space_end = oat_end;
+        }
       }
     }
     if (cur_begin == 0u) {
@@ -71,6 +77,8 @@
   }
   largest_immune_region_.SetBegin(reinterpret_cast<mirror::Object*>(best_begin));
   largest_immune_region_.SetEnd(reinterpret_cast<mirror::Object*>(best_end));
+  VLOG(collector) << "Immune region " << largest_immune_region_.Begin() << "-"
+                  << largest_immune_region_.End();
 }
 
 void ImmuneSpaces::AddSpace(space::ContinuousSpace* space) {
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
index ea290dd..56838f5 100644
--- a/runtime/gc/collector/immune_spaces_test.cc
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -72,17 +72,31 @@
   EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), b.Limit());
 }
 
+class DummyOatFile : public OatFile {
+ public:
+  DummyOatFile(uint8_t* begin, uint8_t* end) : OatFile("Location", /*is_executable*/ false) {
+    begin_ = begin;
+    end_ = end;
+  }
+};
+
 class DummyImageSpace : public space::ImageSpace {
  public:
-  DummyImageSpace(MemMap* map, accounting::ContinuousSpaceBitmap* live_bitmap)
+  DummyImageSpace(MemMap* map,
+                  accounting::ContinuousSpaceBitmap* live_bitmap,
+                  std::unique_ptr<DummyOatFile>&& oat_file)
       : ImageSpace("DummyImageSpace",
                    /*image_location*/"",
                    map,
                    live_bitmap,
-                   map->End()) {}
+                   map->End()) {
+    oat_file_ = std::move(oat_file);
+    oat_file_non_owned_ = oat_file_.get();
+  }
 
-  // OatSize is how large the oat file is after the image.
-  static DummyImageSpace* Create(size_t size, size_t oat_size) {
+  // size is the size of the image space, oat_offset is the distance from the end of the image
+  // space to the start of the oat file, and oat_size is the size of the oat file.
+  static DummyImageSpace* Create(size_t size, size_t oat_offset, size_t oat_size) {
     std::string error_str;
     std::unique_ptr<MemMap> map(MemMap::MapAnonymous("DummyImageSpace",
                                                      nullptr,
@@ -100,6 +114,9 @@
     if (live_bitmap == nullptr) {
       return nullptr;
     }
+    // The actual mapped oat file may not be directly after the image for the app image case.
+    std::unique_ptr<DummyOatFile> oat_file(new DummyOatFile(map->End() + oat_offset,
+                                                            map->End() + oat_offset + oat_size));
     // Create image header.
     ImageSection sections[ImageHeader::kSectionCount];
     new (map->Begin()) ImageHeader(
@@ -108,6 +125,7 @@
         sections,
         /*image_roots*/PointerToLowMemUInt32(map->Begin()) + 1,
         /*oat_checksum*/0u,
+        // The oat file data in the header is always right after the image space.
         /*oat_file_begin*/PointerToLowMemUInt32(map->End()),
         /*oat_data_begin*/PointerToLowMemUInt32(map->End()),
         /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size),
@@ -121,7 +139,7 @@
         /*is_pic*/false,
         ImageHeader::kStorageModeUncompressed,
         /*storage_size*/0u);
-    return new DummyImageSpace(map.release(), live_bitmap.release());
+    return new DummyImageSpace(map.release(), live_bitmap.release(), std::move(oat_file));
   }
 };
 
@@ -129,7 +147,9 @@
   ImmuneSpaces spaces;
   constexpr size_t image_size = 123 * kPageSize;
   constexpr size_t image_oat_size = 321 * kPageSize;
-  std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size, image_oat_size));
+  std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size,
+                                                                       0,
+                                                                       image_oat_size));
   ASSERT_TRUE(image_space != nullptr);
   const ImageHeader& image_header = image_space->GetImageHeader();
   EXPECT_EQ(image_header.GetImageSize(), image_size);
@@ -150,6 +170,18 @@
   EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
             image_space->Begin());
   EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), space.Limit());
+  // Check that an oat file separated from the image by a gap is not included in the region.
+  image_space.reset(DummyImageSpace::Create(image_size, kPageSize, image_oat_size));
+  spaces.Reset();
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(image_space.get());
+  }
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            image_space->Begin());
+  // Size should be equal, we should not add the oat file since it is not adjacent to the image
+  // space.
+  EXPECT_EQ(spaces.GetLargestImmuneRegion().Size(), image_size);
 }
 
 }  // namespace collector
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 3c9312f..a656fb8 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -845,6 +845,13 @@
 void Heap::IncrementDisableThreadFlip(Thread* self) {
   // Supposed to be called by mutators. If thread_flip_running_ is true, block. Otherwise, go ahead.
   CHECK(kUseReadBarrier);
+  bool is_nested = self->GetDisableThreadFlipCount() > 0;
+  self->IncrementDisableThreadFlipCount();
+  if (is_nested) {
+    // If this is a nested JNI critical section enter, we don't need to wait or increment the global
+    // counter. The global counter is incremented only once for a thread for the outermost enter.
+    return;
+  }
   ScopedThreadStateChange tsc(self, kWaitingForGcThreadFlip);
   MutexLock mu(self, *thread_flip_lock_);
   bool has_waited = false;
@@ -867,10 +874,20 @@
   // Supposed to be called by mutators. Decrement disable_thread_flip_count_ and potentially wake up
   // the GC waiting before doing a thread flip.
   CHECK(kUseReadBarrier);
+  self->DecrementDisableThreadFlipCount();
+  bool is_outermost = self->GetDisableThreadFlipCount() == 0;
+  if (!is_outermost) {
+    // If this is not an outermost JNI critical exit, we don't need to decrement the global counter.
+    // The global counter is decremented only once for a thread for the outermost exit.
+    return;
+  }
   MutexLock mu(self, *thread_flip_lock_);
   CHECK_GT(disable_thread_flip_count_, 0U);
   --disable_thread_flip_count_;
-  thread_flip_cond_->Broadcast(self);
+  if (disable_thread_flip_count_ == 0) {
+    // Potentially notify the GC thread blocking to begin a thread flip.
+    thread_flip_cond_->Broadcast(self);
+  }
 }
 
 void Heap::ThreadFlipBegin(Thread* self) {
@@ -882,7 +899,8 @@
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
   CHECK(!thread_flip_running_);
-  // Set this to true before waiting so that a new mutator entering a JNI critical won't starve GC.
+  // Set this to true before waiting so that frequent JNI critical enter/exits won't starve
+  // GC. This is like a writer preference of a reader-writer lock.
   thread_flip_running_ = true;
   while (disable_thread_flip_count_ > 0) {
     has_waited = true;
@@ -904,6 +922,7 @@
   MutexLock mu(self, *thread_flip_lock_);
   CHECK(thread_flip_running_);
   thread_flip_running_ = false;
+  // Potentially notify mutator threads blocking to enter a JNI critical section.
   thread_flip_cond_->Broadcast(self);
 }
 
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index c02e2d3..a181e23 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -1113,6 +1113,8 @@
   // Used to synchronize between JNI critical calls and the thread flip of the CC collector.
   Mutex* thread_flip_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::unique_ptr<ConditionVariable> thread_flip_cond_ GUARDED_BY(thread_flip_lock_);
+  // This counter keeps track of how many threads are currently in a JNI critical section. This is
+  // incremented once per thread even with nested enters.
   size_t disable_thread_flip_count_ GUARDED_BY(thread_flip_lock_);
   bool thread_flip_running_ GUARDED_BY(thread_flip_lock_);
 
diff --git a/runtime/gc/space/memory_tool_malloc_space-inl.h b/runtime/gc/space/memory_tool_malloc_space-inl.h
index ea8b8aa..6cb2465 100644
--- a/runtime/gc/space/memory_tool_malloc_space-inl.h
+++ b/runtime/gc/space/memory_tool_malloc_space-inl.h
@@ -240,9 +240,9 @@
                     kAdjustForRedzoneInAllocSize,
                     kUseObjSizeForUsable>::MemoryToolMallocSpace(
     MemMap* mem_map, size_t initial_size, Params... params) : S(mem_map, initial_size, params...) {
-  MEMORY_TOOL_MAKE_DEFINED(mem_map->Begin(), initial_size);
-  MEMORY_TOOL_MAKE_UNDEFINED(mem_map->Begin() + initial_size,
-                     mem_map->Size() - initial_size);
+  // Don't want to change the valgrind states of the mem map here as the allocator is already
+  // initialized at this point and that may interfere with what the allocator does internally. Note
+  // that the tail beyond the initial size is mprotected.
 }
 
 template <typename S,
diff --git a/runtime/image.h b/runtime/image.h
index c449e43..146ee00 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -143,6 +143,8 @@
     oat_checksum_ = oat_checksum;
   }
 
+  // The location that the oat file was expected to be when the image was created. The actual
+  // oat file may be at a different location for application images.
   uint8_t* GetOatFileBegin() const {
     return reinterpret_cast<uint8_t*>(oat_file_begin_);
   }
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 56aeefc..e3cbf53 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -290,6 +290,14 @@
   bool IsActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
+        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
+        have_branch_listeners_ || have_invoke_virtual_or_interface_listeners_;
+  }
+
+  // Any instrumentation *other* than what is needed for Jit profiling active?
+  bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_dex_pc_listeners_ || have_method_exit_listeners_ ||
+        have_field_read_listeners_ || have_field_write_listeners_ ||
         have_exception_caught_listeners_ || have_method_unwind_listeners_;
   }
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 4fd3c78..a595d33 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -320,12 +320,13 @@
         // No Mterp variant - just use the switch interpreter.
         return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,
                                               false);
+      } else if (UNLIKELY(!Runtime::Current()->IsStarted())) {
+        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                               false);
       } else {
-        const instrumentation::Instrumentation* const instrumentation =
-            Runtime::Current()->GetInstrumentation();
         while (true) {
-          if (instrumentation->IsActive() || !Runtime::Current()->IsStarted()) {
-            // TODO: allow JIT profiling instrumentation.  Now, just punt on all instrumentation.
+          // Mterp does not support all instrumentation/debugging.
+          if (MterpShouldSwitchInterpreters()) {
 #if !defined(__clang__)
             return ExecuteGotoImpl<false, false>(self, code_item, shadow_frame, result_register);
 #else
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 949112d..19d971e 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -948,11 +948,15 @@
   __attribute__((cold))
   SHARED_REQUIRES(Locks::mutator_lock_);
 
+static inline bool TraceExecutionEnabled() {
+  // Return true if you want TraceExecution invocation before each bytecode execution.
+  return false;
+}
+
 static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruction* inst,
                                   const uint32_t dex_pc)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  constexpr bool kTracing = false;
-  if (kTracing) {
+  if (TraceExecutionEnabled()) {
 #define TRACE_LOG std::cerr
     std::ostringstream oss;
     oss << PrettyMethod(shadow_frame.GetMethod())
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
index 474bc3c..774e167 100644
--- a/runtime/interpreter/mterp/arm/bincmp.S
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -6,17 +6,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    b${revcmp} .L_${opcode}_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_${opcode}_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -25,10 +37,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    mov${revcmp} rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 1dba856..3456a75 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -12,7 +12,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -103,8 +102,12 @@
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     rPC, r0, #CODEITEM_INSNS_OFFSET
     add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
-    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -116,12 +119,31 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
-    EXPORT_PC
-    mov     r0, rSELF
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    blne    MterpSuspendCheck           @ (self)
+    bne     1f
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
+1:
+    EXPORT_PC
+    mov     r0, rSELF
+    bl      MterpSuspendCheck           @ (self)
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF
+    add r1, rFP, #OFF_FP_SHADOWFRAME
+    mov r2, rINST
+    bl MterpLogOSR
+#endif
+    mov r0, #1                          @ Signal normal return
+    b MterpDone
 
 /*
  * Bail out to reference interpreter.
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
index b2370bf..298af8a 100644
--- a/runtime/interpreter/mterp/arm/header.S
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -85,6 +85,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
@@ -109,14 +112,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
diff --git a/runtime/interpreter/mterp/arm/invoke.S b/runtime/interpreter/mterp/arm/invoke.S
index 7575865..e47dd1b 100644
--- a/runtime/interpreter/mterp/arm/invoke.S
+++ b/runtime/interpreter/mterp/arm/invoke.S
@@ -14,6 +14,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
index 9b3632a..6861950 100644
--- a/runtime/interpreter/mterp/arm/op_goto.S
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -6,20 +6,28 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
-       @ If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backwards branch refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
index 2231acd..91639ca 100644
--- a/runtime/interpreter/mterp/arm/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -5,17 +5,25 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+#if MTERP_PROFILE_BRANCHES
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
index 6b72ff5..e730b52 100644
--- a/runtime/interpreter/mterp/arm/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -10,21 +10,29 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
index 1e3370e..4c369cb 100644
--- a/runtime/interpreter/mterp/arm/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -9,7 +9,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -17,9 +17,18 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -30,8 +39,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
+    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_shl_long.S b/runtime/interpreter/mterp/arm/op_shl_long.S
index dc8a679..12ea248 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long.S
@@ -12,16 +12,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >>> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
index fd7668d..4799e77 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
@@ -6,17 +6,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >>> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_long.S b/runtime/interpreter/mterp/arm/op_shr_long.S
index c0edf90..88a13d6 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long.S
@@ -12,16 +12,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
index ffeaf9c..78d8bb7 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
@@ -6,17 +6,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long.S b/runtime/interpreter/mterp/arm/op_ushr_long.S
index f64c861..f98ec63 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long.S
@@ -12,16 +12,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
index dbab08d..840283d 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
@@ -6,17 +6,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
index 6e9ef55..800804d 100644
--- a/runtime/interpreter/mterp/arm/zcmp.S
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -6,25 +6,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    b${revcmp} .L_${opcode}_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ load flags after bl (bl clobbers lr)
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_${opcode}_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    mov${revcmp} rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
index ecab2ce..ed850fc 100644
--- a/runtime/interpreter/mterp/arm64/bincmp.S
+++ b/runtime/interpreter/mterp/arm64/bincmp.S
@@ -6,17 +6,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    mov${condition} w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.${condition} .L_${opcode}_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_${opcode}_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -25,11 +36,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, ${condition}    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index b360539..aae78de 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -10,7 +10,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -99,8 +98,11 @@
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     xPC, x0, #CODEITEM_INSNS_OFFSET
     add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
-    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -120,10 +122,24 @@
     EXPORT_PC
     mov     x0, xSELF
     bl      MterpSuspendCheck           // (self)
+    cbnz    w0, MterpFallback           // Something in the environment changed, switch interpreters
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
 /*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm x2, xINST, 0, 31
+    bl MterpLogOSR
+#endif
+    mov  x0, #1                         // Signal normal return
+    b    MterpDone
+
+/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 351a607..7223750 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -87,6 +87,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
 #define xPC     x20
@@ -114,14 +117,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
diff --git a/runtime/interpreter/mterp/arm64/invoke.S b/runtime/interpreter/mterp/arm64/invoke.S
index ff1974c..7a32df7 100644
--- a/runtime/interpreter/mterp/arm64/invoke.S
+++ b/runtime/interpreter/mterp/arm64/invoke.S
@@ -9,11 +9,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      $helper
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
index db98a45..7e2f6a9 100644
--- a/runtime/interpreter/mterp/arm64/op_goto.S
+++ b/runtime/interpreter/mterp/arm64/op_goto.S
@@ -6,23 +6,20 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsl #16          // w0<- AAxx0000
-    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
-    add     w2, w1, w1                  // w2<- byte offset, set flags
-       // If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
     lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
-    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
     FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
        // If backwards branch refresh rIBASE
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
index ff66a23..b2b9924 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S
@@ -5,19 +5,18 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
index 8a6980e..b785857 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S
@@ -10,23 +10,20 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH w0, 1                         // w0<- aaaa (lo)
-    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
+    adds    w1, wINST, wINST            // w1<- byte offset
     FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from xINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/op_iget.S b/runtime/interpreter/mterp/arm64/op_iget.S
index 165c730..88533bd 100644
--- a/runtime/interpreter/mterp/arm64/op_iget.S
+++ b/runtime/interpreter/mterp/arm64/op_iget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+%default { "extend":"", "is_object":"0", "helper":"artGet32InstanceFromCode"}
     /*
      * General instance field get.
      *
@@ -12,6 +12,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       $helper
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    $extend
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index f087d23..e8b4f04 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -9,20 +9,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
-    mov     w3, wINST, lsr #8           // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
-    GET_VREG w1, w3                     // w1<- vAA
-    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
-    bl      $func                       // w0<- code-unit branch offset
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
-    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -30,10 +16,18 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      $func                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index d4856d2..e528d9f 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -6,26 +6,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    mov${condition} w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.${condition} .L_${opcode}_taken
+    FETCH_ADVANCE_INST 2                // update xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_${opcode}_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, ${condition}    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 0afd276..8f4741c 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -20,6 +20,8 @@
 #include "interpreter/interpreter_common.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
+#include "jit/jit.h"
+#include "debugger.h"
 
 namespace art {
 namespace interpreter {
@@ -45,7 +47,9 @@
 void InitMterpTls(Thread* self) {
   self->SetMterpDefaultIBase(artMterpAsmInstructionStart);
   self->SetMterpAltIBase(artMterpAsmAltInstructionStart);
-  self->SetMterpCurrentIBase(artMterpAsmInstructionStart);
+  self->SetMterpCurrentIBase(TraceExecutionEnabled() ?
+                             artMterpAsmAltInstructionStart :
+                             artMterpAsmInstructionStart);
 }
 
 /*
@@ -139,6 +143,20 @@
   return entries[index];
 }
 
+extern "C" bool MterpShouldSwitchInterpreters()
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  bool unhandled_instrumentation;
+  // TODO: use the NonJitProfilingActive() check on other targets after more extensive testing.
+  if ((kRuntimeISA == kArm64) || (kRuntimeISA == kArm)) {
+    unhandled_instrumentation = instrumentation->NonJitProfilingActive();
+  } else {
+    unhandled_instrumentation = instrumentation->IsActive();
+  }
+  return unhandled_instrumentation || Dbg::IsDebuggerActive();
+}
+
 
 extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame,
                                    uint16_t* dex_pc_ptr,  uint16_t inst_data )
@@ -429,6 +447,7 @@
   } else {
     self->AssertNoPendingException();
   }
+  TraceExecution(*shadow_frame, inst, shadow_frame->GetDexPC());
 }
 
 extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
@@ -488,6 +507,14 @@
             << self->IsExceptionPending();
 }
 
+extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "OSR: " << inst->Opcode(inst_data) << ", offset = " << offset;
+}
+
 extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
   SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
@@ -500,9 +527,10 @@
   }
 }
 
-extern "C" void MterpSuspendCheck(Thread* self)
+extern "C" bool MterpSuspendCheck(Thread* self)
   SHARED_REQUIRES(Locks::mutator_lock_) {
   self->AllowThreadSuspension();
+  return MterpShouldSwitchInterpreters();
 }
 
 extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
@@ -618,5 +646,15 @@
   return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
 }
 
+extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtMethod* method = shadow_frame->GetMethod();
+  JValue* result = shadow_frame->GetResultRegister();
+  uint32_t dex_pc = shadow_frame->GetDexPC();
+  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
+  instrumentation->Branch(self, method, dex_pc, offset);
+  return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h
index 90d21e9..8d24641 100644
--- a/runtime/interpreter/mterp/mterp.h
+++ b/runtime/interpreter/mterp/mterp.h
@@ -30,6 +30,7 @@
 
 void InitMterpTls(Thread* self);
 void CheckMterpAsmConstants();
+extern "C" bool MterpShouldSwitchInterpreters();
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index ee19559..94cbd2d 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -92,6 +92,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
@@ -116,14 +119,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -1111,20 +1106,28 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
-       @ If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backwards branch refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue
@@ -1143,17 +1146,25 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+#if MTERP_PROFILE_BRANCHES
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1176,21 +1187,29 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1211,7 +1230,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1219,9 +1238,18 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1232,8 +1260,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1255,7 +1284,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1263,9 +1292,18 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1276,8 +1314,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1495,17 +1534,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movne r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    bne .L_op_if_eq_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_eq_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1514,10 +1565,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movne r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movne rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1538,17 +1589,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    moveq r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    beq .L_op_if_ne_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_ne_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1557,10 +1620,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    moveq r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    moveq rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1581,17 +1644,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movge r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    bge .L_op_if_lt_not_taken           @ vA >= vB: if-lt not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ loaded after the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_lt_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1600,10 +1675,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movge r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movge rINST, #2              @ rINST<- not-taken dist: 2 code units (not bytes)
+    adds    r2, rINST, rINST            @ code units -> bytes; N set if negative
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1624,17 +1699,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movlt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    blt .L_op_if_ge_not_taken           @ vA < vB: if-ge not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ loaded after the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_ge_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1643,10 +1730,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movlt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movlt rINST, #2              @ rINST<- not-taken dist: 2 code units (not bytes)
+    adds    r2, rINST, rINST            @ code units -> bytes; N set if negative
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1667,17 +1754,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movle r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    ble .L_op_if_gt_not_taken           @ vA <= vB: if-gt not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ loaded after the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_gt_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1686,10 +1785,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movle r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movle rINST, #2              @ rINST<- not-taken dist: 2 code units (not bytes)
+    adds    r2, rINST, rINST            @ code units -> bytes; N set if negative
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1710,17 +1809,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movgt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    bgt .L_op_if_le_not_taken           @ vA > vB: if-le not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ loaded after the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_le_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1729,10 +1840,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movgt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movgt rINST, #2              @ rINST<- not-taken dist: 2 code units (not bytes)
+    adds    r2, rINST, rINST            @ code units -> bytes; N set if negative
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1753,25 +1864,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    movne r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    bne .L_op_if_eqz_not_taken          @ vAA != 0: if-eqz not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ must follow the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_eqz_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movne r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movne rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1792,25 +1915,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    moveq r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    beq .L_op_if_nez_not_taken          @ vAA == 0: if-nez not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ must follow the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_nez_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    moveq r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    moveq rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1831,25 +1966,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    movge r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    bge .L_op_if_ltz_not_taken          @ vAA >= 0: if-ltz not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ must follow the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_ltz_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movge r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movge rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1870,25 +2017,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    movlt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    blt .L_op_if_gez_not_taken          @ vAA < 0: if-gez not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ must follow the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_gez_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movlt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movlt rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1909,25 +2068,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    movle r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    ble .L_op_if_gtz_not_taken          @ vAA <= 0: if-gtz not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ must follow the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_gtz_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movle r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movle rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1948,25 +2119,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    movgt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    bgt .L_op_if_lez_not_taken          @ vAA > 0: if-lez not taken
+    EXPORT_PC
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset, in code units
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0                      @ nonzero result?
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ must follow the bl, which overwrites lr
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue  @ negative (backward) offset
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_lez_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movgt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movgt rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -3294,6 +3477,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3326,6 +3512,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3358,6 +3547,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3383,6 +3575,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3409,6 +3604,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3453,6 +3651,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3478,6 +3679,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3503,6 +3707,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3528,6 +3735,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3553,6 +3763,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters  @ r0<- call result
+    cmp     r0, #0                      @ zero: dispatch the next opcode
+    bne     MterpFallback               @ nonzero: branch to MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -4948,16 +5161,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >>> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -4980,16 +5193,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -5012,16 +5225,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6087,17 +6300,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >>> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6114,17 +6327,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6141,17 +6354,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -7284,6 +7497,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -7309,6 +7525,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -12098,7 +12317,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -12189,8 +12407,12 @@
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     rPC, r0, #CODEITEM_INSNS_OFFSET
     add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
-    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -12202,12 +12424,31 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
-    EXPORT_PC
-    mov     r0, rSELF
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    blne    MterpSuspendCheck           @ (self)
+    bne     1f
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
+1:
+    EXPORT_PC
+    mov     r0, rSELF
+    bl      MterpSuspendCheck           @ (self)
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF
+    add r1, rFP, #OFF_FP_SHADOWFRAME
+    mov r2, rINST
+    bl MterpLogOSR
+#endif
+    mov r0, #1                          @ Signal normal return
+    b MterpDone
 
 /*
  * Bail out to reference interpreter.
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index e9d28ab..e4825f0 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -94,6 +94,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
 #define xPC     x20
@@ -121,14 +124,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -1087,26 +1082,23 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsl #16          // w0<- AAxx0000
-    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
-    add     w2, w1, w1                  // w2<- byte offset, set flags
-       // If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
     lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
-    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
     FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
        // If backwards branch refresh rIBASE
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1119,22 +1111,21 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1152,26 +1143,23 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH w0, 1                         // w0<- aaaa (lo)
-    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
+    adds    w1, wINST, wINST            // w1<- byte offset
     FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from xINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1187,20 +1175,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
-    mov     w3, wINST, lsr #8           // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
-    GET_VREG w1, w3                     // w1<- vAA
-    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
-    bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
-    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -1208,13 +1182,21 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1231,20 +1213,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
-    mov     w3, wINST, lsr #8           // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
-    GET_VREG w1, w3                     // w1<- vAA
-    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
-    bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
-    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -1252,13 +1220,21 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 
 /* ------------------------------ */
@@ -1396,17 +1372,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    moveq w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.eq .L_op_if_eq_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_eq_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1415,11 +1402,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, eq    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1440,17 +1427,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movne w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.ne .L_op_if_ne_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_ne_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1459,11 +1457,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, ne    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1484,17 +1482,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movlt w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.lt .L_op_if_lt_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_lt_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1503,11 +1512,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, lt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1528,17 +1537,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movge w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.ge .L_op_if_ge_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_ge_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1547,11 +1567,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, ge    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1572,17 +1592,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movgt w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.gt .L_op_if_gt_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_gt_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1591,11 +1622,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, gt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1616,17 +1647,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movle w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.le .L_op_if_le_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_le_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1635,11 +1677,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, le    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1660,26 +1702,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    moveq w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.eq .L_op_if_eqz_taken             // taken: profile the branch before jumping
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_eqz_taken:
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET] // w7<- thread flags
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue // negative (backward) offset: detour through suspend check
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, eq    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1700,26 +1753,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movne w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.ne .L_op_if_nez_taken             // taken: profile the branch before jumping
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_nez_taken:
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET] // w7<- thread flags
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue // negative (backward) offset: detour through suspend check
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, ne    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1740,26 +1804,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movlt w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.lt .L_op_if_ltz_taken             // taken: profile the branch before jumping
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_ltz_taken:
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET] // w7<- thread flags
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue // negative (backward) offset: detour through suspend check
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, lt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1780,26 +1855,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movge w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.ge .L_op_if_gez_taken             // taken: profile the branch before jumping
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_gez_taken:
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET] // w7<- thread flags
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue // negative (backward) offset: detour through suspend check
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, ge    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1820,26 +1906,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movgt w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.gt .L_op_if_gtz_taken             // taken: profile the branch before jumping
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_gtz_taken:
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET] // w7<- thread flags
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue // negative (backward) offset: detour through suspend check
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, gt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1860,26 +1957,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movle w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.le .L_op_if_lez_taken             // taken: profile the branch before jumping
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_lez_taken:
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET] // w7<- thread flags
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue // negative (backward) offset: detour through suspend check
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, le    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -2401,6 +2509,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGet32InstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2457,6 +2566,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetObjInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2488,6 +2598,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetBooleanInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    uxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2519,6 +2630,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetByteInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    sxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2550,6 +2662,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetCharInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    uxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2581,6 +2694,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetShortInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    sxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -3158,11 +3272,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtual
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3190,11 +3305,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeSuper
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3222,11 +3338,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeDirect
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3247,11 +3364,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeStatic
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3273,11 +3391,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeInterface
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3320,11 +3439,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtualRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3345,11 +3465,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeSuperRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3370,11 +3491,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeDirectRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3395,11 +3517,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeStaticRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3420,11 +3543,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeInterfaceRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -6852,11 +6976,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtualQuick
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -6877,11 +7002,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtualQuickRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -11565,7 +11691,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -11654,8 +11779,11 @@
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     xPC, x0, #CODEITEM_INSNS_OFFSET
     add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
-    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -11675,10 +11803,24 @@
     EXPORT_PC
     mov     x0, xSELF
     bl      MterpSuspendCheck           // (self)
+    cbnz    w0, MterpFallback           // nonzero w0: something in the environment changed, switch interpreters
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
 /*
+ * Reached when MterpProfileBranch returns nonzero: OSR happened and the compiled method has returned.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- shadow frame
+    sbfm x2, xINST, 0, 31               // x2<- sign-extended branch offset (callers leave it in wINST)
+    bl MterpLogOSR                      // called with x0/x1/x2 as set up above
+#endif
+    mov  x0, #1                         // Signal normal return
+    b    MterpDone
+
+/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 96229ce..d365a4f 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -189,11 +189,6 @@
 
 /*
  * Refresh handler table.
- * IBase handles uses the caller save register so we must restore it after each call.
- * Also it is used as a result of some 64-bit operations (like imul) and we should
- * restore it in such cases also.
- *
- * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
  */
 .macro REFRESH_IBASE
     movl    rSELF, rIBASE
@@ -201,9 +196,22 @@
 .endm
 
 /*
+ * Restore the handler table base (rIBASE) from the current thread.
+ * rIBASE lives in a caller-save register, so it must be restored after each call.
+ * The same register also receives the result of some 64-bit operations (like imul),
+ * so it must be restored after those as well.
+ *
+ * TODO: Consider spilling rIBASE instead of reloading it from the Thread structure.
+ */
+.macro RESTORE_IBASE
+    movl    rSELF, rIBASE                                 # rIBASE <- self
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE   # rIBASE <- self's current handler table base
+.endm
+
+/*
   * If rSELF is already loaded then we can use it from known reg.
   */
-.macro REFRESH_IBASE_FROM_SELF _reg
+.macro RESTORE_IBASE_FROM_SELF _reg
     movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
 .endm
 
@@ -771,8 +779,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -790,8 +798,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -809,8 +817,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -828,8 +836,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artLockObjectFromCode)   # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -851,8 +859,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artUnlockObjectFromCode) # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -874,8 +882,8 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -903,7 +911,7 @@
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     andb    $0xf, rINSTbl                  # rINSTbl <- A
@@ -943,8 +951,8 @@
     REFRESH_INST 34
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpNewInstance)
-    REFRESH_IBASE
-    testl   %eax, %eax                 # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -969,8 +977,8 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpNewArray)
-    REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -994,7 +1002,7 @@
     movl    %ecx, OUT_ARG2(%esp)
     call    SYMBOL(MterpFilledNewArray)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -1019,7 +1027,7 @@
     movl    %ecx, OUT_ARG2(%esp)
     call    SYMBOL(MterpFilledNewArrayRange)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -1037,7 +1045,7 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -1923,7 +1931,7 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINST
@@ -2090,8 +2098,8 @@
     REFRESH_INST 77
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpAputObject)         # (array, index)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -2221,7 +2229,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGet32InstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2259,7 +2267,7 @@
     andb    $0xf, rINSTbl                  # rINST <- A
     SET_VREG %eax, rINST
     SET_VREG_HIGH %edx, rINST
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2285,7 +2293,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetObjInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2320,7 +2328,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetBooleanInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2355,7 +2363,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetByteInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2390,7 +2398,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetCharInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2425,7 +2433,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetShortInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2461,9 +2469,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet32InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2485,9 +2493,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet64InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2503,9 +2511,9 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpIputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2533,9 +2541,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet8InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2564,9 +2572,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet8InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2595,9 +2603,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet16InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2626,9 +2634,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet16InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2652,7 +2660,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGet32StaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2685,7 +2693,7 @@
     jnz     MterpException
     SET_VREG %eax, rINST                    # fp[A]<- low part
     SET_VREG_HIGH %edx, rINST               # fp[A+1]<- high part
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2709,7 +2717,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetObjStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 1
@@ -2741,7 +2749,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetBooleanStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2773,7 +2781,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetByteStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2805,7 +2813,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetCharStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2837,7 +2845,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetShortStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2869,9 +2877,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet32StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2894,9 +2902,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet64IndirectStaticFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2912,9 +2920,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpSputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2939,9 +2947,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet8StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2967,9 +2975,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet8StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2995,9 +3003,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet16StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -3023,9 +3031,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet16StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -3049,9 +3057,9 @@
     REFRESH_INST 110
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtual)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 /*
@@ -3082,9 +3090,9 @@
     REFRESH_INST 111
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeSuper)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 /*
@@ -3115,9 +3123,9 @@
     REFRESH_INST 112
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeDirect)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3141,9 +3149,9 @@
     REFRESH_INST 113
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeStatic)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3168,9 +3176,9 @@
     REFRESH_INST 114
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeInterface)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 /*
@@ -3215,9 +3223,9 @@
     REFRESH_INST 116
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtualRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3241,9 +3249,9 @@
     REFRESH_INST 117
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeSuperRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3267,9 +3275,9 @@
     REFRESH_INST 118
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeDirectRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3293,9 +3301,9 @@
     REFRESH_INST 119
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeStaticRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3319,9 +3327,9 @@
     REFRESH_INST 120
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeInterfaceRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -4047,10 +4055,10 @@
     je      common_errDivideByZero
     movl    %eax, %edx
     orl     %ecx, %edx
-    test    $0xFFFFFF00, %edx              # If both arguments are less
+    testl   $0xFFFFFF00, %edx              # If both arguments are less
                                             #   than 8-bit and +ve
     jz      .Lop_div_int_8                   # Do 8-bit divide
-    test    $0xFFFF0000, %edx              # If both arguments are less
+    testl   $0xFFFF0000, %edx              # If both arguments are less
                                             #   than 16-bit and +ve
     jz      .Lop_div_int_16                  # Do 16-bit divide
     cmpl    $-1, %ecx
@@ -4101,10 +4109,10 @@
     je      common_errDivideByZero
     movl    %eax, %edx
     orl     %ecx, %edx
-    test    $0xFFFFFF00, %edx              # If both arguments are less
+    testl   $0xFFFFFF00, %edx              # If both arguments are less
                                             #   than 8-bit and +ve
     jz      .Lop_rem_int_8                   # Do 8-bit divide
-    test    $0xFFFF0000, %edx              # If both arguments are less
+    testl   $0xFFFF0000, %edx              # If both arguments are less
                                             #   than 16-bit and +ve
     jz      .Lop_rem_int_16                  # Do 16-bit divide
     cmpl    $-1, %ecx
@@ -4785,9 +4793,9 @@
     sarl    $4, rINST                      # rINST <- B
     GET_VREG %eax, rINST                    # eax <- vB
     andb    $0xf, %cl                      # ecx <- A
-    mov     rIBASE, LOCAL0(%esp)
+    movl    rIBASE, rINST
     imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movl    rINST, rIBASE
     SET_VREG %eax, %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -5514,11 +5522,11 @@
     movzbl  rINSTbl, %eax                   # eax <- 000000BA
     sarl    $4, %eax                       # eax <- B
     GET_VREG %eax, %eax                     # eax <- vB
-    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    movl    rIBASE, %ecx
+    movswl  2(rPC), rIBASE                  # rIBASE <- ssssCCCC
     andb    $0xf, rINSTbl                  # rINST <- A
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -5721,11 +5729,11 @@
 /* File: x86/op_mul_int_lit8.S */
     /* mul/lit8 vAA, vBB, #+CC */
     movzbl  2(rPC), %eax                    # eax <- BB
-    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    movl    rIBASE, %ecx
     GET_VREG  %eax, %eax                    # eax <- rBB
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movsbl  3(rPC), rIBASE                  # rIBASE <- ssssssCC
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -5985,7 +5993,7 @@
     EXPORT_PC
     call    SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf,rINSTbl                   # rINST <- A
@@ -6037,9 +6045,9 @@
     REFRESH_INST 232
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpIputObjectQuick)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -6062,9 +6070,9 @@
     REFRESH_INST 233
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtualQuick)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -6088,9 +6096,9 @@
     REFRESH_INST 234
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtualQuickRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -12912,7 +12920,7 @@
     lea     OFF_FP_SHADOWFRAME(rFP), %ecx
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpHandleException)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpExceptionReturn
     REFRESH_IBASE
     movl    OFF_FP_CODE_ITEM(rFP), %eax
diff --git a/runtime/interpreter/mterp/x86/bindiv.S b/runtime/interpreter/mterp/x86/bindiv.S
index bb5b319..e87ba45 100644
--- a/runtime/interpreter/mterp/x86/bindiv.S
+++ b/runtime/interpreter/mterp/x86/bindiv.S
@@ -13,10 +13,10 @@
     je      common_errDivideByZero
     movl    %eax, %edx
     orl     %ecx, %edx
-    test    $$0xFFFFFF00, %edx              # If both arguments are less
+    testl   $$0xFFFFFF00, %edx              # If both arguments are less
                                             #   than 8-bit and +ve
     jz      .L${opcode}_8                   # Do 8-bit divide
-    test    $$0xFFFF0000, %edx              # If both arguments are less
+    testl   $$0xFFFF0000, %edx              # If both arguments are less
                                             #   than 16-bit and +ve
     jz      .L${opcode}_16                  # Do 16-bit divide
     cmpl    $$-1, %ecx
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
index 385e784..a1532fa 100644
--- a/runtime/interpreter/mterp/x86/footer.S
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -114,7 +114,7 @@
     lea     OFF_FP_SHADOWFRAME(rFP), %ecx
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpHandleException)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpExceptionReturn
     REFRESH_IBASE
     movl    OFF_FP_CODE_ITEM(rFP), %eax
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
index 0977b90..3fbbbf9 100644
--- a/runtime/interpreter/mterp/x86/header.S
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -182,11 +182,6 @@
 
 /*
  * Refresh handler table.
- * IBase handles uses the caller save register so we must restore it after each call.
- * Also it is used as a result of some 64-bit operations (like imul) and we should
- * restore it in such cases also.
- *
- * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
  */
 .macro REFRESH_IBASE
     movl    rSELF, rIBASE
@@ -194,9 +189,22 @@
 .endm
 
 /*
+ * Restore the handler table base (rIBASE).
+ * rIBASE lives in a caller-save register, so it must be restored after each call.
+ * It is also overwritten as a result register by some 64-bit operations (like imul),
+ * so it must be restored in those cases as well.
+ *
+ * TODO: Consider spilling rIBASE instead of reloading it from the Thread structure.
+ */
+.macro RESTORE_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
  * If rSELF is already loaded then we can use it from known reg.
  */
-.macro REFRESH_IBASE_FROM_SELF _reg
+.macro RESTORE_IBASE_FROM_SELF _reg
     movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
 .endm
 
diff --git a/runtime/interpreter/mterp/x86/invoke.S b/runtime/interpreter/mterp/x86/invoke.S
index 054fbfd..bbd88cf 100644
--- a/runtime/interpreter/mterp/x86/invoke.S
+++ b/runtime/interpreter/mterp/x86/invoke.S
@@ -14,7 +14,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL($helper)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_aget_object.S b/runtime/interpreter/mterp/x86/op_aget_object.S
index cbfb50c..35ec053 100644
--- a/runtime/interpreter/mterp/x86/op_aget_object.S
+++ b/runtime/interpreter/mterp/x86/op_aget_object.S
@@ -13,7 +13,7 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINST
diff --git a/runtime/interpreter/mterp/x86/op_aput_object.S b/runtime/interpreter/mterp/x86/op_aput_object.S
index 9cfc221..980b26a 100644
--- a/runtime/interpreter/mterp/x86/op_aput_object.S
+++ b/runtime/interpreter/mterp/x86/op_aput_object.S
@@ -9,7 +9,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpAputObject)         # (array, index)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_check_cast.S b/runtime/interpreter/mterp/x86/op_check_cast.S
index ae2ff9e..d090aa3 100644
--- a/runtime/interpreter/mterp/x86/op_check_cast.S
+++ b/runtime/interpreter/mterp/x86/op_check_cast.S
@@ -12,7 +12,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_class.S b/runtime/interpreter/mterp/x86/op_const_class.S
index 343e110..60be789 100644
--- a/runtime/interpreter/mterp/x86/op_const_class.S
+++ b/runtime/interpreter/mterp/x86/op_const_class.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string.S b/runtime/interpreter/mterp/x86/op_const_string.S
index bbac69c..ff93b23 100644
--- a/runtime/interpreter/mterp/x86/op_const_string.S
+++ b/runtime/interpreter/mterp/x86/op_const_string.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string_jumbo.S b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
index 4236807..e7f952a 100644
--- a/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
+++ b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_fill_array_data.S b/runtime/interpreter/mterp/x86/op_fill_array_data.S
index 004aed9..5855284 100644
--- a/runtime/interpreter/mterp/x86/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/x86/op_fill_array_data.S
@@ -7,6 +7,6 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_filled_new_array.S b/runtime/interpreter/mterp/x86/op_filled_new_array.S
index a2bac29..35b2fe8 100644
--- a/runtime/interpreter/mterp/x86/op_filled_new_array.S
+++ b/runtime/interpreter/mterp/x86/op_filled_new_array.S
@@ -15,6 +15,6 @@
     movl    %ecx, OUT_ARG2(%esp)
     call    SYMBOL($helper)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_iget.S b/runtime/interpreter/mterp/x86/op_iget.S
index 9932610..e3304ba 100644
--- a/runtime/interpreter/mterp/x86/op_iget.S
+++ b/runtime/interpreter/mterp/x86/op_iget.S
@@ -17,7 +17,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL($helper)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $$0xf, rINSTbl                  # rINST <- A
diff --git a/runtime/interpreter/mterp/x86/op_iget_object_quick.S b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
index fe16694..b1551a0 100644
--- a/runtime/interpreter/mterp/x86/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
@@ -9,7 +9,7 @@
     EXPORT_PC
     call    SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $$0xf,rINSTbl                   # rINST <- A
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide.S b/runtime/interpreter/mterp/x86/op_iget_wide.S
index 92126b4..a5d7e69 100644
--- a/runtime/interpreter/mterp/x86/op_iget_wide.S
+++ b/runtime/interpreter/mterp/x86/op_iget_wide.S
@@ -21,5 +21,5 @@
     andb    $$0xf, rINSTbl                  # rINST <- A
     SET_VREG %eax, rINST
     SET_VREG_HIGH %edx, rINST
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_instance_of.S b/runtime/interpreter/mterp/x86/op_instance_of.S
index fd5bf44..e6fe5b2 100644
--- a/runtime/interpreter/mterp/x86/op_instance_of.S
+++ b/runtime/interpreter/mterp/x86/op_instance_of.S
@@ -18,7 +18,7 @@
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     andb    $$0xf, rINSTbl                  # rINSTbl <- A
diff --git a/runtime/interpreter/mterp/x86/op_iput.S b/runtime/interpreter/mterp/x86/op_iput.S
index 13cfe5c..c847e2d 100644
--- a/runtime/interpreter/mterp/x86/op_iput.S
+++ b/runtime/interpreter/mterp/x86/op_iput.S
@@ -19,7 +19,7 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL($handler)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_object.S b/runtime/interpreter/mterp/x86/op_iput_object.S
index f63075c..e013697 100644
--- a/runtime/interpreter/mterp/x86/op_iput_object.S
+++ b/runtime/interpreter/mterp/x86/op_iput_object.S
@@ -7,7 +7,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpIputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_object_quick.S b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
index d54b1b7..cb77929 100644
--- a/runtime/interpreter/mterp/x86/op_iput_object_quick.S
+++ b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
@@ -5,7 +5,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpIputObjectQuick)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_wide.S b/runtime/interpreter/mterp/x86/op_iput_wide.S
index 573e14d..122eecf 100644
--- a/runtime/interpreter/mterp/x86/op_iput_wide.S
+++ b/runtime/interpreter/mterp/x86/op_iput_wide.S
@@ -13,7 +13,7 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet64InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_monitor_enter.S b/runtime/interpreter/mterp/x86/op_monitor_enter.S
index 9e885bd..b35c684 100644
--- a/runtime/interpreter/mterp/x86/op_monitor_enter.S
+++ b/runtime/interpreter/mterp/x86/op_monitor_enter.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artLockObjectFromCode)   # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_monitor_exit.S b/runtime/interpreter/mterp/x86/op_monitor_exit.S
index 0904800..2d17d5e 100644
--- a/runtime/interpreter/mterp/x86/op_monitor_exit.S
+++ b/runtime/interpreter/mterp/x86/op_monitor_exit.S
@@ -12,7 +12,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artUnlockObjectFromCode) # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_2addr.S b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
index f92a28e..da699ae 100644
--- a/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
+++ b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
@@ -3,8 +3,8 @@
     sarl    $$4, rINST                      # rINST <- B
     GET_VREG %eax, rINST                    # eax <- vB
     andb    $$0xf, %cl                      # ecx <- A
-    mov     rIBASE, LOCAL0(%esp)
+    movl    rIBASE, rINST
     imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movl    rINST, rIBASE
     SET_VREG %eax, %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit16.S b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
index 31ab613..056f491 100644
--- a/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
@@ -3,10 +3,10 @@
     movzbl  rINSTbl, %eax                   # eax <- 000000BA
     sarl    $$4, %eax                       # eax <- B
     GET_VREG %eax, %eax                     # eax <- vB
-    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    movl    rIBASE, %ecx
+    movswl  2(rPC), rIBASE                  # rIBASE <- ssssCCCC
     andb    $$0xf, rINSTbl                  # rINST <- A
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit8.S b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
index 6637aa7..59b3844 100644
--- a/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
@@ -1,9 +1,9 @@
     /* mul/lit8 vAA, vBB, #+CC */
     movzbl  2(rPC), %eax                    # eax <- BB
-    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    movl    rIBASE, %ecx
     GET_VREG  %eax, %eax                    # eax <- rBB
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movsbl  3(rPC), rIBASE                  # rIBASE <- ssssssCC
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_new_array.S b/runtime/interpreter/mterp/x86/op_new_array.S
index 2490477..16226e9 100644
--- a/runtime/interpreter/mterp/x86/op_new_array.S
+++ b/runtime/interpreter/mterp/x86/op_new_array.S
@@ -15,7 +15,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpNewArray)
-    REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_new_instance.S b/runtime/interpreter/mterp/x86/op_new_instance.S
index 712a5eb..f976acc 100644
--- a/runtime/interpreter/mterp/x86/op_new_instance.S
+++ b/runtime/interpreter/mterp/x86/op_new_instance.S
@@ -10,7 +10,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpNewInstance)
-    REFRESH_IBASE
-    testl   %eax, %eax                 # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sget.S b/runtime/interpreter/mterp/x86/op_sget.S
index ec96458..0e9a3d8 100644
--- a/runtime/interpreter/mterp/x86/op_sget.S
+++ b/runtime/interpreter/mterp/x86/op_sget.S
@@ -15,7 +15,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL($helper)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if $is_object
diff --git a/runtime/interpreter/mterp/x86/op_sget_wide.S b/runtime/interpreter/mterp/x86/op_sget_wide.S
index 833f266..2b60303 100644
--- a/runtime/interpreter/mterp/x86/op_sget_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sget_wide.S
@@ -17,5 +17,5 @@
     jnz     MterpException
     SET_VREG %eax, rINST                    # fp[A]<- low part
     SET_VREG_HIGH %edx, rINST               # fp[A+1]<- high part
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput.S b/runtime/interpreter/mterp/x86/op_sput.S
index a199281..0b5de09 100644
--- a/runtime/interpreter/mterp/x86/op_sput.S
+++ b/runtime/interpreter/mterp/x86/op_sput.S
@@ -16,7 +16,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL($helper)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_object.S b/runtime/interpreter/mterp/x86/op_sput_object.S
index e3e57fc..0db5177 100644
--- a/runtime/interpreter/mterp/x86/op_sput_object.S
+++ b/runtime/interpreter/mterp/x86/op_sput_object.S
@@ -7,7 +7,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpSputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_wide.S b/runtime/interpreter/mterp/x86/op_sput_wide.S
index 7544838..19cff0d 100644
--- a/runtime/interpreter/mterp/x86/op_sput_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sput_wide.S
@@ -14,7 +14,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet64IndirectStaticFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 60ad0cb..0e175b8 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -261,6 +261,16 @@
   }
 }
 
+void UnstartedRuntime::UnstartedClassGetEnclosingClass(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> klass(hs.NewHandle(shadow_frame->GetVRegReference(arg_offset)->AsClass()));
+  // Report null for proxy classes and for classes without a dex cache. The dex file is only
+  // consulted on the other path, so the null result is never overwritten by a lookup.
+  const bool has_dex = !klass->IsProxyClass() && klass->GetDexCache() != nullptr;
+  result->SetL(has_dex ? klass->GetDexFile().GetEnclosingClass(klass) : nullptr);
+}
+
 void UnstartedRuntime::UnstartedVmClassLoaderFindLoadedClass(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   mirror::String* class_name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h
index 047e906..6d4d711 100644
--- a/runtime/interpreter/unstarted_runtime_list.h
+++ b/runtime/interpreter/unstarted_runtime_list.h
@@ -24,6 +24,7 @@
   V(ClassClassForName, "java.lang.Class java.lang.Class.classForName(java.lang.String, boolean, java.lang.ClassLoader)") \
   V(ClassNewInstance, "java.lang.Object java.lang.Class.newInstance()") \
   V(ClassGetDeclaredField, "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") \
+  V(ClassGetEnclosingClass, "java.lang.Class java.lang.Class.getEnclosingClass()") \
   V(VmClassLoaderFindLoadedClass, "java.lang.Class java.lang.VMClassLoader.findLoadedClass(java.lang.ClassLoader, java.lang.String)") \
   V(VoidLookupType, "java.lang.Class java.lang.Void.lookupType()") \
   V(SystemArraycopy, "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)") \
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 8d3da37..bdc7ee2 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -113,8 +113,7 @@
     *error_msg = oss.str();
     return false;
   }
-  jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**, bool*)>(
-      dlsym(jit_library_handle_, "jit_load"));
+  jit_load_ = reinterpret_cast<void* (*)(bool*)>(dlsym(jit_library_handle_, "jit_load"));
   if (jit_load_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't find jit_load entry point";
@@ -141,23 +140,15 @@
     *error_msg = "JIT couldn't find jit_types_loaded entry point";
     return false;
   }
-  CompilerCallbacks* callbacks = nullptr;
   bool will_generate_debug_symbols = false;
   VLOG(jit) << "Calling JitLoad interpreter_only="
       << Runtime::Current()->GetInstrumentation()->InterpretOnly();
-  jit_compiler_handle_ = (jit_load_)(&callbacks, &will_generate_debug_symbols);
+  jit_compiler_handle_ = (jit_load_)(&will_generate_debug_symbols);
   if (jit_compiler_handle_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't load compiler";
     return false;
   }
-  if (callbacks == nullptr) {
-    dlclose(jit_library_handle_);
-    *error_msg = "JIT compiler callbacks were not set";
-    jit_compiler_handle_ = nullptr;
-    return false;
-  }
-  compiler_callbacks_ = callbacks;
   generate_debug_info_ = will_generate_debug_symbols;
   return true;
 }
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 042da92..109ca3d 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -32,7 +32,6 @@
 namespace art {
 
 class ArtMethod;
-class CompilerCallbacks;
 struct RuntimeArgumentMap;
 
 namespace jit {
@@ -55,9 +54,6 @@
                                   size_t warmup_threshold,
                                   size_t osr_threshold);
   void CreateThreadPool();
-  CompilerCallbacks* GetCompilerCallbacks() {
-    return compiler_callbacks_;
-  }
   const JitCodeCache* GetCodeCache() const {
     return code_cache_.get();
   }
@@ -108,7 +104,7 @@
   // JIT compiler
   void* jit_library_handle_;
   void* jit_compiler_handle_;
-  void* (*jit_load_)(CompilerCallbacks**, bool*);
+  void* (*jit_load_)(bool*);
   void (*jit_unload_)(void*);
   bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool);
   void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
@@ -119,7 +115,6 @@
 
   std::unique_ptr<jit::JitInstrumentationCache> instrumentation_cache_;
   std::unique_ptr<jit::JitCodeCache> code_cache_;
-  CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
   bool save_profiling_info_;
   bool generate_debug_info_;
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 307bb6d..d5a9d66 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -232,25 +232,20 @@
 void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
   const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-  const uint8_t* data = method_header->GetNativeGcMap();
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
   DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
-  if (data != nullptr) {
-    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
-  }
-  data = method_header->GetMappingTable();
-  if (data != nullptr) {
-    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
-  }
+
+  FreeData(const_cast<uint8_t*>(method_header->GetNativeGcMap()));
+  FreeData(const_cast<uint8_t*>(method_header->GetMappingTable()));
   // Use the offset directly to prevent sanity check that the method is
   // compiled with optimizing.
   // TODO(ngeoffray): Clean up.
   if (method_header->vmap_table_offset_ != 0) {
-    data = method_header->code_ - method_header->vmap_table_offset_;
-    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
+    const uint8_t* data = method_header->code_ - method_header->vmap_table_offset_;
+    FreeData(const_cast<uint8_t*>(data));
   }
-  mspace_free(code_mspace_, reinterpret_cast<uint8_t*>(allocation));
+  FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
 
 void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
@@ -281,7 +276,7 @@
     ProfilingInfo* info = *it;
     if (alloc.ContainsUnsafe(info->GetMethod())) {
       info->GetMethod()->SetProfilingInfo(nullptr);
-      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
+      FreeData(reinterpret_cast<uint8_t*>(info));
       it = profiling_infos_.erase(it);
     } else {
       ++it;
@@ -307,19 +302,18 @@
 
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
+  uint8_t* memory = nullptr;
   {
     ScopedThreadSuspension sts(self, kSuspended);
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
     {
       ScopedCodeCacheWrite scc(code_map_.get());
-      uint8_t* result = reinterpret_cast<uint8_t*>(
-          mspace_memalign(code_mspace_, alignment, total_size));
-      if (result == nullptr) {
+      memory = AllocateCode(total_size);
+      if (memory == nullptr) {
         return nullptr;
       }
-      code_ptr = result + header_size;
-      DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(code_ptr), alignment);
+      code_ptr = memory + header_size;
 
       std::copy(code, code + code_size, code_ptr);
       method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
@@ -376,9 +370,7 @@
 }
 
 size_t JitCodeCache::CodeCacheSizeLocked() {
-  size_t bytes_allocated = 0;
-  mspace_inspect_all(code_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
-  return bytes_allocated;
+  return used_memory_for_code_;
 }
 
 size_t JitCodeCache::DataCacheSize() {
@@ -387,9 +379,7 @@
 }
 
 size_t JitCodeCache::DataCacheSizeLocked() {
-  size_t bytes_allocated = 0;
-  mspace_inspect_all(data_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
-  return bytes_allocated;
+  return used_memory_for_data_;
 }
 
 size_t JitCodeCache::NumberOfCompiledCode() {
@@ -399,7 +389,7 @@
 
 void JitCodeCache::ClearData(Thread* self, void* data) {
   MutexLock mu(self, lock_);
-  mspace_free(data_mspace_, data);
+  FreeData(reinterpret_cast<uint8_t*>(data));
 }
 
 uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
@@ -410,7 +400,7 @@
     ScopedThreadSuspension sts(self, kSuspended);
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
-    result = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, size));
+    result = AllocateData(size);
   }
 
   if (result == nullptr) {
@@ -419,7 +409,7 @@
     ScopedThreadSuspension sts(self, kSuspended);
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
-    result = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, size));
+    result = AllocateData(size);
   }
 
   return result;
@@ -628,12 +618,11 @@
       }
     }
 
-    void* data_mspace = data_mspace_;
     // Free all profiling infos of methods that were not being compiled.
     auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
-      [data_mspace] (ProfilingInfo* info) {
+      [this] (ProfilingInfo* info) NO_THREAD_SAFETY_ANALYSIS {
         if (info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
-          mspace_free(data_mspace, reinterpret_cast<uint8_t*>(info));
+          FreeData(reinterpret_cast<uint8_t*>(info));
           return true;
         }
         return false;
@@ -718,7 +707,7 @@
     return info;
   }
 
-  uint8_t* data = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, profile_info_size));
+  uint8_t* data = AllocateData(profile_info_size);
   if (data == nullptr) {
     return nullptr;
   }
@@ -809,5 +798,32 @@
   }
 }
 
+uint8_t* JitCodeCache::AllocateCode(size_t code_size) {
+  size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+  uint8_t* result = reinterpret_cast<uint8_t*>(
+      mspace_memalign(code_mspace_, alignment, code_size));
+  size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
+  // Ensure the header ends up at expected instruction alignment.
+  DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
+  used_memory_for_code_ += mspace_usable_size(result);
+  return result;
+}
+
+void JitCodeCache::FreeCode(uint8_t* code) {
+  used_memory_for_code_ -= mspace_usable_size(code);
+  mspace_free(code_mspace_, code);
+}
+
+uint8_t* JitCodeCache::AllocateData(size_t data_size) {
+  void* result = mspace_malloc(data_mspace_, data_size);
+  used_memory_for_data_ += mspace_usable_size(result);
+  return reinterpret_cast<uint8_t*>(result);
+}
+
+void JitCodeCache::FreeData(uint8_t* data) {
+  used_memory_for_data_ -= mspace_usable_size(data);
+  mspace_free(data_mspace_, data);
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 3fb5c70..087d1de 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -279,6 +279,17 @@
   // Whether we can do garbage collection.
   const bool garbage_collect_code_;
 
+  // The size in bytes of used memory for the data portion of the code cache.
+  size_t used_memory_for_data_ GUARDED_BY(lock_);
+
+  // The size in bytes of used memory for the code portion of the code cache.
+  size_t used_memory_for_code_ GUARDED_BY(lock_);
+
+  void FreeCode(uint8_t* code) REQUIRES(lock_);
+  uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_);
+  void FreeData(uint8_t* data) REQUIRES(lock_);
+  uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
+
   // Number of compilations done throughout the lifetime of the JIT.
   size_t number_of_compilations_ GUARDED_BY(lock_);
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 422832e..3f806d3 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -532,8 +532,9 @@
   return GetFieldPtr<LengthPrefixedArray<ArtField>*>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline MemberOffset Class::GetFirstReferenceInstanceFieldOffset() {
-  Class* super_class = GetSuperClass();
+  Class* super_class = GetSuperClass<kVerifyFlags, kReadBarrierOption>();
   return (super_class != nullptr)
       ? MemberOffset(RoundUp(super_class->GetObjectSize(),
                              sizeof(mirror::HeapReference<mirror::Object>)))
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index cdc6204..9190e44 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -1048,5 +1048,11 @@
   return depth;
 }
 
+uint32_t Class::FindTypeIndexInOtherDexFile(const DexFile& dex_file) {
+  std::string temp;
+  const DexFile::TypeId* type_id = dex_file.FindTypeId(GetDescriptor(&temp));
+  return (type_id == nullptr) ? DexFile::kDexNoIndex : dex_file.GetIndexForTypeId(*type_id);
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 388a231..6e3463c 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1006,6 +1006,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the offset of the first reference instance field. Other reference instance fields follow.
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   MemberOffset GetFirstReferenceInstanceFieldOffset()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1119,6 +1121,9 @@
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx);
   }
 
+  uint32_t FindTypeIndexInOtherDexFile(const DexFile& dex_file)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   static Class* GetJavaLangClass() SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(HasJavaLangClass());
     return java_lang_Class_.Read();
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index eb391be..76a36ac 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -1068,7 +1068,7 @@
       MemberOffset field_offset = kIsStatic
           ? klass->GetFirstReferenceStaticFieldOffset<kVerifyFlags, kReadBarrierOption>(
               Runtime::Current()->GetClassLinker()->GetImagePointerSize())
-          : klass->GetFirstReferenceInstanceFieldOffset();
+          : klass->GetFirstReferenceInstanceFieldOffset<kVerifyFlags, kReadBarrierOption>();
       for (size_t i = 0u; i < num_reference_fields; ++i) {
         // TODO: Do a simpler check?
         if (field_offset.Uint32Value() != ClassOffset().Uint32Value()) {
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index bcc2d33..910163c 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -40,6 +40,12 @@
 class OatHeader;
 class OatDexFile;
 
+namespace gc {
+namespace collector {
+class DummyOatFile;
+}  // namespace collector
+}  // namespace gc
+
 class OatFile {
  public:
   typedef art::OatDexFile OatDexFile;
@@ -312,6 +318,7 @@
   // elements. std::list<> and std::deque<> satisfy this requirement, std::vector<> doesn't.
   mutable std::list<std::string> string_cache_ GUARDED_BY(secondary_lookup_lock_);
 
+  friend class gc::collector::DummyOatFile;  // For modifying begin_ and end_.
   friend class OatClass;
   friend class art::OatDexFile;
   friend class OatDumper;  // For GetBase and GetLimit
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index f9d916a..d64aa43 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -394,6 +394,7 @@
 // Intended for local changes only.
 static void MaybeOverrideVerbosity() {
   //  gLogVerbosity.class_linker = true;  // TODO: don't check this in!
+  //  gLogVerbosity.collector = true;  // TODO: don't check this in!
   //  gLogVerbosity.compiler = true;  // TODO: don't check this in!
   //  gLogVerbosity.deopt = true;  // TODO: don't check this in!
   //  gLogVerbosity.gc = true;  // TODO: don't check this in!
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index dd384c7..2dfa860 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -290,13 +290,18 @@
         stacked_shadow_frame_pushed_(false),
         single_frame_deopt_(single_frame),
         single_frame_done_(false),
-        single_frame_deopt_method_(nullptr) {
+        single_frame_deopt_method_(nullptr),
+        single_frame_deopt_quick_method_header_(nullptr) {
   }
 
   ArtMethod* GetSingleFrameDeoptMethod() const {
     return single_frame_deopt_method_;
   }
 
+  const OatQuickMethodHeader* GetSingleFrameDeoptQuickMethodHeader() const {
+    return single_frame_deopt_quick_method_header_;
+  }
+
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
     ArtMethod* method = GetMethod();
@@ -368,6 +373,7 @@
         exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
         single_frame_deopt_method_ = method;
+        single_frame_deopt_quick_method_header_ = GetCurrentOatQuickMethodHeader();
       }
       return true;
     }
@@ -603,6 +609,7 @@
   const bool single_frame_deopt_;
   bool single_frame_done_;
   ArtMethod* single_frame_deopt_method_;
+  const OatQuickMethodHeader* single_frame_deopt_quick_method_header_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -636,7 +643,7 @@
   DCHECK(deopt_method != nullptr);
   if (Runtime::Current()->UseJit()) {
     Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
-        deopt_method, handler_method_header_);
+        deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
   } else {
     // Transfer the code to interpreter.
     Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 2aeb792..861bd85 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1887,7 +1887,6 @@
   std::string error_msg;
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
   if (jit_.get() != nullptr) {
-    compiler_callbacks_ = jit_->GetCompilerCallbacks();
     jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
                                      jit_options_->GetWarmupThreshold(),
                                      jit_options_->GetOsrThreshold());
diff --git a/runtime/thread.h b/runtime/thread.h
index 2726e91..97c47e1 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -852,6 +852,22 @@
     tls32_.weak_ref_access_enabled = enabled;
   }
 
+  uint32_t GetDisableThreadFlipCount() const {
+    CHECK(kUseReadBarrier);
+    return tls32_.disable_thread_flip_count;
+  }
+
+  void IncrementDisableThreadFlipCount() {
+    CHECK(kUseReadBarrier);
+    ++tls32_.disable_thread_flip_count;
+  }
+
+  void DecrementDisableThreadFlipCount() {
+    CHECK(kUseReadBarrier);
+    DCHECK_GT(tls32_.disable_thread_flip_count, 0U);
+    --tls32_.disable_thread_flip_count;
+  }
+
   // Activates single step control for debugging. The thread takes the
   // ownership of the given SingleStepControl*. It is deleted by a call
   // to DeactivateSingleStepControl or upon thread destruction.
@@ -1214,7 +1230,8 @@
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false),
       suspended_at_suspend_check(false), ready_for_debug_invoke(false),
-      debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true) {
+      debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true),
+      disable_thread_flip_count(0) {
     }
 
     union StateAndFlags state_and_flags;
@@ -1281,6 +1298,11 @@
     // pause, this is not an issue.) Other collectors use Runtime::DisallowNewSystemWeaks() and
     // ReferenceProcessor::EnableSlowPath().
     bool32_t weak_ref_access_enabled;
+
+    // A thread local version of Heap::disable_thread_flip_count_. This keeps track of how many
+    // levels of (nested) JNI critical sections the thread is in and is used to detect a nested JNI
+    // critical section enter.
+    uint32_t disable_thread_flip_count;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index be7888b..7045482 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -639,3 +639,23 @@
 extern "C" JNIEXPORT jlong JNICALL Java_Main_testGetMethodID(JNIEnv* env, jclass, jclass c) {
   return reinterpret_cast<jlong>(env->GetMethodID(c, "a", "()V"));
 }
+
+extern "C" JNIEXPORT void JNICALL Java_Main_enterJniCriticalSection(JNIEnv* env, jclass,
+                                                                    jint arraySize,
+                                                                    jbyteArray array0,
+                                                                    jbyteArray array1) {
+  for (int i = 0; i < 50000; ++i) {
+    char* data0 = reinterpret_cast<char*>(env->GetPrimitiveArrayCritical(array0, nullptr));
+    char* data1 = reinterpret_cast<char*>(env->GetPrimitiveArrayCritical(array1, nullptr));
+    bool up = i % 2 == 0;
+    for (int j = 0; j < arraySize; ++j) {
+      if (up) {
+        data1[j] = data0[j] + 1;
+      } else {
+        data0[j] = data1[j] + 1;
+      }
+    }
+    env->ReleasePrimitiveArrayCritical(array1, data1, 0);
+    env->ReleasePrimitiveArrayCritical(array0, data0, 0);
+  }
+}
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index ee3a3b9..5c39ede 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -38,6 +38,7 @@
         testNewStringObject();
         testRemoveLocalObject();
         testProxyGetMethodID();
+        testJniCriticalSectionAndGc();
     }
 
     private static native void testFindClassOnAttachedNativeThread();
@@ -222,6 +223,35 @@
     }
 
     private static native long testGetMethodID(Class<?> c);
+
+    // Exercise GC and JNI critical sections in parallel.
+    private static void testJniCriticalSectionAndGc() {
+        Thread runGcThread = new Thread(new Runnable() {
+            @Override
+            public void run() {
+                for (int i = 0; i < 10; ++i) {
+                    Runtime.getRuntime().gc();
+                }
+            }
+        });
+        Thread jniCriticalThread = new Thread(new Runnable() {
+            @Override
+            public void run() {
+                final int arraySize = 32;
+                byte[] array0 = new byte[arraySize];
+                byte[] array1 = new byte[arraySize];
+                enterJniCriticalSection(arraySize, array0, array1);
+            }
+        });
+        jniCriticalThread.start();
+        runGcThread.start();
+        try {
+            jniCriticalThread.join();
+            runGcThread.join();
+        } catch (InterruptedException ignored) {}
+    }
+
+    private static native void enterJniCriticalSection(int arraySize, byte[] array0, byte[] array);
 }
 
 class JniCallNonvirtualTest {
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index af25d9b..e5c9dba 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -804,6 +804,7 @@
     Assert.assertEquals(Math.round(-2.9d), -3l);
     Assert.assertEquals(Math.round(-3.0d), -3l);
     Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(Math.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
@@ -825,6 +826,7 @@
     Assert.assertEquals(Math.round(-2.5f), -2);
     Assert.assertEquals(Math.round(-2.9f), -3);
     Assert.assertEquals(Math.round(-3.0f), -3);
+    Assert.assertEquals(Math.round(16777215.0f), 16777215);  // 2^24 - 1
     Assert.assertEquals(Math.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(Math.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
diff --git a/test/130-hprof/src-ex/Allocator.java b/test/130-hprof/src-ex/Allocator.java
new file mode 100644
index 0000000..ee75a14
--- /dev/null
+++ b/test/130-hprof/src-ex/Allocator.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple allocator that returns a boot class path object.
+public class Allocator {
+    public static Object allocObject() {
+        return new Object();
+    }
+}
diff --git a/test/130-hprof/src/Main.java b/test/130-hprof/src/Main.java
index 67e5232..9868c61 100644
--- a/test/130-hprof/src/Main.java
+++ b/test/130-hprof/src/Main.java
@@ -16,6 +16,7 @@
 
 import java.io.File;
 import java.lang.ref.WeakReference;
+import java.lang.reflect.Constructor;
 import java.lang.reflect.Method;
 import java.lang.reflect.InvocationTargetException;
 
@@ -34,24 +35,21 @@
         }
     }
 
-    public static void main(String[] args) {
-        // Create some data.
-        Object data[] = new Object[TEST_LENGTH];
-        for (int i = 0; i < data.length; i++) {
-            if (makeArray(i)) {
-                data[i] = new Object[TEST_LENGTH];
-            } else {
-                data[i] = String.valueOf(i);
-            }
+    private static Object allocInDifferentLoader() throws Exception {
+        final String DEX_FILE = System.getenv("DEX_LOCATION") + "/130-hprof-ex.jar";
+        Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        if (pathClassLoader == null) {
+            throw new AssertionError("Couldn't find path class loader class");
         }
-        for (int i = 0; i < data.length; i++) {
-            if (makeArray(i)) {
-                Object data2[] = (Object[]) data[i];
-                fillArray(data, data2, i);
-            }
-        }
-        System.out.println("Generated data.");
+        Constructor constructor =
+            pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
+        ClassLoader loader = (ClassLoader)constructor.newInstance(
+                DEX_FILE, ClassLoader.getSystemClassLoader());
+        Class allocator = loader.loadClass("Allocator");
+        return allocator.getDeclaredMethod("allocObject", null).invoke(null);
+    }
 
+    private static void createDumpAndConv() throws RuntimeException {
         File dumpFile = null;
         File convFile = null;
 
@@ -88,6 +86,43 @@
         }
     }
 
+    public static void main(String[] args) throws Exception {
+        // Create some data.
+        Object data[] = new Object[TEST_LENGTH];
+        for (int i = 0; i < data.length; i++) {
+            if (makeArray(i)) {
+                data[i] = new Object[TEST_LENGTH];
+            } else {
+                data[i] = String.valueOf(i);
+            }
+        }
+        for (int i = 0; i < data.length; i++) {
+            if (makeArray(i)) {
+                Object data2[] = (Object[]) data[i];
+                fillArray(data, data2, i);
+            }
+        }
+        System.out.println("Generated data.");
+
+        createDumpAndConv();  // First dump, taken before enableRecentAllocations(true).
+        Class klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+        if (klass == null) {
+            throw new AssertionError("Couldn't find DdmVmInternal class");
+        }
+        Method enableMethod = klass.getDeclaredMethod("enableRecentAllocations",
+                Boolean.TYPE);
+        if (enableMethod == null) {
+            throw new AssertionError("Couldn't find enableRecentAllocations method");
+        }
+        enableMethod.invoke(null, true);  // Null receiver: the method is invoked statically.
+        Object o = allocInDifferentLoader();
+        // Run GC to cause class unloading.
+        Runtime.getRuntime().gc();
+        createDumpAndConv();  // Second dump, taken while recent allocations are enabled.
+        // TODO: Somehow check contents of hprof file.
+        enableMethod.invoke(null, false);
+    }
+
     private static File getHprofConf() {
         // Use the java.library.path. It points to the lib directory.
         File libDir = new File(System.getProperty("java.library.path"));
diff --git a/test/576-polymorphic-inlining/expected.txt b/test/576-polymorphic-inlining/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/576-polymorphic-inlining/expected.txt
diff --git a/test/576-polymorphic-inlining/info.txt b/test/576-polymorphic-inlining/info.txt
new file mode 100644
index 0000000..b3ef0c8
--- /dev/null
+++ b/test/576-polymorphic-inlining/info.txt
@@ -0,0 +1 @@
+Test for polymorphic inlining.
diff --git a/test/576-polymorphic-inlining/src/Main.java b/test/576-polymorphic-inlining/src/Main.java
new file mode 100644
index 0000000..d8d09af
--- /dev/null
+++ b/test/576-polymorphic-inlining/src/Main.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {  // Test driver and root of the Main > SubMain > SubSubMain hierarchy.
+  public static void main(String[] args) {
+    for (int i = 0; i < 20000; ++i) {  // NOTE(review): count presumably chosen to warm up the JIT.
+      $noinline$testVoid(new Main());  // Each test method gets all three receiver types.
+      $noinline$testVoid(new SubMain());
+      $noinline$testVoid(new SubSubMain());
+
+      $noinline$testWithReturnValue(new Main());
+      $noinline$testWithReturnValue(new SubMain());
+      $noinline$testWithReturnValue(new SubSubMain());
+
+      $noinline$testWithBackEdge(new Main());
+      $noinline$testWithBackEdge(new SubMain());
+      $noinline$testWithBackEdge(new SubSubMain());
+    }
+  }
+
+  public static void assertIdentical(Object expected, Object actual) {
+    if (expected != actual) {  // Reference identity, not equals().
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void $noinline$testVoid(Main m) {  // NOTE(review): $noinline$ presumably blocks inlining.
+    if (doThrow) throw new Error("");  // doThrow is never assigned in this file.
+    m.willInlineVoid();  // Empty body in Main and in SubMain's override.
+    m.willOnlyInlineForMainVoid();  // Empty in Main; SubMain's override has a conditional throw.
+  }
+
+  public static void $noinline$testWithReturnValue(Main m) {  // Each call must return m's own class.
+    if (doThrow) throw new Error("");
+    assertIdentical(m.getClass(), m.willInlineWithReturnValue());
+    assertIdentical(m.getClass(), m.willOnlyInlineForMainWithReturnValue());
+  }
+
+  public static void $noinline$testWithBackEdge(Main m) {  // Same void calls, made inside loops.
+    if (doThrow) throw new Error("");
+    for (int i = 0; i < 10; ++i) {
+      m.willInlineVoid();
+    }
+    for (int i = 0; i < 10; ++i) {
+      m.willOnlyInlineForMainVoid();
+    }
+  }
+
+  public void willInlineVoid() {  // Overridden by SubMain with another empty body.
+  }
+
+  public void willOnlyInlineForMainVoid() {  // SubMain's override adds a conditional throw.
+  }
+
+  public Class willInlineWithReturnValue() {  // Both subclasses override this to return their own class.
+    return Main.class;
+  }
+
+  public Class willOnlyInlineForMainWithReturnValue() {  // Overridden in both subclasses as well.
+    return Main.class;
+  }
+  public static boolean doThrow;  // Never written in this file; Java defaults it to false.
+}
+
+class SubMain extends Main {  // Middle receiver type; overrides all four virtual methods of Main.
+  public void willOnlyInlineForMainVoid() {  // Unlike Main's empty version, this has a conditional throw.
+    if (doThrow) throw new Error("");  // Reads the static doThrow field declared in Main.
+  }
+
+  public void willInlineVoid() {  // Empty, same as Main's version.
+  }
+
+  public Class willInlineWithReturnValue() {  // Matches getClass() for a SubMain receiver.
+    return SubMain.class;
+  }
+
+  public Class willOnlyInlineForMainWithReturnValue() {  // Also compared against getClass() by the test.
+    return SubMain.class;
+  }
+}
+
+class SubSubMain extends SubMain {  // Overrides only the Class-returning methods; void ones come from SubMain.
+  public Class willInlineWithReturnValue() {  // Matches getClass() for a SubSubMain receiver.
+    return SubSubMain.class;
+  }
+
+  public Class willOnlyInlineForMainWithReturnValue() {  // Also compared against getClass() by the test.
+    return SubSubMain.class;
+  }
+}
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 44206df..e6394a9 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -272,5 +272,10 @@
           "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndNoSharedRegistry",
           "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndSharedRegistry",
           "libcore.util.NativeAllocationRegistryTest#testNullArguments"]
+},
+{
+  description: "Only works with --mode=activity",
+  result: EXEC_FAILED,
+  names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
 }
 ]
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index d8ef9ba..f347429 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -29,6 +29,7 @@
   modes: [host],
   names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushEnabled",
           "libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
+          "libcore.java.util.zip.GZIPInputStreamTest#testLongMessage",
           "libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
           "libcore.java.util.zip.OldAndroidGZIPStreamTest#testGZIPStream",
           "libcore.java.util.zip.OldAndroidZipStreamTest#testZipStream",