Merge "Don't reset to a level above warmup threshold."
diff --git a/build/Android.bp b/build/Android.bp
index c54f436..ed6de35 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -59,10 +59,8 @@
         "-Wunreachable-code-break",
         "-Wunreachable-code-return",
 
-        // Bug: http://b/29823425  Disable -Wconstant-conversion and
-        // -Wundefined-var-template for Clang update to r271374
+        // Bug: http://b/29823425  Disable -Wconstant-conversion for Clang update to r271374
         "-Wno-constant-conversion",
-        "-Wno-undefined-var-template",
 
         // Enable thread annotations for std::mutex, etc.
         "-D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS",
@@ -145,6 +143,15 @@
 
     tidy_checks: [
         "-google-default-arguments",
+        // We have local stores that are only used for debug checks.
+        "-clang-analyzer-deadcode.DeadStores",
+        // We are OK with some static globals and that they can, in theory, throw.
+        "-cert-err58-cpp",
+        // We have lots of C-style variadic functions, and are OK with them. JNI ensures
+        // that working around this warning would be extra-painful.
+        "-cert-dcl50-cpp",
+        // No exceptions.
+        "-misc-noexcept-move-constructor",
     ],
 }
 
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index b87cb61..04ceca0 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -110,12 +110,12 @@
   // This method should only be called for classes verified at compile time,
   // which have no verifier error, nor has methods that we know will throw
   // at runtime.
-  atomic_verified_methods_.Insert(
-      ref,
-      /*expected*/ nullptr,
-      new VerifiedMethod(/* encountered_error_types */ 0, /* has_runtime_throw */ false));
-  // We don't check the result of `Insert` as we could insert twice for the same
-  // MethodReference in the presence of duplicate methods.
+  std::unique_ptr<VerifiedMethod> verified_method = std::make_unique<VerifiedMethod>(
+      /* encountered_error_types */ 0, /* has_runtime_throw */ false);
+  if (atomic_verified_methods_.Insert(ref, /*expected*/ nullptr, verified_method.get()) ==
+          AtomicMap::InsertResult::kInsertResultSuccess) {
+    verified_method.release();
+  }
 }
 
 void VerificationResults::AddRejectedClass(ClassReference ref) {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 29413d9..0d0769f 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2241,7 +2241,7 @@
  public:
   explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
 
-  void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
+  void Visit(size_t class_def_index) OVERRIDE {
     ATRACE_CALL();
     jobject jclass_loader = manager_->GetClassLoader();
     const DexFile& dex_file = *manager_->GetDexFile();
@@ -2256,89 +2256,123 @@
     Handle<mirror::Class> klass(
         hs.NewHandle(manager_->GetClassLinker()->FindClass(soa.Self(), descriptor, class_loader)));
 
-    if (klass != nullptr && !SkipClass(jclass_loader, dex_file, klass.Get())) {
-      // Only try to initialize classes that were successfully verified.
-      if (klass->IsVerified()) {
-        // Attempt to initialize the class but bail if we either need to initialize the super-class
-        // or static fields.
-        manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false);
-        if (!klass->IsInitialized()) {
-          // We don't want non-trivial class initialization occurring on multiple threads due to
-          // deadlock problems. For example, a parent class is initialized (holding its lock) that
-          // refers to a sub-class in its static/class initializer causing it to try to acquire the
-          // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock)
-          // after first initializing its parents, whose locks are acquired. This leads to a
-          // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock.
-          // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
-          // than use a special Object for the purpose we use the Class of java.lang.Class.
-          Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
-          ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
-          // Attempt to initialize allowing initialization of parent classes but still not static
-          // fields.
+    if (klass != nullptr && !SkipClass(manager_->GetClassLoader(), dex_file, klass.Get())) {
+      TryInitializeClass(klass, class_loader);
+    }
+    // Clear any class not found or verification exceptions.
+    soa.Self()->ClearException();
+  }
+
+  // A helper function for initializing klass.
+  void TryInitializeClass(Handle<mirror::Class> klass, Handle<mirror::ClassLoader>& class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    const DexFile& dex_file = klass->GetDexFile();
+    const DexFile::ClassDef* class_def = klass->GetClassDef();
+    const DexFile::TypeId& class_type_id = dex_file.GetTypeId(class_def->class_idx_);
+    const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_);
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    StackHandleScope<3> hs(soa.Self());
+
+    mirror::Class::Status old_status = klass->GetStatus();;
+    // Only try to initialize classes that were successfully verified.
+    if (klass->IsVerified()) {
+      // Attempt to initialize the class but bail if we either need to initialize the super-class
+      // or static fields.
+      manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false);
+      old_status = klass->GetStatus();
+      if (!klass->IsInitialized()) {
+        // We don't want non-trivial class initialization occurring on multiple threads due to
+        // deadlock problems. For example, a parent class is initialized (holding its lock) that
+        // refers to a sub-class in its static/class initializer causing it to try to acquire the
+        // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock)
+        // after first initializing its parents, whose locks are acquired. This leads to a
+        // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock.
+        // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
+        // than use a special Object for the purpose we use the Class of java.lang.Class.
+        Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
+        ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
+        // Attempt to initialize allowing initialization of parent classes but still not static
+        // fields.
+        bool is_superclass_initialized = InitializeDependencies(klass, class_loader, soa.Self());
+        if (is_superclass_initialized) {
           manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true);
-          if (!klass->IsInitialized()) {
+        }
+        old_status = klass->GetStatus();
+        // If superclass cannot be initialized, no need to proceed.
+        if (!klass->IsInitialized() &&
+            is_superclass_initialized &&
+            manager_->GetCompiler()->IsImageClass(descriptor)) {
+          bool can_init_static_fields = false;
+          if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage()) {
             // We need to initialize static fields, we only do this for image classes that aren't
             // marked with the $NoPreloadHolder (which implies this should not be initialized early).
-            bool can_init_static_fields =
-                manager_->GetCompiler()->GetCompilerOptions().IsBootImage() &&
-                manager_->GetCompiler()->IsImageClass(descriptor) &&
-                !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
-            if (can_init_static_fields) {
-              VLOG(compiler) << "Initializing: " << descriptor;
-              // TODO multithreading support. We should ensure the current compilation thread has
-              // exclusive access to the runtime and the transaction. To achieve this, we could use
-              // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
-              // checks in Thread::AssertThreadSuspensionIsAllowable.
-              Runtime* const runtime = Runtime::Current();
-              Transaction transaction;
+            can_init_static_fields = !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
+          } else {
+            can_init_static_fields = manager_->GetCompiler()->GetCompilerOptions().IsAppImage() &&
+                !soa.Self()->IsExceptionPending() &&
+                NoClinitInDependency(klass, soa.Self(), &class_loader);
+            // TODO: The check for clinit can be removed since it is already
+            // done when initializing the superclass. It is kept for now because it
+            // also checks for potential intern strings. It will be removed later,
+            // once intern strings and clinit are both initialized.
+          }
 
-              // Run the class initializer in transaction mode.
-              runtime->EnterTransactionMode(&transaction);
-              const mirror::Class::Status old_status = klass->GetStatus();
-              bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true,
-                                                                           true);
-              // TODO we detach transaction from runtime to indicate we quit the transactional
-              // mode which prevents the GC from visiting objects modified during the transaction.
-              // Ensure GC is not run so don't access freed objects when aborting transaction.
+          if (can_init_static_fields) {
+            VLOG(compiler) << "Initializing: " << descriptor;
+            // TODO multithreading support. We should ensure the current compilation thread has
+            // exclusive access to the runtime and the transaction. To achieve this, we could use
+            // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
+            // checks in Thread::AssertThreadSuspensionIsAllowable.
+            Runtime* const runtime = Runtime::Current();
+            Transaction transaction;
 
-              {
-                ScopedAssertNoThreadSuspension ants("Transaction end");
-                runtime->ExitTransactionMode();
+            // Run the class initializer in transaction mode.
+            runtime->EnterTransactionMode(&transaction);
+            bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true,
+                                                                         true);
+            // TODO we detach transaction from runtime to indicate we quit the transactional
+            // mode which prevents the GC from visiting objects modified during the transaction.
+            // Ensure GC is not run so don't access freed objects when aborting transaction.
 
-                if (!success) {
-                  CHECK(soa.Self()->IsExceptionPending());
-                  mirror::Throwable* exception = soa.Self()->GetException();
-                  VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
-                      << exception->Dump();
-                  std::ostream* file_log = manager_->GetCompiler()->
-                      GetCompilerOptions().GetInitFailureOutput();
-                  if (file_log != nullptr) {
-                    *file_log << descriptor << "\n";
-                    *file_log << exception->Dump() << "\n";
-                  }
-                  soa.Self()->ClearException();
-                  transaction.Rollback();
-                  CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
-                }
-              }
+            {
+              ScopedAssertNoThreadSuspension ants("Transaction end");
+              runtime->ExitTransactionMode();
 
               if (!success) {
-                // On failure, still intern strings of static fields and seen in <clinit>, as these
-                // will be created in the zygote. This is separated from the transaction code just
-                // above as we will allocate strings, so must be allowed to suspend.
+                CHECK(soa.Self()->IsExceptionPending());
+                mirror::Throwable* exception = soa.Self()->GetException();
+                VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
+                               << exception->Dump();
+                std::ostream* file_log = manager_->GetCompiler()->
+                    GetCompilerOptions().GetInitFailureOutput();
+                if (file_log != nullptr) {
+                  *file_log << descriptor << "\n";
+                  *file_log << exception->Dump() << "\n";
+                }
+                soa.Self()->ClearException();
+                transaction.Rollback();
+                CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+              }
+            }
+
+            if (!success) {
+              // On failure, still intern strings of static fields and seen in <clinit>, as these
+              // will be created in the zygote. This is separated from the transaction code just
+              // above as we will allocate strings, so must be allowed to suspend.
+              if (&klass->GetDexFile() == manager_->GetDexFile()) {
                 InternStrings(klass, class_loader);
               }
             }
           }
-          soa.Self()->AssertNoPendingException();
         }
+        soa.Self()->AssertNoPendingException();
       }
-      // Record the final class status if necessary.
-      ClassReference ref(manager_->GetDexFile(), class_def_index);
-      manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus());
     }
-    // Clear any class not found or verification exceptions.
-    soa.Self()->ClearException();
+    // Record the final class status if necessary.
+    ClassReference ref(&dex_file, klass->GetDexClassDefIndex());
+    // Back up the status before doing initialization for static encoded fields,
+    // because the static encoded branch wants to keep the status as uninitialized.
+    manager_->GetCompiler()->RecordClassStatus(ref, old_status);
   }
 
  private:
@@ -2393,6 +2427,162 @@
     }
   }
 
+  bool NoPotentialInternStrings(Handle<mirror::Class> klass,
+                                Handle<mirror::ClassLoader>* class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::DexCache> h_dex_cache = hs.NewHandle(klass->GetDexCache());
+    const DexFile* dex_file = h_dex_cache->GetDexFile();
+    const DexFile::ClassDef* class_def = klass->GetClassDef();
+    annotations::RuntimeEncodedStaticFieldValueIterator value_it(*dex_file,
+                                                                 &h_dex_cache,
+                                                                 class_loader,
+                                                                 manager_->GetClassLinker(),
+                                                                 *class_def);
+
+    const auto jString = annotations::RuntimeEncodedStaticFieldValueIterator::kString;
+    for ( ; value_it.HasNext(); value_it.Next()) {
+      if (value_it.GetValueType() == jString) {
+        // We don't want to cache static encoded strings, as each is a potential intern.
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  bool ResolveTypesOfMethods(Thread* self, ArtMethod* m)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+      auto rtn_type = m->GetReturnType(true);  // return value is discarded because resolve will be done internally.
+      if (rtn_type == nullptr) {
+        self->ClearException();
+        return false;
+      }
+      const DexFile::TypeList* types = m->GetParameterTypeList();
+      if (types != nullptr) {
+        for (uint32_t i = 0; i < types->Size(); ++i) {
+          dex::TypeIndex param_type_idx = types->GetTypeItem(i).type_idx_;
+          auto param_type = m->GetClassFromTypeIndex(param_type_idx, true);
+          if (param_type == nullptr) {
+            self->ClearException();
+            return false;
+          }
+        }
+      }
+      return true;
+  }
+
+  // Pre-resolve types mentioned in all method signatures before starting a transaction,
+  // since ResolveType doesn't work in transaction mode.
+  bool PreResolveTypes(Thread* self, const Handle<mirror::Class>& klass)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+      PointerSize pointer_size = manager_->GetClassLinker()->GetImagePointerSize();
+      for (ArtMethod& m : klass->GetMethods(pointer_size)) {
+        if (!ResolveTypesOfMethods(self, &m)) {
+          return false;
+        }
+      }
+      if (klass->IsInterface()) {
+        return true;
+      } else if (klass->HasSuperClass()) {
+        StackHandleScope<1> hs(self);
+        MutableHandle<mirror::Class> super_klass(hs.NewHandle<mirror::Class>(klass->GetSuperClass()));
+        for (int i = super_klass->GetVTableLength() - 1; i >= 0; --i) {
+          ArtMethod* m = klass->GetVTableEntry(i, pointer_size);
+          ArtMethod* super_m = super_klass->GetVTableEntry(i, pointer_size);
+          if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) {
+            return false;
+          }
+        }
+        for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
+          super_klass.Assign(klass->GetIfTable()->GetInterface(i));
+          if (klass->GetClassLoader() != super_klass->GetClassLoader()) {
+            uint32_t num_methods = super_klass->NumVirtualMethods();
+            for (uint32_t j = 0; j < num_methods; ++j) {
+              ArtMethod* m = klass->GetIfTable()->GetMethodArray(i)->GetElementPtrSize<ArtMethod*>(
+                  j, pointer_size);
+              ArtMethod* super_m = super_klass->GetVirtualMethod(j, pointer_size);
+              if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) {
+                return false;
+              }
+            }
+          }
+        }
+      }
+      return true;
+  }
+
+  // Initialize the klass's dependencies recursively before initializing itself.
+  // Checking for interfaces is also necessary since interfaces can contain
+  // both default methods and static encoded fields.
+  bool InitializeDependencies(const Handle<mirror::Class>& klass,
+                              Handle<mirror::ClassLoader> class_loader,
+                              Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (klass->HasSuperClass()) {
+      ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class));
+      if (!handle_scope_super->IsInitialized()) {
+        this->TryInitializeClass(handle_scope_super, class_loader);
+        if (!handle_scope_super->IsInitialized()) {
+          return false;
+        }
+      }
+    }
+
+    uint32_t num_if = klass->NumDirectInterfaces();
+    for (size_t i = 0; i < num_if; i++) {
+      ObjPtr<mirror::Class>
+          interface = mirror::Class::GetDirectInterface(self, klass.Get(), i);
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> handle_interface(hs.NewHandle(interface));
+
+      TryInitializeClass(handle_interface, class_loader);
+
+      if (!handle_interface->IsInitialized()) {
+        return false;
+      }
+    }
+
+    return PreResolveTypes(self, klass);
+  }
+
+  // In this phase the classes containing class initializers are ignored. Make sure no
+  // clinit appears in klass's superclass chain and interfaces.
+  bool NoClinitInDependency(const Handle<mirror::Class>& klass,
+                            Thread* self,
+                            Handle<mirror::ClassLoader>* class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ArtMethod* clinit =
+        klass->FindClassInitializer(manager_->GetClassLinker()->GetImagePointerSize());
+    if (clinit != nullptr) {
+      VLOG(compiler) << klass->PrettyClass() << ' ' << clinit->PrettyMethod(true);
+      return false;
+    }
+    if (klass->HasSuperClass()) {
+      ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class));
+      if (!NoClinitInDependency(handle_scope_super, self, class_loader)) {
+        return false;
+      }
+    }
+
+    uint32_t num_if = klass->NumDirectInterfaces();
+    for (size_t i = 0; i < num_if; i++) {
+      ObjPtr<mirror::Class>
+          interface = mirror::Class::GetDirectInterface(self, klass.Get(), i);
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> handle_interface(hs.NewHandle(interface));
+      if (!NoClinitInDependency(handle_interface, self, class_loader)) {
+        return false;
+      }
+    }
+
+    return NoPotentialInternStrings(klass, class_loader);
+  }
+
   const ParallelCompilationManager* const manager_;
 };
 
@@ -2412,7 +2602,10 @@
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
                                      init_thread_pool);
-  if (GetCompilerOptions().IsBootImage()) {
+
+  if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsAppImage()) {
+    // Set the thread count to 1 to support initialization for app images, since transactions
+    // don't support multithreading yet.
     // TODO: remove this when transactional mode supports multithreading.
     init_thread_count = 1U;
   }
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 63b7cad..5d6dd2e 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -22,7 +22,6 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
 #include "compiled_method.h"
 #include "debug/elf_debug_writer.h"
 #include "debug/method_debug_info.h"
@@ -137,15 +136,15 @@
                                                 const CompilerOptions* compiler_options,
                                                 File* elf_file) {
   if (Is64BitInstructionSet(instruction_set)) {
-    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set,
-                                                  features,
-                                                  compiler_options,
-                                                  elf_file);
+    return std::make_unique<ElfWriterQuick<ElfTypes64>>(instruction_set,
+                                                        features,
+                                                        compiler_options,
+                                                        elf_file);
   } else {
-    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set,
-                                                  features,
-                                                  compiler_options,
-                                                  elf_file);
+    return std::make_unique<ElfWriterQuick<ElfTypes32>>(instruction_set,
+                                                        features,
+                                                        compiler_options,
+                                                        elf_file);
   }
 }
 
@@ -161,7 +160,8 @@
       rodata_size_(0u),
       text_size_(0u),
       bss_size_(0u),
-      output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))),
+      output_stream_(
+          std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(elf_file))),
       builder_(new ElfBuilder<ElfTypes>(instruction_set, features, output_stream_.get())) {}
 
 template <typename ElfTypes>
diff --git a/compiler/image_test.h b/compiler/image_test.h
index 394b7f1..3d89757 100644
--- a/compiler/image_test.h
+++ b/compiler/image_test.h
@@ -290,9 +290,9 @@
 
       if (kIsVdexEnabled) {
         for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
-          std::unique_ptr<BufferedOutputStream> vdex_out(
-              MakeUnique<BufferedOutputStream>(
-                  MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
+          std::unique_ptr<BufferedOutputStream> vdex_out =
+              std::make_unique<BufferedOutputStream>(
+                  std::make_unique<FileOutputStream>(vdex_files[i].GetFile()));
           oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
           oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
         }
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 16c9f10..406892e 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -49,7 +49,6 @@
 #include "globals.h"
 #include "image.h"
 #include "imt_conflict_table.h"
-#include "intern_table.h"
 #include "jni_internal.h"
 #include "linear_alloc.h"
 #include "lock_word.h"
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 3db4fab..5e2db7d 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -36,6 +36,7 @@
 #include "class_table.h"
 #include "driver/compiler_driver.h"
 #include "image.h"
+#include "intern_table.h"
 #include "lock_word.h"
 #include "mem_map.h"
 #include "mirror/dex_cache.h"
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index c1ac230..18ff1c9 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -16,6 +16,7 @@
 
 #include "linker/arm/relative_patcher_arm_base.h"
 
+#include "base/stl_util.h"
 #include "compiled_method.h"
 #include "linker/output_stream.h"
 #include "oat.h"
diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc
index 84c76f2..09fef29 100644
--- a/compiler/linker/output_stream_test.cc
+++ b/compiler/linker/output_stream_test.cc
@@ -19,7 +19,6 @@
 
 #include "base/unix_file/fd_file.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
 #include "buffered_output_stream.h"
 #include "common_runtime_test.h"
 
@@ -79,7 +78,7 @@
 TEST_F(OutputStreamTest, Buffered) {
   ScratchFile tmp;
   {
-    BufferedOutputStream buffered_output_stream(MakeUnique<FileOutputStream>(tmp.GetFile()));
+    BufferedOutputStream buffered_output_stream(std::make_unique<FileOutputStream>(tmp.GetFile()));
     SetOutputStream(buffered_output_stream);
     GenerateTestOutput();
   }
@@ -125,7 +124,7 @@
     bool flush_called;
   };
 
-  std::unique_ptr<CheckingOutputStream> cos = MakeUnique<CheckingOutputStream>();
+  std::unique_ptr<CheckingOutputStream> cos = std::make_unique<CheckingOutputStream>();
   CheckingOutputStream* checking_output_stream = cos.get();
   BufferedOutputStream buffered(std::move(cos));
   ASSERT_FALSE(checking_output_stream->flush_called);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index fdb21e4..55d0bd9 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -19,6 +19,7 @@
 #include "arch/instruction_set_features.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
@@ -224,8 +225,8 @@
                                       oat_writer.GetBssRootsOffset());
 
     if (kIsVdexEnabled) {
-      std::unique_ptr<BufferedOutputStream> vdex_out(
-            MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
+      std::unique_ptr<BufferedOutputStream> vdex_out =
+            std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(vdex_file));
       if (!oat_writer.WriteVerifierDeps(vdex_out.get(), nullptr)) {
         return false;
       }
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 581b1ee..59daf5a 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -515,8 +515,8 @@
   ChecksumUpdatingOutputStream checksum_updating_rodata(oat_rodata, oat_header_.get());
 
   if (kIsVdexEnabled) {
-    std::unique_ptr<BufferedOutputStream> vdex_out(
-        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
+    std::unique_ptr<BufferedOutputStream> vdex_out =
+        std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(vdex_file));
     // Write DEX files into VDEX, mmap and open them.
     if (!WriteDexFiles(vdex_out.get(), vdex_file, update_input_vdex) ||
         !OpenDexFiles(vdex_file, verify, &dex_files_map, &dex_files)) {
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 1364018..93234f9 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -41,6 +41,8 @@
 #include "code_generator_mips64.h"
 #endif
 
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "bytecode_utils.h"
 #include "class_linker.h"
 #include "compiled_method.h"
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 804bc0f..e4efbef 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -19,6 +19,8 @@
 #include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "code_generator_utils.h"
 #include "common_arm.h"
 #include "compiled_method.h"
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 9ba38e5..34397e6 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -19,6 +19,8 @@
 #include "arch/arm64/asm_support_arm64.h"
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 9cd7761..c6bd871 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -19,6 +19,8 @@
 #include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "code_generator_utils.h"
 #include "common_arm.h"
 #include "compiled_method.h"
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index af9e89e..0395db1 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -495,7 +495,60 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uB(dst, lhs, rhs);
+      } else {
+        __ Min_sB(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uH(dst, lhs, rhs);
+      } else {
+        __ Min_sH(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uW(dst, lhs, rhs);
+      } else {
+        __ Min_sW(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uD(dst, lhs, rhs);
+      } else {
+        __ Min_sD(dst, lhs, rhs);
+      }
+      break;
+    // When one of the arguments is NaN, fmin.df returns the other argument, but Java expects NaN.
+    // TODO: Fix min(x, NaN) cases for float and double.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FminW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FminD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) {
@@ -503,7 +556,60 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uB(dst, lhs, rhs);
+      } else {
+        __ Max_sB(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uH(dst, lhs, rhs);
+      } else {
+        __ Max_sH(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uW(dst, lhs, rhs);
+      } else {
+        __ Max_sW(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uD(dst, lhs, rhs);
+      } else {
+        __ Max_sD(dst, lhs, rhs);
+      }
+      break;
+    // When one of the arguments is NaN, fmax.df returns the other argument, but Java expects NaN.
+    // TODO: Fix max(x, NaN) cases for float and double.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FmaxW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FmaxD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 88473f02..84b20f6 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -695,8 +695,8 @@
                                  /*fetch*/ nullptr,
                                  type_);
         default:
-          CHECK(false) << op;
-          break;
+          LOG(FATAL) << op;
+          UNREACHABLE();
       }
     }
   }
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index c3aa976..9c8a632 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -499,6 +499,7 @@
       body = it.Current();
     }
   }
+  CHECK(body != nullptr);
   // Ensure there is only a single exit point.
   if (header->GetSuccessors().size() != 2) {
     return;
@@ -1084,23 +1085,23 @@
         switch (type) {
           case Primitive::kPrimBoolean:
           case Primitive::kPrimByte:
-            *restrictions |= kNoDiv | kNoMinMax;
+            *restrictions |= kNoDiv;
             return TrySetVectorLength(16);
           case Primitive::kPrimChar:
           case Primitive::kPrimShort:
-            *restrictions |= kNoDiv | kNoMinMax | kNoStringCharAt;
+            *restrictions |= kNoDiv | kNoStringCharAt;
             return TrySetVectorLength(8);
           case Primitive::kPrimInt:
-            *restrictions |= kNoDiv | kNoMinMax;
+            *restrictions |= kNoDiv;
             return TrySetVectorLength(4);
           case Primitive::kPrimLong:
-            *restrictions |= kNoDiv | kNoMinMax;
+            *restrictions |= kNoDiv;
             return TrySetVectorLength(2);
           case Primitive::kPrimFloat:
-            *restrictions |= kNoMinMax;
+            *restrictions |= kNoMinMax;  // min/max(x, NaN) must yield NaN in Java
             return TrySetVectorLength(4);
           case Primitive::kPrimDouble:
-            *restrictions |= kNoMinMax;
+            *restrictions |= kNoMinMax;  // min/max(x, NaN) must yield NaN in Java
             return TrySetVectorLength(2);
           default:
             break;
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 0f24e81..bb23a29 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -25,7 +25,7 @@
 #include "base/bit_utils.h"
 #include "base/enums.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
+#include "base/stl_util_identity.h"
 #include "base/value_object.h"
 #include "constants_arm.h"
 #include "utils/arm/assembler_arm_shared.h"
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index 6afc3dd..eb3f870 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -18,6 +18,8 @@
 #include <type_traits>
 
 #include "assembler_arm_vixl.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "thread.h"
 
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index a99d02d..0b05b75 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -404,6 +404,129 @@
   return encoding;
 }
 
+uint32_t MipsAssembler::EmitMsa3R(int operation,
+                                  int df,
+                                  VectorRegister wt,
+                                  VectorRegister ws,
+                                  VectorRegister wd,
+                                  int minor_opcode) {
+  CHECK_NE(wt, kNoVectorRegister);
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsaOperationShift |
+                      df << kDfShift |
+                      static_cast<uint32_t>(wt) << kWtShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsaBIT(int operation,
+                                   int df_m,
+                                   VectorRegister ws,
+                                   VectorRegister wd,
+                                   int minor_opcode) {
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsaOperationShift |
+                      df_m << kDfMShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsaELM(int operation,
+                                   int df_n,
+                                   VectorRegister ws,
+                                   VectorRegister wd,
+                                   int minor_opcode) {
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsaELMOperationShift |
+                      df_n << kDfNShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsaMI10(int s10,
+                                    Register rs,
+                                    VectorRegister wd,
+                                    int minor_opcode,
+                                    int df) {
+  CHECK_NE(rs, kNoRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  CHECK(IsUint<10>(s10)) << s10;
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      s10 << kS10Shift |
+                      static_cast<uint32_t>(rs) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode << kS10MinorShift |
+                      df;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsaI10(int operation,
+                                   int df,
+                                   int i10,
+                                   VectorRegister wd,
+                                   int minor_opcode) {
+  CHECK_NE(wd, kNoVectorRegister);
+  CHECK(IsUint<10>(i10)) << i10;
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsaOperationShift |
+                      df << kDfShift |
+                      i10 << kI10Shift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsa2R(int operation,
+                                  int df,
+                                  VectorRegister ws,
+                                  VectorRegister wd,
+                                  int minor_opcode) {
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsa2ROperationShift |
+                      df << kDf2RShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsa2RF(int operation,
+                                   int df,
+                                   VectorRegister ws,
+                                   VectorRegister wd,
+                                   int minor_opcode) {
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsa2RFOperationShift |
+                      df << kDf2RShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
 void MipsAssembler::Addu(Register rd, Register rs, Register rt) {
   DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt);
 }
@@ -635,9 +758,8 @@
   DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04), rd, rd, rt);
 }
 
-// TODO: This instruction is available in both R6 and MSA and it should be used when available.
 void MipsAssembler::Lsa(Register rd, Register rs, Register rt, int saPlusOne) {
-  CHECK(IsR6());
+  CHECK(IsR6() || HasMsa());
   CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne;
   int sa = saPlusOne - 1;
   DsFsmInstrRrr(EmitR(0x0, rs, rt, rd, sa, 0x05), rd, rs, rt);
@@ -653,7 +775,7 @@
   if (shamt == TIMES_1) {
     // Catch the special case where the shift amount is zero (0).
     Addu(dst, src_base, src_idx);
-  } else if (IsR6()) {
+  } else if (IsR6() || HasMsa()) {
     Lsa(dst, src_idx, src_base, shamt);
   } else {
     Sll(tmp, src_idx, shamt);
@@ -1709,6 +1831,1079 @@
   SetReorder(reordering);
 }
 
+void MipsAssembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x3, 0x0, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x3, 0x1, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x3, 0x2, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x3, 0x3, wt, ws, wd, 0xe),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MIN_S.B: 3R format, operation 0x4, df 0x0, minor opcode 0xe. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x0, wt, ws, wd, 0xe);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MIN_S.H: 3R format, operation 0x4, df 0x1, minor opcode 0xe. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x1, wt, ws, wd, 0xe);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MIN_S.W: 3R format, operation 0x4, df 0x2, minor opcode 0xe. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x2, wt, ws, wd, 0xe);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MIN_S.D: 3R format, operation 0x4, df 0x3, minor opcode 0xe. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x3, wt, ws, wd, 0xe);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MIN_U.B: 3R format, operation 0x5, df 0x0, minor opcode 0xe. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x0, wt, ws, wd, 0xe);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MIN_U.H: 3R format, operation 0x5, df 0x1, minor opcode 0xe. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x1, wt, ws, wd, 0xe);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MIN_U.W: 3R format, operation 0x5, df 0x2, minor opcode 0xe. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x2, wt, ws, wd, 0xe);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MIN_U.D: 3R format, operation 0x5, df 0x3, minor opcode 0xe. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x3, wt, ws, wd, 0xe);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FADD.W: 3R format, operation 0x0, df 0x0, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FADD.D: 3R format, operation 0x0, df 0x1, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FSUB.W: 3R format, operation 0x0, df 0x2, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FSUB.D: 3R format, operation 0x0, df 0x3, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FMUL.W: 3R format, operation 0x1, df 0x0, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FMUL.D: 3R format, operation 0x1, df 0x1, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FDIV.W: 3R format, operation 0x1, df 0x2, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FDIV.D: 3R format, operation 0x1, df 0x3, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FMAX.W: 3R format, operation 0x7, df 0x0, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FMAX.D: 3R format, operation 0x7, df 0x1, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FMIN.W: 3R format, operation 0x6, df 0x0, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // FMIN.D: 3R format, operation 0x6, df 0x1, minor opcode 0x1b. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x1b);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ffint_sW(VectorRegister wd, VectorRegister ws) {
+  // FFINT_S.W: 2RF format, operation 0x19e, df 0x0, minor opcode 0x1e. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e);
+  const FRegister src = static_cast<FRegister>(ws);  // Supplied as both trailing operands.
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::Ffint_sD(VectorRegister wd, VectorRegister ws) {
+  // FFINT_S.D: 2RF format, operation 0x19e, df 0x1, minor opcode 0x1e. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e);
+  const FRegister src = static_cast<FRegister>(ws);  // Supplied as both trailing operands.
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::Ftint_sW(VectorRegister wd, VectorRegister ws) {
+  // FTINT_S.W: 2RF format, operation 0x19c, df 0x0, minor opcode 0x1e. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e);
+  const FRegister src = static_cast<FRegister>(ws);  // Supplied as both trailing operands.
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::Ftint_sD(VectorRegister wd, VectorRegister ws) {
+  // FTINT_S.D: 2RF format, operation 0x19c, df 0x1, minor opcode 0x1e. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e);
+  const FRegister src = static_cast<FRegister>(ws);  // Supplied as both trailing operands.
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SLL.B: 3R format, operation 0x0, df 0x0, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SLL.H: 3R format, operation 0x0, df 0x1, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SLL.W: 3R format, operation 0x0, df 0x2, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SLL.D: 3R format, operation 0x0, df 0x3, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SRA.B: 3R format, operation 0x1, df 0x0, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SRA.H: 3R format, operation 0x1, df 0x1, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SRA.W: 3R format, operation 0x1, df 0x2, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SRA.D: 3R format, operation 0x1, df 0x3, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SRL.B: 3R format, operation 0x2, df 0x0, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SRL.H: 3R format, operation 0x2, df 0x1, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SRL.W: 3R format, operation 0x2, df 0x2, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // SRL.D: 3R format, operation 0x2, df 0x3, minor opcode 0xd. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) {
+  // SLLI.B: BIT format, operation 0x0, minor opcode 0x9; df_m = shamt3 | kMsaDfMByteMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(shamt3)) << shamt3;
+  const uint32_t encoding = EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) {
+  // SLLI.H: BIT format, operation 0x0, minor opcode 0x9; df_m = shamt4 | kMsaDfMHalfwordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(shamt4)) << shamt4;
+  const uint32_t encoding = EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) {
+  // SLLI.W: BIT format, operation 0x0, minor opcode 0x9; df_m = shamt5 | kMsaDfMWordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<5>(shamt5)) << shamt5;
+  const uint32_t encoding = EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) {
+  // SLLI.D: BIT format, operation 0x0, minor opcode 0x9; df_m = shamt6 | kMsaDfMDoublewordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<6>(shamt6)) << shamt6;
+  const uint32_t encoding = EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) {
+  // SRAI.B: BIT format, operation 0x1, minor opcode 0x9; df_m = shamt3 | kMsaDfMByteMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(shamt3)) << shamt3;
+  const uint32_t encoding = EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) {
+  // SRAI.H: BIT format, operation 0x1, minor opcode 0x9; df_m = shamt4 | kMsaDfMHalfwordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(shamt4)) << shamt4;
+  const uint32_t encoding = EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) {
+  // SRAI.W: BIT format, operation 0x1, minor opcode 0x9; df_m = shamt5 | kMsaDfMWordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<5>(shamt5)) << shamt5;
+  const uint32_t encoding = EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) {
+  // SRAI.D: BIT format, operation 0x1, minor opcode 0x9; df_m = shamt6 | kMsaDfMDoublewordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<6>(shamt6)) << shamt6;
+  const uint32_t encoding = EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) {
+  // SRLI.B: BIT format, operation 0x2, minor opcode 0x9; df_m = shamt3 | kMsaDfMByteMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(shamt3)) << shamt3;
+  const uint32_t encoding = EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) {
+  // SRLI.H: BIT format, operation 0x2, minor opcode 0x9; df_m = shamt4 | kMsaDfMHalfwordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(shamt4)) << shamt4;
+  const uint32_t encoding = EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) {
+  // SRLI.W: BIT format, operation 0x2, minor opcode 0x9; df_m = shamt5 | kMsaDfMWordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<5>(shamt5)) << shamt5;
+  const uint32_t encoding = EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) {
+  // SRLI.D: BIT format, operation 0x2, minor opcode 0x9; df_m = shamt6 | kMsaDfMDoublewordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<6>(shamt6)) << shamt6;
+  const uint32_t encoding = EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::MoveV(VectorRegister wd, VectorRegister ws) {
+  // MOVE.V: emitted through EmitMsaBIT with operation 0x1, df_m 0x3e and minor opcode 0x19.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);  // Supplied as both trailing operands.
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) {
+  // SPLATI.B: ELM format, operation 0x1, minor opcode 0x19; df_n = n4 | kMsaDfNByteMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  const uint32_t encoding = EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) {
+  // SPLATI.H: ELM format, operation 0x1, minor opcode 0x19; df_n = n3 | kMsaDfNHalfwordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  const uint32_t encoding = EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) {
+  // SPLATI.W: ELM format, operation 0x1, minor opcode 0x19; df_n = n2 | kMsaDfNWordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  const uint32_t encoding = EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) {
+  // SPLATI.D: ELM format, operation 0x1, minor opcode 0x19; df_n = n1 | kMsaDfNDoublewordMask.
+  CHECK(HasMsa());
+  CHECK(IsUint<1>(n1)) << n1;
+  const uint32_t encoding = EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);
+}
+
+void MipsAssembler::FillB(VectorRegister wd, Register rs) {
+  // FILL.B: 2R format (operation 0xc0, df 0x0, minor opcode 0x1e); rs is cast into the ws slot.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::FillH(VectorRegister wd, Register rs) {
+  // FILL.H: 2R format (operation 0xc0, df 0x1, minor opcode 0x1e); rs is cast into the ws slot.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::FillW(VectorRegister wd, Register rs) {
+  // FILL.W: 2R format (operation 0xc0, df 0x2, minor opcode 0x1e); rs is cast into the ws slot.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::LdiB(VectorRegister wd, int imm8) {
+  // LDI.B: I10 format, operation 0x6, df 0x0, minor opcode 0x7; imm8 must fit 8 signed bits.
+  CHECK(HasMsa());
+  CHECK(IsInt<8>(imm8)) << imm8;
+  const uint32_t encoding = EmitMsaI10(0x6, 0x0, imm8 & kMsaS10Mask, wd, 0x7);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), ZERO);
+}
+
+void MipsAssembler::LdiH(VectorRegister wd, int imm10) {
+  // LDI.H: I10 format, operation 0x6, df 0x1, minor opcode 0x7; imm10 must fit 10 signed bits.
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(imm10)) << imm10;
+  const uint32_t encoding = EmitMsaI10(0x6, 0x1, imm10 & kMsaS10Mask, wd, 0x7);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), ZERO);
+}
+
+void MipsAssembler::LdiW(VectorRegister wd, int imm10) {
+  // LDI.W: I10 format, operation 0x6, df 0x2, minor opcode 0x7; imm10 must fit 10 signed bits.
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(imm10)) << imm10;
+  const uint32_t encoding = EmitMsaI10(0x6, 0x2, imm10 & kMsaS10Mask, wd, 0x7);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), ZERO);
+}
+
+void MipsAssembler::LdiD(VectorRegister wd, int imm10) {
+  // LDI.D: I10 format, operation 0x6, df 0x3, minor opcode 0x7; imm10 must fit 10 signed bits.
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(imm10)) << imm10;
+  const uint32_t encoding = EmitMsaI10(0x6, 0x3, imm10 & kMsaS10Mask, wd, 0x7);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), ZERO);
+}
+
+void MipsAssembler::LdB(VectorRegister wd, Register rs, int offset) {
+  // LD.B: MI10 format, minor opcode 0x8, df 0x0; offset must fit 10 signed bits.
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(offset)) << offset;
+  const uint32_t encoding = EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::LdH(VectorRegister wd, Register rs, int offset) {
+  // LD.H: MI10 format, minor opcode 0x8, df 0x1; the aligned offset is shifted by TIMES_2.
+  CHECK(HasMsa());
+  CHECK(IsInt<11>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsHalfwordSize);
+  const uint32_t encoding = EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::LdW(VectorRegister wd, Register rs, int offset) {
+  // LD.W: MI10 format, minor opcode 0x8, df 0x2; the aligned offset is shifted by TIMES_4.
+  CHECK(HasMsa());
+  CHECK(IsInt<12>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsWordSize);
+  const uint32_t encoding = EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::LdD(VectorRegister wd, Register rs, int offset) {
+  // LD.D: MI10 format, minor opcode 0x8, df 0x3; the aligned offset is shifted by TIMES_8.
+  CHECK(HasMsa());
+  CHECK(IsInt<13>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsDoublewordSize);
+  const uint32_t encoding = EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::StB(VectorRegister wd, Register rs, int offset) {
+  CHECK(HasMsa());  // ST.B (MI10 format, minor opcode 0x9, df 0x0) requires MSA.
+  CHECK(IsInt<10>(offset)) << offset;  // The offset must fit the signed 10-bit s10 operand.
+  DsFsmInstrFR(EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0), static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::StH(VectorRegister wd, Register rs, int offset) {
+  // ST.H: MI10 format, minor opcode 0x9, df 0x1; the aligned offset is shifted by TIMES_2.
+  CHECK(HasMsa());
+  CHECK(IsInt<11>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsHalfwordSize);
+  const uint32_t encoding = EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1);
+  DsFsmInstrFR(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::StW(VectorRegister wd, Register rs, int offset) {
+  // ST.W: MI10 format, minor opcode 0x9, df 0x2; the aligned offset is shifted by TIMES_4.
+  CHECK(HasMsa());
+  CHECK(IsInt<12>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsWordSize);
+  const uint32_t encoding = EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2);
+  DsFsmInstrFR(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::StD(VectorRegister wd, Register rs, int offset) {
+  // ST.D: MI10 format, minor opcode 0x9, df 0x3; the aligned offset is shifted by TIMES_8.
+  CHECK(HasMsa());
+  CHECK(IsInt<13>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsDoublewordSize);
+  const uint32_t encoding = EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3);
+  DsFsmInstrFR(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // ILVR.B: 3R format, operation 0x5, df 0x0, minor opcode 0x14. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // ILVR.H: 3R format, operation 0x5, df 0x1, minor opcode 0x14. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x14);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // ILVR.W: 3R format, operation 0x5, df 0x2, minor opcode 0x14. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x14);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // ILVR.D: 3R format, operation 0x5, df 0x3, minor opcode 0x14. Requires MSA.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14);
+  const FRegister out = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, out, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
 void MipsAssembler::LoadConst32(Register rd, int32_t value) {
   if (IsUint<16>(value)) {
     // Use OR with (unsigned) immediate to encode 16b unsigned int.
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 463daeb..dd4ce6d 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -25,6 +25,7 @@
 #include "base/arena_containers.h"
 #include "base/enums.h"
 #include "base/macros.h"
+#include "base/stl_util_identity.h"
 #include "constants_mips.h"
 #include "globals.h"
 #include "managed_register_mips.h"
@@ -36,6 +37,7 @@
 namespace art {
 namespace mips {
 
+static constexpr size_t kMipsHalfwordSize = 2;
 static constexpr size_t kMipsWordSize = 4;
 static constexpr size_t kMipsDoublewordSize = 8;
 
@@ -194,6 +196,7 @@
         last_position_adjustment_(0),
         last_old_position_(0),
         last_branch_id_(0),
+        has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false),
         isa_features_(instruction_set_features) {
     cfi().DelayEmittingAdvancePCs();
   }
@@ -464,6 +467,149 @@
   void Clear(Register rd);
   void Not(Register rd, Register rs);
 
+  // MSA (MIPS SIMD Architecture) instructions; wd comes first, then the ws/wt (or rs) operands.
+  void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void Ffint_sW(VectorRegister wd, VectorRegister ws);
+  void Ffint_sD(VectorRegister wd, VectorRegister ws);
+  void Ftint_sW(VectorRegister wd, VectorRegister ws);
+  void Ftint_sD(VectorRegister wd, VectorRegister ws);
+
+  void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  // Immediate shift instructions; shamtN is the shift amount, an unsigned N-bit value (0..2^N-1).
+  void SlliB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SlliH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SlliW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SlliD(VectorRegister wd, VectorRegister ws, int shamt6);
+  void SraiB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SraiH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SraiW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SraiD(VectorRegister wd, VectorRegister ws, int shamt6);
+  void SrliB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SrliH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SrliW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SrliD(VectorRegister wd, VectorRegister ws, int shamt6);
+
+  void MoveV(VectorRegister wd, VectorRegister ws);
+  void SplatiB(VectorRegister wd, VectorRegister ws, int n4);
+  void SplatiH(VectorRegister wd, VectorRegister ws, int n3);
+  void SplatiW(VectorRegister wd, VectorRegister ws, int n2);
+  void SplatiD(VectorRegister wd, VectorRegister ws, int n1);
+  void FillB(VectorRegister wd, Register rs);
+  void FillH(VectorRegister wd, Register rs);
+  void FillW(VectorRegister wd, Register rs);
+
+  void LdiB(VectorRegister wd, int imm8);
+  void LdiH(VectorRegister wd, int imm10);
+  void LdiW(VectorRegister wd, int imm10);
+  void LdiD(VectorRegister wd, int imm10);
+  void LdB(VectorRegister wd, Register rs, int offset);
+  void LdH(VectorRegister wd, Register rs, int offset);
+  void LdW(VectorRegister wd, Register rs, int offset);
+  void LdD(VectorRegister wd, Register rs, int offset);
+  void StB(VectorRegister wd, Register rs, int offset);
+  void StH(VectorRegister wd, Register rs, int offset);
+  void StW(VectorRegister wd, Register rs, int offset);
+  void StD(VectorRegister wd, Register rs, int offset);
+
+  void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
   // Higher level composite instructions.
   void LoadConst32(Register rd, int32_t value);
   void LoadConst64(Register reg_hi, Register reg_lo, int64_t value);
@@ -1282,6 +1428,30 @@
   uint32_t EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
   void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
   void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);
+  uint32_t EmitMsa3R(int operation,
+                     int df,
+                     VectorRegister wt,
+                     VectorRegister ws,
+                     VectorRegister wd,
+                     int minor_opcode);
+  uint32_t EmitMsaBIT(int operation,
+                      int df_m,
+                      VectorRegister ws,
+                      VectorRegister wd,
+                      int minor_opcode);
+  uint32_t EmitMsaELM(int operation,
+                      int df_n,
+                      VectorRegister ws,
+                      VectorRegister wd,
+                      int minor_opcode);
+  uint32_t EmitMsaMI10(int s10, Register rs, VectorRegister wd, int minor_opcode, int df);
+  uint32_t EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode);
+  uint32_t EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
+  uint32_t EmitMsa2RF(int operation,
+                      int df,
+                      VectorRegister ws,
+                      VectorRegister wd,
+                      int minor_opcode);
 
   void Buncond(MipsLabel* label);
   void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
@@ -1332,6 +1502,10 @@
   // Emits exception block.
   void EmitExceptionPoll(MipsExceptionSlowPath* exception);
 
+  bool HasMsa() const {  // Read-only accessor for the has_msa_ member.
+    return has_msa_;
+  }
+
   bool IsR6() const {
     if (isa_features_ != nullptr) {
       return isa_features_->IsR6();
@@ -1386,6 +1560,8 @@
   uint32_t last_old_position_;
   uint32_t last_branch_id_;
 
+  const bool has_msa_;  // Declared const, so it cannot change after construction.
+
   const MipsInstructionSetFeatures* isa_features_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsAssembler);
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index 30667ef..d464260 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -34,9 +34,14 @@
 class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler,
                                                    mips::Register,
                                                    mips::FRegister,
-                                                   uint32_t> {
+                                                   uint32_t,
+                                                   mips::VectorRegister> {
  public:
-  typedef AssemblerTest<mips::MipsAssembler, mips::Register, mips::FRegister, uint32_t> Base;
+  typedef AssemblerTest<mips::MipsAssembler,
+                        mips::Register,
+                        mips::FRegister,
+                        uint32_t,
+                        mips::VectorRegister> Base;
 
   AssemblerMIPS32r6Test() :
     instruction_set_features_(MipsInstructionSetFeatures::FromVariant("mips32r6", nullptr)) {
@@ -61,7 +66,7 @@
     // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers.
     // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the
     // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily).
-    return " -march=mips32r6 -modd-spreg -Wa,--no-warn"
+    return " -march=mips32r6 -mmsa -modd-spreg -Wa,--no-warn"
         " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib";
   }
 
@@ -182,6 +187,39 @@
       fp_registers_.push_back(new mips::FRegister(mips::F29));
       fp_registers_.push_back(new mips::FRegister(mips::F30));
       fp_registers_.push_back(new mips::FRegister(mips::F31));
+
+      vec_registers_.push_back(new mips::VectorRegister(mips::W0));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W1));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W2));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W3));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W4));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W5));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W6));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W7));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W8));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W9));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W10));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W11));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W12));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W13));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W14));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W15));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W16));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W17));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W18));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W19));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W20));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W21));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W22));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W23));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W24));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W25));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W26));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W27));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W28));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W29));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W30));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W31));
     }
   }
 
@@ -189,6 +227,7 @@
     AssemblerTest::TearDown();
     STLDeleteElements(&registers_);
     STLDeleteElements(&fp_registers_);
+    STLDeleteElements(&vec_registers_);
   }
 
   std::vector<mips::Register*> GetRegisters() OVERRIDE {
@@ -199,6 +238,10 @@
     return fp_registers_;
   }
 
+  std::vector<mips::VectorRegister*> GetVectorRegisters() OVERRIDE {
+    return vec_registers_;  // Returned by value: copies the pointers, not the registers.
+  }
+
   uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
     return imm_value;
   }
@@ -250,6 +293,7 @@
   std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_;
 
   std::vector<mips::FRegister*> fp_registers_;
+  std::vector<mips::VectorRegister*> vec_registers_;  // Raw owning pointers (W0..W31).
   std::unique_ptr<const MipsInstructionSetFeatures> instruction_set_features_;
 };
 
@@ -328,13 +372,11 @@
 }
 
 TEST_F(AssemblerMIPS32r6Test, Seleqz) {
-  DriverStr(RepeatRRR(&mips::MipsAssembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"),
-            "seleqz");
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"), "seleqz");
 }
 
 TEST_F(AssemblerMIPS32r6Test, Selnez) {
-  DriverStr(RepeatRRR(&mips::MipsAssembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"),
-            "selnez");
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"), "selnez");
 }
 
 TEST_F(AssemblerMIPS32r6Test, ClzR6) {
@@ -914,6 +956,566 @@
 //        AssemblerMIPS32r6Test.Bltu
 //        AssemblerMIPS32r6Test.Bgeu
 
+// MSA instructions.
+
+TEST_F(AssemblerMIPS32r6Test, AndV) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AndV, "and.v ${reg1}, ${reg2}, ${reg3}"), "and.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, OrV) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::OrV, "or.v ${reg1}, ${reg2}, ${reg3}"), "or.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, NorV) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::NorV, "nor.v ${reg1}, ${reg2}, ${reg3}"), "nor.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, XorV) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::XorV, "xor.v ${reg1}, ${reg2}, ${reg3}"), "xor.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, AddvB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AddvB, "addv.b ${reg1}, ${reg2}, ${reg3}"), "addv.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, AddvH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AddvH, "addv.h ${reg1}, ${reg2}, ${reg3}"), "addv.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, AddvW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AddvW, "addv.w ${reg1}, ${reg2}, ${reg3}"), "addv.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, AddvD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AddvD, "addv.d ${reg1}, ${reg2}, ${reg3}"), "addv.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SubvB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SubvB, "subv.b ${reg1}, ${reg2}, ${reg3}"), "subv.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SubvH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SubvH, "subv.h ${reg1}, ${reg2}, ${reg3}"), "subv.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SubvW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SubvW, "subv.w ${reg1}, ${reg2}, ${reg3}"), "subv.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SubvD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SubvD, "subv.d ${reg1}, ${reg2}, ${reg3}"), "subv.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulvB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::MulvB, "mulv.b ${reg1}, ${reg2}, ${reg3}"), "mulv.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulvH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::MulvH, "mulv.h ${reg1}, ${reg2}, ${reg3}"), "mulv.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulvW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::MulvW, "mulv.w ${reg1}, ${reg2}, ${reg3}"), "mulv.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulvD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::MulvD, "mulv.d ${reg1}, ${reg2}, ${reg3}"), "mulv.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sB, "div_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sH, "div_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sW, "div_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sD, "div_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uB, "div_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uH, "div_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uW, "div_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uD, "div_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sB, "mod_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sH, "mod_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sW, "mod_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sD, "mod_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uB, "mod_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uH, "mod_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uW, "mod_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uD, "mod_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Add_aB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aB, "add_a.b ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Add_aH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aH, "add_a.h ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Add_aW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aW, "add_a.w ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Add_aD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aD, "add_a.d ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sB, "ave_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sH, "ave_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sW, "ave_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sD, "ave_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uB, "ave_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uH, "ave_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uW, "ave_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uD, "ave_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sB, "aver_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sH, "aver_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sW, "aver_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sD, "aver_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uB, "aver_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uH, "aver_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uW, "aver_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uD, "aver_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sB, "max_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sH, "max_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sW, "max_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sD, "max_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uB, "max_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uH, "max_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uW, "max_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uD, "max_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sB, "min_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sH, "min_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sW, "min_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sD, "min_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uB, "min_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uH, "min_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uW, "min_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uD, "min_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FaddW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"), "fadd.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FaddD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FaddD, "fadd.d ${reg1}, ${reg2}, ${reg3}"), "fadd.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FsubW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FsubW, "fsub.w ${reg1}, ${reg2}, ${reg3}"), "fsub.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FsubD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FsubD, "fsub.d ${reg1}, ${reg2}, ${reg3}"), "fsub.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FmulW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FmulW, "fmul.w ${reg1}, ${reg2}, ${reg3}"), "fmul.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FmulD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FmulD, "fmul.d ${reg1}, ${reg2}, ${reg3}"), "fmul.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FdivW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FdivW, "fdiv.w ${reg1}, ${reg2}, ${reg3}"), "fdiv.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FdivD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FdivD, "fdiv.d ${reg1}, ${reg2}, ${reg3}"), "fdiv.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FmaxW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FmaxW, "fmax.w ${reg1}, ${reg2}, ${reg3}"), "fmax.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FmaxD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FmaxD, "fmax.d ${reg1}, ${reg2}, ${reg3}"), "fmax.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FminW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FminW, "fmin.w ${reg1}, ${reg2}, ${reg3}"), "fmin.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FminD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FminD, "fmin.d ${reg1}, ${reg2}, ${reg3}"), "fmin.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ffint_sW) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"), "ffint_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ffint_sD) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::Ffint_sD, "ffint_s.d ${reg1}, ${reg2}"), "ffint_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ftint_sW) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::Ftint_sW, "ftint_s.w ${reg1}, ${reg2}"), "ftint_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ftint_sD) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::Ftint_sD, "ftint_s.d ${reg1}, ${reg2}"), "ftint_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SllB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SllB, "sll.b ${reg1}, ${reg2}, ${reg3}"), "sll.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SllH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SllH, "sll.h ${reg1}, ${reg2}, ${reg3}"), "sll.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SllW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SllW, "sll.w ${reg1}, ${reg2}, ${reg3}"), "sll.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SllD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SllD, "sll.d ${reg1}, ${reg2}, ${reg3}"), "sll.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SraB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SraB, "sra.b ${reg1}, ${reg2}, ${reg3}"), "sra.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SraH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SraH, "sra.h ${reg1}, ${reg2}, ${reg3}"), "sra.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SraW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SraW, "sra.w ${reg1}, ${reg2}, ${reg3}"), "sra.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SraD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SraD, "sra.d ${reg1}, ${reg2}, ${reg3}"), "sra.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SrlB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SrlB, "srl.b ${reg1}, ${reg2}, ${reg3}"), "srl.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SrlH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SrlH, "srl.h ${reg1}, ${reg2}, ${reg3}"), "srl.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SrlW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SrlW, "srl.w ${reg1}, ${reg2}, ${reg3}"), "srl.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SrlD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SrlD, "srl.d ${reg1}, ${reg2}, ${reg3}"), "srl.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SlliB) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliB, 3, "slli.b ${reg1}, ${reg2}, {imm}"), "slli.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SlliH) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliH, 4, "slli.h ${reg1}, ${reg2}, {imm}"), "slli.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SlliW) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliW, 5, "slli.w ${reg1}, ${reg2}, {imm}"), "slli.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SlliD) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliD, 6, "slli.d ${reg1}, ${reg2}, {imm}"), "slli.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MoveV) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::MoveV, "move.v ${reg1}, ${reg2}"), "move.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SplatiB) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiB, 4, "splati.b ${reg1}, ${reg2}[{imm}]"),
+            "splati.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SplatiH) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiH, 3, "splati.h ${reg1}, ${reg2}[{imm}]"),
+            "splati.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SplatiW) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiW, 2, "splati.w ${reg1}, ${reg2}[{imm}]"),
+            "splati.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SplatiD) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiD, 1, "splati.d ${reg1}, ${reg2}[{imm}]"),
+            "splati.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FillB) {
+  DriverStr(RepeatVR(&mips::MipsAssembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FillH) {
+  DriverStr(RepeatVR(&mips::MipsAssembler::FillH, "fill.h ${reg1}, ${reg2}"), "fill.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FillW) {
+  DriverStr(RepeatVR(&mips::MipsAssembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdiB) {
+  DriverStr(RepeatVIb(&mips::MipsAssembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdiH) {
+  DriverStr(RepeatVIb(&mips::MipsAssembler::LdiH, -10, "ldi.h ${reg}, {imm}"), "ldi.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdiW) {
+  DriverStr(RepeatVIb(&mips::MipsAssembler::LdiW, -10, "ldi.w ${reg}, {imm}"), "ldi.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdiD) {
+  DriverStr(RepeatVIb(&mips::MipsAssembler::LdiD, -10, "ldi.d ${reg}, {imm}"), "ldi.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdB) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdH) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::LdH, -10, "ld.h ${reg1}, {imm}(${reg2})", 0, 2),
+            "ld.h");  // NOTE(review): 0, 2 look like imm bias and multiplier; confirm.
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdW) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::LdW, -10, "ld.w ${reg1}, {imm}(${reg2})", 0, 4),
+            "ld.w");  // NOTE(review): 0, 4 look like imm bias and multiplier; confirm.
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdD) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::LdD, -10, "ld.d ${reg1}, {imm}(${reg2})", 0, 8),
+            "ld.d");  // NOTE(review): 0, 8 look like imm bias and multiplier; confirm.
+}
+
+TEST_F(AssemblerMIPS32r6Test, StB) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::StB, -10, "st.b ${reg1}, {imm}(${reg2})"), "st.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, StH) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::StH, -10, "st.h ${reg1}, {imm}(${reg2})", 0, 2),
+            "st.h");  // NOTE(review): 0, 2 look like imm bias and multiplier; confirm.
+}
+
+TEST_F(AssemblerMIPS32r6Test, StW) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::StW, -10, "st.w ${reg1}, {imm}(${reg2})", 0, 4),
+            "st.w");  // NOTE(review): 0, 4 look like imm bias and multiplier; confirm.
+}
+
+TEST_F(AssemblerMIPS32r6Test, StD) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::StD, -10, "st.d ${reg1}, {imm}(${reg2})", 0, 8),
+            "st.d");  // NOTE(review): 0, 8 look like imm bias and multiplier; confirm.
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvrB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrB, "ilvr.b ${reg1}, ${reg2}, ${reg3}"), "ilvr.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvrH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrH, "ilvr.h ${reg1}, ${reg2}, ${reg3}"), "ilvr.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvrW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrW, "ilvr.w ${reg1}, ${reg2}, ${reg3}"), "ilvr.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvrD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrD, "ilvr.d ${reg1}, ${reg2}, ${reg3}"), "ilvr.d");
+}
+
 #undef __
 
 }  // namespace art
diff --git a/compiler/utils/mips/constants_mips.h b/compiler/utils/mips/constants_mips.h
index 44ed5cc..b4dfdbd 100644
--- a/compiler/utils/mips/constants_mips.h
+++ b/compiler/utils/mips/constants_mips.h
@@ -75,8 +75,37 @@
   kFdShift = 6,
   kFdBits = 5,
 
+  kMsaOperationShift = 23,  // NOTE(review): presumably the operation-field position; confirm.
+  kMsaELMOperationShift = 22,  // One bit lower than kMsaOperationShift.
+  kMsa2ROperationShift = 18,
+  kMsa2RFOperationShift = 17,
+  kDfShift = 21,
+  kDfMShift = 16,  // Same bit position as kWtShift.
+  kDf2RShift = 16,  // Same bit position as kWtShift.
+  kDfNShift = 16,  // Same bit position as kWtShift.
+  kWtShift = 16,  // With kWtBits: a 5-bit field at bits 16..20.
+  kWtBits = 5,
+  kWsShift = 11,  // With kWsBits: a 5-bit field at bits 11..15.
+  kWsBits = 5,
+  kWdShift = 6,  // With kWdBits: a 5-bit field at bits 6..10.
+  kWdBits = 5,
+  kS10Shift = 16,  // Same bit position as kWtShift.
+  kI10Shift = 11,  // Same bit position as kWsShift.
+  kS10MinorShift = 2,
+
   kBranchOffsetMask = 0x0000ffff,
   kJumpOffsetMask = 0x03ffffff,
+
+  kMsaMajorOpcode = 0x1e,  // 0b011110 (decimal 30).
+  kMsaDfMByteMask = 0x70,  // 0b1110000; presumably the df prefix of a 7-bit df/m field.
+  kMsaDfMHalfwordMask = 0x60,  // 0b1100000
+  kMsaDfMWordMask = 0x40,  // 0b1000000
+  kMsaDfMDoublewordMask = 0x00,  // 0b0000000
+  kMsaDfNByteMask = 0x00,  // 0b000000; presumably the df prefix of a 6-bit df/n field.
+  kMsaDfNHalfwordMask = 0x20,  // 0b100000
+  kMsaDfNWordMask = 0x30,  // 0b110000
+  kMsaDfNDoublewordMask = 0x38,  // 0b111000
+  kMsaS10Mask = 0x3ff,  // Low 10 bits set.
 };
 
 enum ScaleFactor {
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index b212958..773db9b 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -25,6 +25,7 @@
 #include "base/arena_containers.h"
 #include "base/enums.h"
 #include "base/macros.h"
+#include "base/stl_util_identity.h"
 #include "constants_mips64.h"
 #include "globals.h"
 #include "managed_register_mips64.h"
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 9c13f1e..3dd0703 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -525,13 +525,14 @@
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_mutex_unlock, (&mutex_), reason);
   }
 
-  const int64_t timeout_in_milliseconds_;
-  bool shutting_down_;
   // TODO: Switch to Mutex when we can guarantee it won't prevent shutdown in error cases.
   pthread_mutex_t mutex_;
   pthread_cond_t cond_;
   pthread_attr_t attr_;
   pthread_t pthread_;
+
+  const int64_t timeout_in_milliseconds_;
+  bool shutting_down_;
 };
 
 class Dex2Oat FINAL {
@@ -1401,8 +1402,8 @@
     // Note: we're only invalidating the magic data in the file, as dex2oat needs the rest of
     // the information to remain valid.
     if (update_input_vdex_) {
-      std::unique_ptr<BufferedOutputStream> vdex_out(MakeUnique<BufferedOutputStream>(
-          MakeUnique<FileOutputStream>(vdex_files_.back().get())));
+      std::unique_ptr<BufferedOutputStream> vdex_out = std::make_unique<BufferedOutputStream>(
+          std::make_unique<FileOutputStream>(vdex_files_.back().get()));
       if (!vdex_out->WriteFully(&VdexFile::Header::kVdexInvalidMagic,
                                 arraysize(VdexFile::Header::kVdexInvalidMagic))) {
         PLOG(ERROR) << "Failed to invalidate vdex header. File: " << vdex_out->GetLocation();
@@ -1899,8 +1900,8 @@
       verifier::VerifierDeps* verifier_deps = callbacks_->GetVerifierDeps();
       for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         File* vdex_file = vdex_files_[i].get();
-        std::unique_ptr<BufferedOutputStream> vdex_out(
-            MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
+        std::unique_ptr<BufferedOutputStream> vdex_out =
+            std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(vdex_file));
 
         if (!oat_writers_[i]->WriteVerifierDeps(vdex_out.get(), verifier_deps)) {
           LOG(ERROR) << "Failed to write verifier dependencies into VDEX " << vdex_file->GetPath();
@@ -2932,7 +2933,7 @@
   // might produce a stack frame too large for this function or for
   // functions inlining it (such as main), that would not fit the
   // requirements of the `-Wframe-larger-than` option.
-  std::unique_ptr<Dex2Oat> dex2oat = MakeUnique<Dex2Oat>(&timings);
+  std::unique_ptr<Dex2Oat> dex2oat = std::make_unique<Dex2Oat>(&timings);
 
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   dex2oat->ParseArgs(argc, argv);
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 5656ddd..1541d7b 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -1747,9 +1747,8 @@
       case EncodedArrayValueIterator::ValueType::kArray:
       case EncodedArrayValueIterator::ValueType::kAnnotation:
         // Unreachable based on current EncodedArrayValueIterator::Next().
-        UNIMPLEMENTED(FATAL) << " type " << type;
+        UNIMPLEMENTED(FATAL) << " type " << it.GetValueType();
         UNREACHABLE();
-        break;
       case EncodedArrayValueIterator::ValueType::kNull:
         type = "Null";
         value = "null";
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index 5692eb2..95e64bf 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -23,6 +23,7 @@
 #include <vector>
 #include <stdint.h>
 
+#include "base/stl_util.h"
 #include "dex_file-inl.h"
 #include "leb128.h"
 #include "utf.h"
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 0c64b9f..9b95de2 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -130,8 +130,8 @@
     if (elf_file == nullptr) {
       return false;
     }
-    std::unique_ptr<BufferedOutputStream> output_stream(
-        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file.get())));
+    std::unique_ptr<BufferedOutputStream> output_stream =
+        std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(elf_file.get()));
     builder_.reset(new ElfBuilder<ElfTypes>(isa, features.get(), output_stream.get()));
 
     builder_->Start();
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index e750ede..ec3481b 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -40,6 +40,7 @@
 #include "elf_file_impl.h"
 #include "gc/space/image_space.h"
 #include "image-inl.h"
+#include "intern_table.h"
 #include "mirror/dex_cache.h"
 #include "mirror/executable.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/Android.bp b/runtime/Android.bp
index d1e124f..26e52e0 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -245,7 +245,6 @@
         "entrypoints/quick/quick_entrypoints_enum.cc",
         "entrypoints/quick/quick_field_entrypoints.cc",
         "entrypoints/quick/quick_fillarray_entrypoints.cc",
-        "entrypoints/quick/quick_instrumentation_entrypoints.cc",
         "entrypoints/quick/quick_jni_entrypoints.cc",
         "entrypoints/quick/quick_lock_entrypoints.cc",
         "entrypoints/quick/quick_math_entrypoints.cc",
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index ce575f7..838ae40 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -20,9 +20,27 @@
 #include "base/callee_save_type.h"
 #include "common_runtime_test.h"
 #include "quick/quick_method_frame_info.h"
-// Common tests are declared next to the constants.
-#define ADD_TEST_EQ(x, y) EXPECT_EQ(x, y);
-#include "asm_support.h"
+
+
+// asm_support.h declares tests next to the #defines. We use asm_support_check.h to (safely)
+// generate CheckAsmSupportOffsetsAndSizes using gtest's EXPECT for the tests. We also use the
+// RETURN_TYPE, HEADER and FOOTER defines from asm_support_check.h to try to ensure that any
+// tests are actually generated.
+
+// Let CheckAsmSupportOffsetsAndSizes return a size_t (the count).
+#define ASM_SUPPORT_CHECK_RETURN_TYPE size_t
+
+// Declare the counter that will be updated per test.
+#define ASM_SUPPORT_CHECK_HEADER size_t count = 0;
+
+// Use EXPECT_EQ for tests, and increment the counter.
+#define ADD_TEST_EQ(x, y) EXPECT_EQ(x, y); count++;
+
+// Return the counter at the end of CheckAsmSupportOffsetsAndSizes.
+#define ASM_SUPPORT_CHECK_FOOTER return count;
+
+// Generate CheckAsmSupportOffsetsAndSizes().
+#include "asm_support_check.h"
 
 namespace art {
 
@@ -58,7 +76,8 @@
 };
 
 TEST_F(ArchTest, CheckCommonOffsetsAndSizes) {
-  CheckAsmSupportOffsetsAndSizes();
+  size_t test_count = CheckAsmSupportOffsetsAndSizes();
+  EXPECT_GT(test_count, 0u);
 }
 
 // Grab architecture specific constants.
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 817dcf5..0db14fb 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -17,6 +17,7 @@
 #include "context_arm.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
 #include "thread-current-inl.h"
 
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 919b0af..8a8d264 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -18,6 +18,7 @@
 #include <string.h>
 
 #include "arch/arm/asm_support_arm.h"
+#include "base/bit_utils.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 31a7f6a..307f9f0 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1630,8 +1630,10 @@
     @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
     str   r0, [sp, #4]
     mov   r2, r9         @ pass Thread::Current
-    mov   r3, lr         @ pass LR
-    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, LR)
+    mov   r3, sp         @ pass SP
+    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP)
+    cbz   r0, .Ldeliver_instrumentation_entry_exception
+                         @ Deliver exception if we got nullptr as function.
     mov   r12, r0        @ r12 holds reference to code
     ldr   r0, [sp, #4]   @ restore r0
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
@@ -1647,19 +1649,13 @@
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset r1, 4
+    mov   r2, sp         @ pass gpr_res pointer.
     vpush {d0}           @ save fp return value
     .cfi_adjust_cfa_offset 8
-    sub   sp, #8         @ space for return value argument. Note: AAPCS stack alignment is 8B, no
-                         @ need to align by 16.
-    .cfi_adjust_cfa_offset 8
-    vstr  d0, [sp]       @ d0 -> [sp] for fpr_res
-    mov   r2, r0         @ pass return value as gpr_res
-    mov   r3, r1
-    mov   r0, r9         @ pass Thread::Current
+    mov   r3, sp         @ pass fpr_res pointer
     mov   r1, r12        @ pass SP
-    blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res, fpr_res)
-    add   sp, #8
-    .cfi_adjust_cfa_offset -8
+    mov   r0, r9         @ pass Thread::Current
+    blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res*, fpr_res*)
 
     mov   r2, r0         @ link register saved by instrumentation
     mov   lr, r1         @ r1 is holding link register if we're to bounce to deoptimize
@@ -1669,9 +1665,16 @@
     .cfi_adjust_cfa_offset -8
     .cfi_restore r0
     .cfi_restore r1
-    add sp, #32          @ remove callee save frame
-    .cfi_adjust_cfa_offset -32
-    bx    r2             @ return
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    cbz   r2, .Ldo_deliver_instrumentation_exception
+                         @ Deliver exception if we got nullptr as function.
+    bx    r2             @ Otherwise, return
+.Ldeliver_instrumentation_entry_exception:
+    @ Deliver exception for art_quick_instrumentation_entry placed after
+    @ art_quick_instrumentation_exit so that the fallthrough works.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+.Ldo_deliver_instrumentation_exception:
+    DELIVER_PENDING_EXCEPTION
 END art_quick_instrumentation_entry
 
     /*
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index a8f034e..0465c1e 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -19,6 +19,7 @@
 #include "context_arm64.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
 #include "thread-current-inl.h"
 
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 610cdee..9bbcef3 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -18,6 +18,7 @@
 #include <string.h>
 
 #include "arch/arm64/asm_support_arm64.h"
+#include "base/bit_utils.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 18015b5..c9ead54 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2168,15 +2168,19 @@
     mov   x20, x0             // Preserve method reference in a callee-save.
 
     mov   x2, xSELF
-    mov   x3, xLR
-    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, LR)
+    mov   x3, sp  // Pass SP
+    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP)
 
     mov   xIP0, x0            // x0 = result of call.
     mov   x0, x20             // Reload method reference.
 
     RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
+    cbz   xIP0, 1f            // Deliver the pending exception if method is null.
     adr   xLR, art_quick_instrumentation_exit
     br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
+
+1:
+    DELIVER_PENDING_EXCEPTION
 END art_quick_instrumentation_entry
 
     .extern artInstrumentationMethodExitFromCode
@@ -2185,30 +2189,28 @@
 
     SETUP_SAVE_REFS_ONLY_FRAME
 
-    // We need to save x0 and d0. We could use a callee-save from SETUP_REF_ONLY, but then
-    // we would need to fully restore it. As there are a lot of callee-save registers, it seems
-    // easier to have an extra small stack area.
-
     str x0, [sp, #-16]!       // Save integer result.
     .cfi_adjust_cfa_offset 16
-    str d0,  [sp, #8]         // Save floating-point result.
+    str d0, [sp, #8]          // Save floating-point result.
 
+    add   x3, sp, #8          // Pass floating-point result pointer.
+    mov   x2, sp              // Pass integer result pointer.
     add   x1, sp, #16         // Pass SP.
-    mov   x2, x0              // Pass integer result.
-    fmov  x3, d0              // Pass floating-point result.
     mov   x0, xSELF           // Pass Thread.
-    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
+    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res*, fpr_res*)
 
     mov   xIP0, x0            // Return address from instrumentation call.
     mov   xLR, x1             // r1 is holding link register if we're to bounce to deoptimize
 
     ldr   d0, [sp, #8]        // Restore floating-point result.
     ldr   x0, [sp], #16       // Restore integer result, and drop stack area.
-    .cfi_adjust_cfa_offset 16
+    .cfi_adjust_cfa_offset -16
 
-    POP_SAVE_REFS_ONLY_FRAME
-
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    cbz   xIP0, 1f            // Handle error
     br    xIP0                // Tail-call out.
+1:
+    DELIVER_PENDING_EXCEPTION
 END art_quick_instrumentation_exit
 
     /*
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index 98ed5e6..ca1de0a 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -17,6 +17,7 @@
 #include "context_mips.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
 
 namespace art {
diff --git a/runtime/arch/mips/registers_mips.cc b/runtime/arch/mips/registers_mips.cc
index 5d31f2f..92c2746 100644
--- a/runtime/arch/mips/registers_mips.cc
+++ b/runtime/arch/mips/registers_mips.cc
@@ -45,5 +45,14 @@
   return os;
 }
 
+std::ostream& operator<<(std::ostream& os, const VectorRegister& rhs) {
+  if (rhs >= W0 && rhs < kNumberOfVectorRegisters) {  // Named register range W0..W31.
+    os << "w" << static_cast<int>(rhs);  // Printed as "w<N>".
+  } else {
+    os << "VectorRegister[" << static_cast<int>(rhs) << "]";  // Out-of-range value.
+  }
+  return os;  // Return the stream so insertions can be chained.
+}
+
 }  // namespace mips
 }  // namespace art
diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h
index 555f3f0..57af150 100644
--- a/runtime/arch/mips/registers_mips.h
+++ b/runtime/arch/mips/registers_mips.h
@@ -106,6 +106,45 @@
 };
 std::ostream& operator<<(std::ostream& os, const FRegister& rhs);
 
+// Values for vector registers.
+enum VectorRegister {
+  W0  =  0,
+  W1  =  1,
+  W2  =  2,
+  W3  =  3,
+  W4  =  4,
+  W5  =  5,
+  W6  =  6,
+  W7  =  7,
+  W8  =  8,
+  W9  =  9,
+  W10 = 10,
+  W11 = 11,
+  W12 = 12,
+  W13 = 13,
+  W14 = 14,
+  W15 = 15,
+  W16 = 16,
+  W17 = 17,
+  W18 = 18,
+  W19 = 19,
+  W20 = 20,
+  W21 = 21,
+  W22 = 22,
+  W23 = 23,
+  W24 = 24,
+  W25 = 25,
+  W26 = 26,
+  W27 = 27,
+  W28 = 28,
+  W29 = 29,
+  W30 = 30,
+  W31 = 31,
+  kNumberOfVectorRegisters = 32,
+  kNoVectorRegister = -1,
+};
+std::ostream& operator<<(std::ostream& os, const VectorRegister& rhs);
+
 }  // namespace mips
 }  // namespace art
 
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index bd1ac3b..b14908f 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -17,6 +17,7 @@
 #include "context_mips64.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
 
 namespace art {
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index cb3dfec..5c31712 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -17,6 +17,7 @@
 #include "context_x86.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "base/memory_tool.h"
 #include "quick/quick_method_frame_info.h"
 
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 5788122..cc0bdf2 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -33,23 +33,28 @@
 
 static constexpr const char* x86_known_variants[] = {
     "atom",
+    "sandybridge",
     "silvermont",
 };
 
 static constexpr const char* x86_variants_with_ssse3[] = {
     "atom",
+    "sandybridge",
     "silvermont",
 };
 
 static constexpr const char* x86_variants_with_sse4_1[] = {
+    "sandybridge",
     "silvermont",
 };
 
 static constexpr const char* x86_variants_with_sse4_2[] = {
+    "sandybridge",
     "silvermont",
 };
 
 static constexpr const char* x86_variants_with_popcnt[] = {
+    "sandybridge",
     "silvermont",
 };
 
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index 7e6ad3e..c67b4dd 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -69,6 +69,43 @@
   EXPECT_FALSE(x86_features->Equals(x86_default_features.get()));
 }
 
+TEST(X86InstructionSetFeaturesTest, X86FeaturesFromSandybridgeVariant) {
+  // Build features for a 32-bit x86 sandybridge processor.
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> x86_features(
+      InstructionSetFeatures::FromVariant(kX86, "sandybridge", &error_msg));
+  ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
+  EXPECT_TRUE(x86_features->Equals(x86_features.get()));
+  EXPECT_STREQ("ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
+               x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 39U);
+
+  // Build features for a 32-bit x86 default processor.
+  std::unique_ptr<const InstructionSetFeatures> x86_default_features(
+      InstructionSetFeatures::FromVariant(kX86, "default", &error_msg));
+  ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
+  EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
+  EXPECT_STREQ("-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
+               x86_default_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_default_features->AsBitmap(), 0U);
+
+  // Build features for a 64-bit x86-64 sandybridge processor.
+  std::unique_ptr<const InstructionSetFeatures> x86_64_features(
+      InstructionSetFeatures::FromVariant(kX86_64, "sandybridge", &error_msg));
+  ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
+  EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
+  EXPECT_STREQ("ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
+               x86_64_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_64_features->AsBitmap(), 39U);
+
+  EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
+  EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
+  EXPECT_FALSE(x86_features->Equals(x86_default_features.get()));
+}
+
 TEST(X86InstructionSetFeaturesTest, X86FeaturesFromSilvermontVariant) {
   // Build features for a 32-bit x86 silvermont processor.
   std::string error_msg;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2222f5c..4fb3fe5 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1948,17 +1948,23 @@
 DEFINE_FUNCTION art_quick_instrumentation_entry
     SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, edx
     PUSH eax                      // Save eax which will be clobbered by the callee-save method.
-    subl LITERAL(12), %esp        // Align stack.
-    CFI_ADJUST_CFA_OFFSET(12)
-    pushl FRAME_SIZE_SAVE_REFS_AND_ARGS-4+16(%esp)  // Pass LR.
-    CFI_ADJUST_CFA_OFFSET(4)
+    subl LITERAL(16), %esp        // Align stack (12 bytes) and reserve space for the SP argument
+    CFI_ADJUST_CFA_OFFSET(16)     // (4 bytes). We lack the scratch registers to calculate the SP
+                                  // right now, so we will just fill it in later.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // Pass receiver.
     PUSH eax                      // Pass Method*.
-    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
+    leal 32(%esp), %eax           // Put original SP into eax
+    movl %eax, 12(%esp)           // set SP
+    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)
+
     addl LITERAL(28), %esp        // Pop arguments upto saved Method*.
     CFI_ADJUST_CFA_OFFSET(-28)
+
+    testl %eax, %eax
+    jz 1f                         // Test for null return (indicating exception) and handle it.
+
     movl 60(%esp), %edi           // Restore edi.
     movl %eax, 60(%esp)           // Place code* over edi, just under return pc.
     movl SYMBOL(art_quick_instrumentation_exit)@GOT(%ebx), %ebx
@@ -1980,6 +1986,12 @@
     addl LITERAL(60), %esp        // Wind stack back upto code*.
     CFI_ADJUST_CFA_OFFSET(-60)
     ret                           // Call method (and pop).
+1:
+    // Make caller handle exception
+    addl LITERAL(4), %esp
+    CFI_ADJUST_CFA_OFFSET(-4)
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_instrumentation_entry
 
 DEFINE_FUNCTION art_quick_instrumentation_exit
@@ -1992,18 +2004,19 @@
     movq %xmm0, (%esp)
     PUSH edx                      // Save gpr return value.
     PUSH eax
-    subl LITERAL(16), %esp        // Align stack
-    CFI_ADJUST_CFA_OFFSET(16)
-    movq %xmm0, (%esp)            // Pass float return value.
-    PUSH edx                      // Pass gpr return value.
-    PUSH eax
+    leal 8(%esp), %eax            // Get pointer to fpr_result
+    movl %esp, %edx               // Get pointer to gpr_result
+    PUSH eax                      // Pass fpr_result
+    PUSH edx                      // Pass gpr_result
     PUSH ecx                      // Pass SP.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current.
     CFI_ADJUST_CFA_OFFSET(4)
-    call SYMBOL(artInstrumentationMethodExitFromCode)  // (Thread*, SP, gpr_result, fpr_result)
+    call SYMBOL(artInstrumentationMethodExitFromCode)  // (Thread*, SP, gpr_result*, fpr_result*)
+    testl %eax, %eax              // Check if we returned error.
+    jz 1f
     mov   %eax, %ecx              // Move returned link register.
-    addl LITERAL(32), %esp        // Pop arguments.
-    CFI_ADJUST_CFA_OFFSET(-32)
+    addl LITERAL(16), %esp        // Pop arguments.
+    CFI_ADJUST_CFA_OFFSET(-16)
     movl %edx, %ebx               // Move returned link register for deopt
                                   // (ebx is pretending to be our LR).
     POP eax                       // Restore gpr return value.
@@ -2015,6 +2028,11 @@
     addl LITERAL(4), %esp         // Remove fake return pc.
     CFI_ADJUST_CFA_OFFSET(-4)
     jmp   *%ecx                   // Return.
+1:
+    addl LITERAL(32), %esp
+    CFI_ADJUST_CFA_OFFSET(-32)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_instrumentation_exit
 
     /*
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 7c49e9c..a4db223 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -17,6 +17,7 @@
 #include "context_x86_64.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
 
 namespace art {
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 41651d8..46d4f41 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1920,19 +1920,24 @@
     movq %rdi, %r12               // Preserve method pointer in a callee-save.
 
     movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
-    movq FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp), %rcx   // Pass return PC.
+    movq %rsp, %rcx                     // Pass SP.
 
-    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
+    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)
 
                                   // %rax = result of call.
-    movq %r12, %rdi               // Reload method pointer.
+    testq %rax, %rax
+    jz 1f
 
+    movq %r12, %rdi               // Reload method pointer.
     leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
     movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.
 
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     jmp *%rax                     // Tail call to intended method.
+1:
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    DELIVER_PENDING_EXCEPTION
 #endif  // __APPLE__
 END_FUNCTION art_quick_instrumentation_entry
 
@@ -1948,15 +1953,16 @@
     movq  %rsp, %rsi                          // Pass SP.
 
     PUSH rax                  // Save integer result.
+    movq %rsp, %rdx           // Pass integer result pointer.
+
     subq LITERAL(8), %rsp     // Save floating-point result.
     CFI_ADJUST_CFA_OFFSET(8)
     movq %xmm0, (%rsp)
+    movq %rsp, %rcx           // Pass floating-point result pointer.
 
     movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
-    movq  %rax, %rdx                          // Pass integer result.
-    movq  %xmm0, %rcx                         // Pass floating-point result.
 
-    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)
+    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res*, fpr_res*)
 
     movq  %rax, %rdi          // Store return PC
     movq  %rdx, %rsi          // Store second return PC in hidden arg.
@@ -1968,9 +1974,15 @@
 
     RESTORE_SAVE_REFS_ONLY_FRAME
 
+    testq %rdi, %rdi          // Check if we have a return-pc to go to. If we don't then there was
+                              // an exception
+    jz 1f
+
     addq LITERAL(8), %rsp     // Drop fake return pc.
 
     jmp   *%rdi               // Return.
+1:
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_instrumentation_exit
 
     /*
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 7de8916..d591e09 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -664,7 +664,9 @@
     }
     if (existing_entry_point == GetQuickInstrumentationEntryPoint()) {
       // We are running the generic jni stub, but the method is being instrumented.
-      DCHECK_EQ(pc, 0u) << "Should be a downcall";
+      // NB: We would normally expect the pc to be zero, but we can have non-zero pcs if
+      // instrumentation is installed or removed during the call which is using the generic jni
+      // trampoline.
       DCHECK(IsNative());
       return nullptr;
     }
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index f8096fe..44c0661 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -17,24 +17,6 @@
 #ifndef ART_RUNTIME_ASM_SUPPORT_H_
 #define ART_RUNTIME_ASM_SUPPORT_H_
 
-#if defined(__cplusplus)
-#include "art_method.h"
-#include "base/bit_utils.h"
-#include "base/callee_save_type.h"
-#include "gc/accounting/card_table.h"
-#include "gc/allocator/rosalloc.h"
-#include "gc/heap.h"
-#include "jit/jit.h"
-#include "lock_word.h"
-#include "mirror/class.h"
-#include "mirror/dex_cache.h"
-#include "mirror/string.h"
-#include "utils/dex_cache_arrays_layout.h"
-#include "runtime.h"
-#include "stack.h"
-#include "thread.h"
-#endif
-
 #include "read_barrier_c.h"
 
 #if defined(__arm__) || defined(__mips__)
@@ -51,14 +33,10 @@
 #define SUSPEND_CHECK_INTERVAL 96
 #endif
 
-#if defined(__cplusplus)
-
+// To generate tests related to the constants in this header, either define ADD_TEST_EQ before
+// including, or use asm_support_check.h.
 #ifndef ADD_TEST_EQ  // Allow #include-r to replace with their own.
-#define ADD_TEST_EQ(x, y) CHECK_EQ(x, y);
-#endif
-
-static inline void CheckAsmSupportOffsetsAndSizes() {
-#else
+#define DEFINED_ADD_TEST_EQ 1
 #define ADD_TEST_EQ(x, y)
 #endif
 
@@ -76,6 +54,7 @@
 // Export new defines (for assembly use) by editing cpp-define-generator def files.
 #define DEFINE_CHECK_EQ ADD_TEST_EQ
 #include "asm_support_gen.h"
+#undef DEFINE_CHECK_EQ
 
 // Offset of field Thread::tlsPtr_.exception.
 #define THREAD_EXCEPTION_OFFSET (THREAD_CARD_TABLE_OFFSET + __SIZEOF_POINTER__)
@@ -252,8 +231,9 @@
 #define STRING_COMPRESSION_FEATURE 1
 ADD_TEST_EQ(STRING_COMPRESSION_FEATURE, art::mirror::kUseStringCompression);
 
-#if defined(__cplusplus)
-}  // End of CheckAsmSupportOffsets.
+#ifdef DEFINED_ADD_TEST_EQ
+#undef ADD_TEST_EQ
+#undef DEFINED_ADD_TEST_EQ
 #endif
 
 #endif  // ART_RUNTIME_ASM_SUPPORT_H_
diff --git a/runtime/asm_support_check.h b/runtime/asm_support_check.h
new file mode 100644
index 0000000..cc6a578
--- /dev/null
+++ b/runtime/asm_support_check.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ASM_SUPPORT_CHECK_H_
+#define ART_RUNTIME_ASM_SUPPORT_CHECK_H_
+
+#include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/callee_save_type.h"
+#include "gc/accounting/card_table.h"
+#include "gc/allocator/rosalloc.h"
+#include "gc/heap.h"
+#include "jit/jit.h"
+#include "lock_word.h"
+#include "mirror/class.h"
+#include "mirror/dex_cache.h"
+#include "mirror/string.h"
+#include "utils/dex_cache_arrays_layout.h"
+#include "runtime.h"
+#include "stack.h"
+#include "thread.h"
+
+#ifndef ADD_TEST_EQ
+#define ADD_TEST_EQ(x, y) CHECK_EQ(x, y);
+#endif
+
+#ifndef ASM_SUPPORT_CHECK_RETURN_TYPE
+#define ASM_SUPPORT_CHECK_RETURN_TYPE void
+#endif
+
+// Prepare for re-include of asm_support.h.
+#ifdef ART_RUNTIME_ASM_SUPPORT_H_
+#undef ART_RUNTIME_ASM_SUPPORT_H_
+#endif
+
+namespace art {
+
+static inline ASM_SUPPORT_CHECK_RETURN_TYPE CheckAsmSupportOffsetsAndSizes() {
+#ifdef ASM_SUPPORT_CHECK_HEADER  // Optional statements emitted before the checks.
+  ASM_SUPPORT_CHECK_HEADER
+#endif
+
+#include "asm_support.h"  // Re-included here so each ADD_TEST_EQ expands inside this function.
+
+#ifdef ASM_SUPPORT_CHECK_FOOTER  // Optional statements emitted after the checks.
+  ASM_SUPPORT_CHECK_FOOTER
+#endif
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ASM_SUPPORT_CHECK_H_
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index f536c72..0844678 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -17,13 +17,11 @@
 #ifndef ART_RUNTIME_BASE_BIT_UTILS_H_
 #define ART_RUNTIME_BASE_BIT_UTILS_H_
 
-#include <iterator>
 #include <limits>
 #include <type_traits>
 
-#include "base/iteration_range.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
+#include "base/stl_util_identity.h"
 
 namespace art {
 
@@ -312,85 +310,6 @@
           : static_cast<T>(0);
 }
 
-// Using the Curiously Recurring Template Pattern to implement everything shared
-// by LowToHighBitIterator and HighToLowBitIterator, i.e. everything but operator*().
-template <typename T, typename Iter>
-class BitIteratorBase
-    : public std::iterator<std::forward_iterator_tag, uint32_t, ptrdiff_t, void, void> {
-  static_assert(std::is_integral<T>::value, "T must be integral");
-  static_assert(std::is_unsigned<T>::value, "T must be unsigned");
-
-  static_assert(sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t), "Unsupported size");
-
- public:
-  BitIteratorBase() : bits_(0u) { }
-  explicit BitIteratorBase(T bits) : bits_(bits) { }
-
-  Iter& operator++() {
-    DCHECK_NE(bits_, 0u);
-    uint32_t bit = *static_cast<Iter&>(*this);
-    bits_ &= ~(static_cast<T>(1u) << bit);
-    return static_cast<Iter&>(*this);
-  }
-
-  Iter& operator++(int) {
-    Iter tmp(static_cast<Iter&>(*this));
-    ++*this;
-    return tmp;
-  }
-
- protected:
-  T bits_;
-
-  template <typename U, typename I>
-  friend bool operator==(const BitIteratorBase<U, I>& lhs, const BitIteratorBase<U, I>& rhs);
-};
-
-template <typename T, typename Iter>
-bool operator==(const BitIteratorBase<T, Iter>& lhs, const BitIteratorBase<T, Iter>& rhs) {
-  return lhs.bits_ == rhs.bits_;
-}
-
-template <typename T, typename Iter>
-bool operator!=(const BitIteratorBase<T, Iter>& lhs, const BitIteratorBase<T, Iter>& rhs) {
-  return !(lhs == rhs);
-}
-
-template <typename T>
-class LowToHighBitIterator : public BitIteratorBase<T, LowToHighBitIterator<T>> {
- public:
-  using BitIteratorBase<T, LowToHighBitIterator<T>>::BitIteratorBase;
-
-  uint32_t operator*() const {
-    DCHECK_NE(this->bits_, 0u);
-    return CTZ(this->bits_);
-  }
-};
-
-template <typename T>
-class HighToLowBitIterator : public BitIteratorBase<T, HighToLowBitIterator<T>> {
- public:
-  using BitIteratorBase<T, HighToLowBitIterator<T>>::BitIteratorBase;
-
-  uint32_t operator*() const {
-    DCHECK_NE(this->bits_, 0u);
-    static_assert(std::numeric_limits<T>::radix == 2, "Unexpected radix!");
-    return std::numeric_limits<T>::digits - 1u - CLZ(this->bits_);
-  }
-};
-
-template <typename T>
-IterationRange<LowToHighBitIterator<T>> LowToHighBits(T bits) {
-  return IterationRange<LowToHighBitIterator<T>>(
-      LowToHighBitIterator<T>(bits), LowToHighBitIterator<T>());
-}
-
-template <typename T>
-IterationRange<HighToLowBitIterator<T>> HighToLowBits(T bits) {
-  return IterationRange<HighToLowBitIterator<T>>(
-      HighToLowBitIterator<T>(bits), HighToLowBitIterator<T>());
-}
-
 // Returns value with bit set in lowest one-bit position or 0 if 0.  (java.lang.X.lowestOneBit).
 template <typename kind>
 inline static kind LowestOneBitValue(kind opnd) {
diff --git a/runtime/base/bit_utils_iterator.h b/runtime/base/bit_utils_iterator.h
new file mode 100644
index 0000000..8514de6
--- /dev/null
+++ b/runtime/base/bit_utils_iterator.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_BIT_UTILS_ITERATOR_H_
+#define ART_RUNTIME_BASE_BIT_UTILS_ITERATOR_H_
+
+#include <iterator>
+#include <limits>
+#include <type_traits>
+
+#include "base/bit_utils.h"
+#include "base/iteration_range.h"
+#include "base/logging.h"
+#include "base/stl_util.h"
+
+namespace art {
+
+// Using the Curiously Recurring Template Pattern to implement everything shared
+// by LowToHighBitIterator and HighToLowBitIterator, i.e. everything but operator*().
+template <typename T, typename Iter>
+class BitIteratorBase
+    : public std::iterator<std::forward_iterator_tag, uint32_t, ptrdiff_t, void, void> {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  static_assert(std::is_unsigned<T>::value, "T must be unsigned");
+  // Only 32-bit and 64-bit wide bit sets are accepted.
+  static_assert(sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t), "Unsupported size");
+
+ public:
+  BitIteratorBase() : bits_(0u) { }
+  explicit BitIteratorBase(T bits) : bits_(bits) { }
+  // Pre-increment: clears the bit that Iter::operator*() currently reports. DCHECKs bits_ != 0.
+  Iter& operator++() {
+    DCHECK_NE(bits_, 0u);
+    uint32_t bit = *static_cast<Iter&>(*this);
+    bits_ &= ~(static_cast<T>(1u) << bit);
+    return static_cast<Iter&>(*this);
+  }
+  // Post-increment: returns the old iterator by value; a reference to `tmp` would dangle.
+  Iter operator++(int) {
+    Iter tmp(static_cast<Iter&>(*this));
+    ++*this;
+    return tmp;
+  }
+
+ protected:
+  T bits_;  // Bits still held by this iterator; operator++() clears one per step.
+
+  template <typename U, typename I>
+  friend bool operator==(const BitIteratorBase<U, I>& lhs, const BitIteratorBase<U, I>& rhs);
+};
+// Two iterators compare equal when they hold the same bits_ value.
+template <typename T, typename Iter>
+bool operator==(const BitIteratorBase<T, Iter>& lhs, const BitIteratorBase<T, Iter>& rhs) {
+  return lhs.bits_ == rhs.bits_;
+}
+
+template <typename T, typename Iter>
+bool operator!=(const BitIteratorBase<T, Iter>& lhs, const BitIteratorBase<T, Iter>& rhs) {
+  return !(lhs == rhs);  // Defined via operator== so the two can never disagree.
+}
+// Dereferences to CTZ(bits_), presumably the index of the lowest set bit (see base/bit_utils.h).
+template <typename T>
+class LowToHighBitIterator : public BitIteratorBase<T, LowToHighBitIterator<T>> {
+ public:
+  using BitIteratorBase<T, LowToHighBitIterator<T>>::BitIteratorBase;  // Inherit constructors.
+
+  uint32_t operator*() const {
+    DCHECK_NE(this->bits_, 0u);
+    return CTZ(this->bits_);
+  }
+};
+// Dereferences to digits - 1 - CLZ(bits_), presumably the index of the highest set bit.
+template <typename T>
+class HighToLowBitIterator : public BitIteratorBase<T, HighToLowBitIterator<T>> {
+ public:
+  using BitIteratorBase<T, HighToLowBitIterator<T>>::BitIteratorBase;  // Inherit constructors.
+
+  uint32_t operator*() const {
+    DCHECK_NE(this->bits_, 0u);
+    static_assert(std::numeric_limits<T>::radix == 2, "Unexpected radix!");
+    return std::numeric_limits<T>::digits - 1u - CLZ(this->bits_);
+  }
+};
+// Returns a range whose begin iterator holds `bits` and whose end iterator holds no bits.
+template <typename T>
+IterationRange<LowToHighBitIterator<T>> LowToHighBits(T bits) {
+  using BitIter = LowToHighBitIterator<T>;
+  return IterationRange<BitIter>(BitIter(bits), BitIter());
+}
+// Returns a range whose begin iterator holds `bits` and whose end iterator holds no bits.
+template <typename T>
+IterationRange<HighToLowBitIterator<T>> HighToLowBits(T bits) {
+  using BitIter = HighToLowBitIterator<T>;
+  return IterationRange<BitIter>(BitIter(bits), BitIter());
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_BIT_UTILS_ITERATOR_H_
diff --git a/runtime/base/bit_utils_test.cc b/runtime/base/bit_utils_test.cc
index 77bd0b8..9f22fb4 100644
--- a/runtime/base/bit_utils_test.cc
+++ b/runtime/base/bit_utils_test.cc
@@ -17,6 +17,7 @@
 #include <vector>
 
 #include "bit_utils.h"
+#include "bit_utils_iterator.h"
 
 #include "gtest/gtest.h"
 
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index cfe27f3..b272972 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -25,13 +25,6 @@
 
 namespace art {
 
-// Sort and remove duplicates of an STL vector or deque.
-template<class T>
-void STLSortAndRemoveDuplicates(T* v) {
-  std::sort(v->begin(), v->end());
-  v->erase(std::unique(v->begin(), v->end()), v->end());
-}
-
 // STLDeleteContainerPointers()
 //  For a range within a container of pointers, calls delete
 //  (non-array version) on these pointers.
@@ -83,20 +76,6 @@
   }
 }
 
-template <class T>
-std::string ToString(const T& v) {
-  std::ostringstream os;
-  os << "[";
-  for (size_t i = 0; i < v.size(); ++i) {
-    os << v[i];
-    if (i < v.size() - 1) {
-      os << ", ";
-    }
-  }
-  os << "]";
-  return os.str();
-}
-
 // Deleter using free() for use with std::unique_ptr<>. See also UniqueCPtr<> below.
 struct FreeDelete {
   // NOTE: Deleting a const object is valid but free() takes a non-const pointer.
@@ -109,13 +88,6 @@
 template <typename T>
 using UniqueCPtr = std::unique_ptr<T, FreeDelete>;
 
-// C++14 from-the-future import (std::make_unique)
-// Invoke the constructor of 'T' with the provided args, and wrap the result in a unique ptr.
-template <typename T, typename ... Args>
-std::unique_ptr<T> MakeUnique(Args&& ... args) {
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
-
 // Find index of the first element with the specified value known to be in the container.
 template <typename Container, typename T>
 size_t IndexOfElement(const Container& container, const T& value) {
@@ -150,13 +122,6 @@
   return it != container.end();
 }
 
-// const char* compare function suitable for std::map or std::set.
-struct CStringLess {
-  bool operator()(const char* lhs, const char* rhs) const {
-    return strcmp(lhs, rhs) < 0;
-  }
-};
-
 // 32-bit FNV-1a hash function suitable for std::unordered_map.
 // It can be used with any container which works with range-based for loop.
 // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
@@ -171,23 +136,6 @@
   }
 };
 
-// Use to suppress type deduction for a function argument.
-// See std::identity<> for more background:
-// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1856.html#20.2.2 - move/forward helpers
-//
-// e.g. "template <typename X> void bar(identity<X>::type foo);
-//     bar(5); // compilation error
-//     bar<int>(5); // ok
-// or "template <typename T> void foo(T* x, typename Identity<T*>::type y);
-//     Base b;
-//     Derived d;
-//     foo(&b, &d);  // Use implicit Derived* -> Base* conversion.
-// If T was deduced from both &b and &d, there would be a mismatch, i.e. deduction failure.
-template <typename T>
-struct Identity {
-  using type = T;
-};
-
 // Merge `other` entries into `to_update`.
 template <typename T>
 static inline void MergeSets(std::set<T>& to_update, const std::set<T>& other) {
diff --git a/runtime/base/stl_util_identity.h b/runtime/base/stl_util_identity.h
new file mode 100644
index 0000000..40a93f7
--- /dev/null
+++ b/runtime/base/stl_util_identity.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_STL_UTIL_IDENTITY_H_
+#define ART_RUNTIME_BASE_STL_UTIL_IDENTITY_H_
+
+namespace art {
+
+// Use to suppress type deduction for a function argument.
+// See std::identity<> for more background:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1856.html#20.2.2 - move/forward helpers
+//
+// e.g. "template <typename X> void bar(typename Identity<X>::type foo);"
+//     bar(5); // compilation error
+//     bar<int>(5); // ok
+// or "template <typename T> void foo(T* x, typename Identity<T*>::type y);"
+//     Base b;
+//     Derived d;
+//     foo(&b, &d);  // Use implicit Derived* -> Base* conversion.
+// If T was deduced from both &b and &d, there would be a mismatch, i.e. deduction failure.
+template <typename T>
+struct Identity {
+  using type = T;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_STL_UTIL_IDENTITY_H_
diff --git a/runtime/base/strlcpy.h b/runtime/base/strlcpy.h
new file mode 100644
index 0000000..de135ea
--- /dev/null
+++ b/runtime/base/strlcpy.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_STRLCPY_H_
+#define ART_RUNTIME_BASE_STRLCPY_H_
+
+#include <cstdio>
+#include <cstring>
+
+// Expose a simple implementation of strlcpy when we're not compiling against bionic. This keeps
+// static analyzers happy by letting callers avoid strcpy.
+//
+// Bionic exposes this function, but the host glibc does not. Remove this shim when we compile
+// against bionic on the host, also.
+
+#if !defined(__BIONIC__) && !defined(__APPLE__)
+
+static inline size_t strlcpy(char* dst, const char* src, size_t size) {
+  // Extra-lazy implementation: this is only a host shim, and we don't have to call this often.
+  return snprintf(dst, size, "%s", src);  // snprintf reports the untruncated length of src.
+}
+
+#endif
+
+#endif  // ART_RUNTIME_BASE_STRLCPY_H_
diff --git a/runtime/base/variant_map.h b/runtime/base/variant_map.h
index 531cb37..fdb60c4 100644
--- a/runtime/base/variant_map.h
+++ b/runtime/base/variant_map.h
@@ -22,7 +22,7 @@
 #include <type_traits>
 #include <utility>
 
-#include "base/stl_util.h"
+#include "base/stl_util_identity.h"
 
 namespace art {
 
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index b71610a..c45bbe5 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -16,6 +16,7 @@
 
 #include "class_table-inl.h"
 
+#include "base/stl_util.h"
 #include "mirror/class-inl.h"
 #include "oat_file.h"
 
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 01c6641..f925994 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -728,13 +728,3 @@
 }
 
 }  // namespace art
-
-namespace std {
-
-template <typename T>
-std::ostream& operator<<(std::ostream& os, const std::vector<T>& rhs) {
-os << ::art::ToString(rhs);
-return os;
-}
-
-}  // namespace std
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 24dbd05..1274a36 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -258,12 +258,4 @@
 
 }  // namespace art
 
-namespace std {
-
-// TODO: isn't gtest supposed to be able to print STL types for itself?
-template <typename T>
-std::ostream& operator<<(std::ostream& os, const std::vector<T>& rhs);
-
-}  // namespace std
-
 #endif  // ART_RUNTIME_COMMON_RUNTIME_TEST_H_
diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h
index 6765407..93daa45 100644
--- a/runtime/dex2oat_environment_test.h
+++ b/runtime/dex2oat_environment_test.h
@@ -23,6 +23,7 @@
 
 #include <gtest/gtest.h>
 
+#include "base/stl_util.h"
 #include "common_runtime_test.h"
 #include "compiler_callbacks.h"
 #include "exec_utils.h"
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 1301cc2..b267e5f 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -35,6 +35,7 @@
 #include "base/enums.h"
 #include "base/file_magic.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
diff --git a/runtime/dex_reference_collection.h b/runtime/dex_reference_collection.h
new file mode 100644
index 0000000..76355d6
--- /dev/null
+++ b/runtime/dex_reference_collection.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX_REFERENCE_COLLECTION_H_
+#define ART_RUNTIME_DEX_REFERENCE_COLLECTION_H_
+
+#include "base/macros.h"
+
+#include <vector>
+#include <map>
+
+namespace art {
+
+class DexFile;
+
+// Collection of dex references that is more memory efficient than a vector of <dex, index> pairs.
+// Also allows quick lookups of all of the references for a single dex.
+template <class IndexType, template<typename Type> class Allocator>
+class DexReferenceCollection {
+ public:
+  using VectorAllocator = Allocator<IndexType>;
+  using IndexVector = std::vector<IndexType, VectorAllocator>;
+  // Allocator for the map's real value_type (const key), so map_ needs no allocator conversion.
+  using MapAllocator = Allocator<std::pair<const DexFile* const, IndexVector>>;
+  using DexFileMap =
+      std::map<const DexFile*, IndexVector, std::less<const DexFile*>, MapAllocator>;
+
+  DexReferenceCollection(const MapAllocator& map_allocator = MapAllocator(),
+                         const VectorAllocator& vector_allocator = VectorAllocator())
+      : map_(map_allocator),
+        vector_allocator_(vector_allocator) {}
+
+  // Appends `index` to the vector kept for `dex`, creating that vector if it is missing.
+  void AddReference(const DexFile* dex, IndexType index) {
+    GetOrInsertVector(dex)->push_back(index);
+  }
+
+  DexFileMap& GetMap() {
+    return map_;
+  }
+
+  size_t NumReferences() const {
+    size_t ret = 0;
+    for (auto&& pair : map_) {
+      ret += pair.second.size();
+    }
+    return ret;
+  }
+
+ private:
+  DexFileMap map_;
+  // Optimize for adding to same vector in succession; current_vector_ points into map_.
+  const DexFile* current_dex_file_ = nullptr;
+  IndexVector* current_vector_ = nullptr;
+  VectorAllocator vector_allocator_;
+
+  ALWAYS_INLINE IndexVector* GetOrInsertVector(const DexFile* dex) {
+    // Also taken while nothing is cached, so a null `dex` cannot return a null vector.
+    if (UNLIKELY(current_vector_ == nullptr || current_dex_file_ != dex)) {
+      // There is an assumption that constructing an empty vector won't do any allocations.
+      // If this is incorrect, this might leak for the arena case.
+      current_vector_ = &map_.emplace(dex, IndexVector(vector_allocator_)).first->second;
+      current_dex_file_ = dex;
+    }
+    return current_vector_;
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEX_REFERENCE_COLLECTION_H_
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
deleted file mode 100644
index 0d2f6d0..0000000
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method.h"
-#include "base/callee_save_type.h"
-#include "base/enums.h"
-#include "callee_save_frame.h"
-#include "entrypoints/runtime_asm_entrypoints.h"
-#include "instrumentation.h"
-#include "mirror/object-inl.h"
-#include "runtime.h"
-#include "thread-current-inl.h"
-
-namespace art {
-
-extern "C" const void* artInstrumentationMethodEntryFromCode(ArtMethod* method,
-                                                             mirror::Object* this_object,
-                                                             Thread* self,
-                                                             uintptr_t lr)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Instrumentation changes the stack. Thus, when exiting, the stack cannot be verified, so skip
-  // that part.
-  ScopedQuickEntrypointChecks sqec(self, kIsDebugBuild, false);
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  const void* result;
-  if (instrumentation->IsDeoptimized(method)) {
-    result = GetQuickToInterpreterBridge();
-  } else {
-    result = instrumentation->GetQuickCodeFor(method, kRuntimePointerSize);
-    DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
-  }
-  bool interpreter_entry = (result == GetQuickToInterpreterBridge());
-  instrumentation->PushInstrumentationStackFrame(self, method->IsStatic() ? nullptr : this_object,
-                                                 method, lr, interpreter_entry);
-  CHECK(result != nullptr) << method->PrettyMethod();
-  return result;
-}
-
-extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self, ArtMethod** sp,
-                                                              uint64_t gpr_result,
-                                                              uint64_t fpr_result)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Instrumentation exit stub must not be entered with a pending exception.
-  CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
-                                     << self->GetException()->Dump();
-  // Compute address of return PC and sanity check that it currently holds 0.
-  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveRefsOnly);
-  uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
-                                                      return_pc_offset);
-  CHECK_EQ(*return_pc, 0U);
-
-  // Pop the frame filling in the return pc. The low half of the return value is 0 when
-  // deoptimization shouldn't be performed with the high-half having the return address. When
-  // deoptimization should be performed the low half is zero and the high-half the address of the
-  // deoptimization entry point.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  TwoWordReturn return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(
-      self, return_pc, gpr_result, fpr_result);
-  return return_or_deoptimize_pc;
-}
-
-}  // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 6fcb711..b7cd39f 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -28,6 +28,7 @@
 #include "imt_conflict_table.h"
 #include "imtable-inl.h"
 #include "interpreter/interpreter.h"
+#include "instrumentation.h"
 #include "linear_alloc.h"
 #include "method_bss_mapping.h"
 #include "method_handles.h"
@@ -895,7 +896,6 @@
     soa_->Env()->DeleteLocalRef(pair.first);
   }
 }
-
 // Handler for invocation on proxy methods. On entry a frame will exist for the proxy object method
 // which is responsible for recording callee save registers. We explicitly place into jobjects the
 // incoming reference arguments (so they survive GC). We invoke the invocation handler, which is a
@@ -988,6 +988,77 @@
   }
 }
 
+extern "C" const void* artInstrumentationMethodEntryFromCode(ArtMethod* method,
+                                                             mirror::Object* this_object,
+                                                             Thread* self,
+                                                             ArtMethod** sp)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  const void* result;  // Entry point returned to the caller on success.
+  // Instrumentation changes the stack. Thus, when exiting, the stack cannot be verified, so skip
+  // that part.
+  ScopedQuickEntrypointChecks sqec(self, kIsDebugBuild, false);
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->IsDeoptimized(method)) {
+    result = GetQuickToInterpreterBridge();
+  } else {
+    result = instrumentation->GetQuickCodeFor(method, kRuntimePointerSize);
+    DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
+  }
+  // interpreter_entry records whether the bridge was chosen; it is forwarded to the push below.
+  bool interpreter_entry = (result == GetQuickToInterpreterBridge());
+  bool is_static = method->IsStatic();
+  uint32_t shorty_len;
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
+  // NOTE(review): the visitor presumably protects reference arguments across the push - confirm.
+  ScopedObjectAccessUnchecked soa(self);
+  RememberForGcArgumentVisitor visitor(sp, is_static, shorty, shorty_len, &soa);
+  visitor.VisitArguments();
+
+  instrumentation->PushInstrumentationStackFrame(self,
+                                                 is_static ? nullptr : this_object,
+                                                 method,
+                                                 QuickArgumentVisitor::GetCallingPc(sp),
+                                                 interpreter_entry);
+  // PushInstrumentationStackFrame() may leave an exception pending; report that as nullptr.
+  visitor.FixupReferences();
+  if (UNLIKELY(self->IsExceptionPending())) {
+    return nullptr;
+  }
+  CHECK(result != nullptr) << method->PrettyMethod();
+  return result;
+}
+
+extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self,
+                                                              ArtMethod** sp,
+                                                              uint64_t* gpr_result,
+                                                              uint64_t* fpr_result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK_EQ(reinterpret_cast<uintptr_t>(self), reinterpret_cast<uintptr_t>(Thread::Current()));
+  CHECK(gpr_result != nullptr);  // Both result slots are passed by pointer and must be valid.
+  CHECK(fpr_result != nullptr);
+  // Instrumentation exit stub must not be entered with a pending exception.
+  CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
+                                     << self->GetException()->Dump();
+  // Compute address of return PC and sanity check that it currently holds 0.
+  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveRefsOnly);
+  uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
+                                                      return_pc_offset);
+  CHECK_EQ(*return_pc, 0U);
+
+  // Pop the frame filling in the return pc. The low half of the return value is 0 when
+  // deoptimization shouldn't be performed with the high-half having the return address. When
+  // deoptimization should be performed the low half is zero and the high-half the address of the
+  // deoptimization entry point.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  TwoWordReturn return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(
+      self, return_pc, gpr_result, fpr_result);
+  if (self->IsExceptionPending()) {  // Can only have been raised while popping the frame.
+    return GetTwoWordFailureValue();
+  }
+  return return_or_deoptimize_pc;
+}
+
 // Lazily resolve a method for quick. Called by stub code.
 extern "C" const void* artQuickResolutionTrampoline(
     ArtMethod* called, mirror::Object* receiver, Thread* self, ArtMethod** sp)
diff --git a/runtime/exec_utils.cc b/runtime/exec_utils.cc
index 9efb1a3..db1baa7 100644
--- a/runtime/exec_utils.cc
+++ b/runtime/exec_utils.cc
@@ -28,7 +28,6 @@
 
 namespace art {
 
-using android::base::StringAppendF;
 using android::base::StringPrintf;
 
 int ExecAndReturnCode(std::vector<std::string>& arg_vector, std::string* error_msg) {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 1af3b57..d944ce4 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -90,8 +90,6 @@
 
 namespace gc {
 
-using android::base::StringPrintf;
-
 static constexpr size_t kCollectorTransitionStressIterations = 0;
 static constexpr size_t kCollectorTransitionStressWait = 10 * 1000;  // Microseconds
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 9b9c70f..8120cc4 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1123,25 +1123,40 @@
 void Instrumentation::PushInstrumentationStackFrame(Thread* self, mirror::Object* this_object,
                                                     ArtMethod* method,
                                                     uintptr_t lr, bool interpreter_entry) {
-  // We have a callee-save frame meaning this value is guaranteed to never be 0.
-  size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk);
+  DCHECK(!self->IsExceptionPending());
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   if (kVerboseInstrumentation) {
     LOG(INFO) << "Entering " << ArtMethod::PrettyMethod(method) << " from PC "
               << reinterpret_cast<void*>(lr);
   }
-  instrumentation::InstrumentationStackFrame instrumentation_frame(this_object, method, lr,
+
+  // We send the enter event before pushing the instrumentation frame to make cleanup easier. If the
+  // event causes an exception we can simply send the unwind event and return.
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Object> h_this(hs.NewHandle(this_object));
+  if (!interpreter_entry) {
+    MethodEnterEvent(self, h_this.Get(), method, 0);
+    if (self->IsExceptionPending()) {
+      MethodUnwindEvent(self, h_this.Get(), method, 0);
+      return;
+    }
+  }
+
+  DCHECK(!self->IsExceptionPending());
+  // We have a callee-save frame meaning this value is guaranteed to never be 0.
+  size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk);
+
+  instrumentation::InstrumentationStackFrame instrumentation_frame(h_this.Get(), method, lr,
                                                                    frame_id, interpreter_entry);
   stack->push_front(instrumentation_frame);
-
-  if (!interpreter_entry) {
-    MethodEnterEvent(self, this_object, method, 0);
-  }
 }
 
-TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
-                                                            uint64_t gpr_result,
-                                                            uint64_t fpr_result) {
+TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
+                                                            uintptr_t* return_pc,
+                                                            uint64_t* gpr_result,
+                                                            uint64_t* fpr_result) {
+  DCHECK(gpr_result != nullptr);
+  DCHECK(fpr_result != nullptr);
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
@@ -1157,13 +1172,20 @@
   uint32_t length;
   const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   char return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0];
+  bool is_ref = return_shorty == '[' || return_shorty == 'L';
+  StackHandleScope<1> hs(self);
+  MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
   JValue return_value;
   if (return_shorty == 'V') {
     return_value.SetJ(0);
   } else if (return_shorty == 'F' || return_shorty == 'D') {
-    return_value.SetJ(fpr_result);
+    return_value.SetJ(*fpr_result);
   } else {
-    return_value.SetJ(gpr_result);
+    return_value.SetJ(*gpr_result);
+  }
+  if (is_ref) {
+    // Take a handle to the return value so we won't lose it if we suspend.
+    res.Assign(return_value.GetL());
   }
   // TODO: improve the dex pc information here, requires knowledge of current PC as opposed to
   //       return_pc.
@@ -1180,6 +1202,10 @@
   bool deoptimize = (visitor.caller != nullptr) &&
                     (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
                     Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
+  if (is_ref) {
+    // Restore the return value if it's a reference since it might have moved.
+    *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get();
+  }
   if (deoptimize && Runtime::Current()->IsAsyncDeoptimizeable(*return_pc)) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << "Deoptimizing "
@@ -1214,9 +1240,8 @@
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
+  size_t idx = stack->size();
   InstrumentationStackFrame instrumentation_frame = stack->front();
-  // TODO: bring back CheckStackDepth(self, instrumentation_frame, 2);
-  stack->pop_front();
 
   ArtMethod* method = instrumentation_frame.method_;
   if (is_deoptimization) {
@@ -1234,6 +1259,10 @@
     uint32_t dex_pc = DexFile::kDexNoIndex;
     MethodUnwindEvent(self, instrumentation_frame.this_object_, method, dex_pc);
   }
+  // TODO: bring back CheckStackDepth(self, instrumentation_frame, 2);
+  CHECK_EQ(stack->size(), idx);
+  DCHECK(instrumentation_frame.method_ == stack->front().method_);
+  stack->pop_front();
   return instrumentation_frame.return_pc_;
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 363985f..90b5def 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -432,9 +432,13 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Called when an instrumented method is exited. Removes the pushed instrumentation frame
-  // returning the intended link register. Generates method exit events.
+  // returning the intended link register. Generates method exit events. The gpr_result and
+  // fpr_result pointers are pointers to the locations where the integer/pointer and floating point
+  // result values of the function are stored. Both pointers must always be valid but the values
+  // held there will only be meaningful if interpreted as the appropriate type given the function
+  // being returned from.
   TwoWordReturn PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
-                                             uint64_t gpr_result, uint64_t fpr_result)
+                                             uint64_t* gpr_result, uint64_t* fpr_result)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
   // Pops an instrumentation frame from the current thread and generate an unwind event.
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 4bc0f2f..85cf73b 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -254,6 +254,13 @@
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         method, 0);
+      if (UNLIKELY(self->IsExceptionPending())) {
+        instrumentation->MethodUnwindEvent(self,
+                                           shadow_frame.GetThisObject(code_item->ins_size_),
+                                           method,
+                                           0);
+        return JValue();
+      }
     }
 
     if (!stay_in_interpreter) {
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 32a2378..45788e7 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -26,13 +26,13 @@
 namespace art {
 namespace interpreter {
 
-#define HANDLE_PENDING_EXCEPTION()                                                              \
+#define HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(instr)                                    \
   do {                                                                                          \
     DCHECK(self->IsExceptionPending());                                                         \
     self->AllowThreadSuspension();                                                              \
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame,           \
                                                                   inst->GetDexPc(insns),        \
-                                                                  instrumentation);             \
+                                                                  instr);                       \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
       /* Structured locking is to be enforced for abnormal termination, too. */                 \
       DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame);                        \
@@ -47,6 +47,8 @@
     }                                                                                           \
   } while (false)
 
+#define HANDLE_PENDING_EXCEPTION() HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(instrumentation)
+
 #define POSSIBLY_HANDLE_PENDING_EXCEPTION(_is_exception_pending, _next_function)  \
   do {                                                                            \
     if (UNLIKELY(_is_exception_pending)) {                                        \
@@ -218,6 +220,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -235,6 +241,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -253,6 +263,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -270,6 +284,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -307,6 +325,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
           // Re-load since it might have moved during the MethodExitEvent.
           result.SetL(shadow_frame.GetVRegReference(ref_idx));
         }
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 96249f9..4ab3d69 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -500,8 +500,8 @@
       }
       break;
     case MK_CONDITIONAL:
-      CHECK(false);  // should not be getting these
-      break;
+      LOG(FATAL) << "Unexpected MK_CONDITIONAL";  // should not be getting these
+      UNREACHABLE();
     case MK_THREAD_ONLY:
       if (!Dbg::MatchThread(pMod->threadOnly.threadId, basket.thread)) {
         return false;
diff --git a/runtime/jdwp/jdwp_expand_buf.cc b/runtime/jdwp/jdwp_expand_buf.cc
index 961dd36..f0b8c91 100644
--- a/runtime/jdwp/jdwp_expand_buf.cc
+++ b/runtime/jdwp/jdwp_expand_buf.cc
@@ -152,7 +152,9 @@
 
 static void SetUtf8String(uint8_t* buf, const char* str, size_t strLen) {
   Set4BE(buf, strLen);
-  memcpy(buf + sizeof(uint32_t), str, strLen);
+  if (str != nullptr) {
+    memcpy(buf + sizeof(uint32_t), str, strLen);
+  }
 }
 
 /*
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 4cd2125..2744c4f 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -29,6 +29,7 @@
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
 #include "gc/scoped_gc_critical_section.h"
+#include "intern_table.h"
 #include "jit/jit.h"
 #include "jit/profiling_info.h"
 #include "linear_alloc.h"
diff --git a/runtime/jit/profile_compilation_info-inl.h b/runtime/jit/profile_compilation_info-inl.h
new file mode 100644
index 0000000..8a067a5
--- /dev/null
+++ b/runtime/jit/profile_compilation_info-inl.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_INL_H_
+#define ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_INL_H_
+
+#include "profile_compilation_info.h"
+
+namespace art {
+
+template <class Iterator>
+inline bool ProfileCompilationInfo::AddSampledMethodsForDex(bool startup,
+                                                            const DexFile* dex_file,
+                                                            Iterator index_begin,
+                                                            Iterator index_end) {
+  DexFileData* data = GetOrAddDexFileData(dex_file);
+  if (data == nullptr) {
+    return false;
+  }
+  for (auto it = index_begin; it != index_end; ++it) {
+    DCHECK_LT(*it, data->num_method_ids);
+    data->AddSampledMethod(startup, *it);
+  }
+  return true;
+}
+
+template <class Iterator>
+inline bool ProfileCompilationInfo::AddHotMethodsForDex(const DexFile* dex_file,
+                                                        Iterator index_begin,
+                                                        Iterator index_end) {
+  DexFileData* data = GetOrAddDexFileData(dex_file);
+  if (data == nullptr) {
+    return false;
+  }
+  for (auto it = index_begin; it != index_end; ++it) {
+    DCHECK_LT(*it, data->num_method_ids);
+    data->FindOrAddMethod(*it);
+  }
+  return true;
+}
+
+template <class Iterator>
+inline bool ProfileCompilationInfo::AddClassesForDex(const DexFile* dex_file,
+                                                     Iterator index_begin,
+                                                     Iterator index_end) {
+  DexFileData* data = GetOrAddDexFileData(dex_file);
+  if (data == nullptr) {
+    return false;
+  }
+  data->class_set.insert(index_begin, index_end);
+  return true;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_INL_H_
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 3852a5b..ea27d3b 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -147,18 +147,6 @@
   return true;
 }
 
-bool ProfileCompilationInfo::AddSampledMethods(bool startup,
-                                               std::vector<MethodReference>& methods) {
-  for (const MethodReference& ref : methods) {
-    DexFileData* data = GetOrAddDexFileData(ref.dex_file);
-    if (data == nullptr) {
-      return false;
-    }
-    data->AddSampledMethod(startup, ref.dex_method_index);
-  }
-  return true;
-}
-
 bool ProfileCompilationInfo::AddMethodsAndClasses(
     const std::vector<ProfileMethodInfo>& methods,
     const std::set<DexCacheResolvedClasses>& resolved_classes) {
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index a9f2fb6..bd1b9d6 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -197,6 +197,10 @@
   bool AddMethodsAndClasses(const std::vector<ProfileMethodInfo>& methods,
                             const std::set<DexCacheResolvedClasses>& resolved_classes);
 
+  // Bulk add classes for a single dex. Iterators must yield type ids, not class def indices.
+  template <class Iterator>
+  bool AddClassesForDex(const DexFile* dex_file, Iterator index_begin, Iterator index_end);
+
   // Add a method index to the profile (without inline caches).
   bool AddMethodIndex(const std::string& dex_location,
                       uint32_t checksum,
@@ -207,13 +211,24 @@
   bool AddMethod(const ProfileMethodInfo& pmi);
 
-  // Add methods that have samples but are are not necessarily hot. These are partitioned into two
+  // Add methods that have samples but are not necessarily hot. These are partitioned into two
-  // possibly interesecting sets startup and post startup.
-  bool AddSampledMethods(bool startup, std::vector<MethodReference>& methods);
+  // possibly intersecting sets startup and post startup.
   bool AddSampledMethod(bool startup,
                         const std::string& dex_location,
                         uint32_t checksum,
                         uint16_t method_idx,
                         uint32_t num_method_ids);
+  // Bulk add sampled methods for a single dex, fast since it only has one GetOrAddDexFileData call.
+  template <class Iterator>
+  bool AddSampledMethodsForDex(bool startup,
+                               const DexFile* dex_file,
+                               Iterator index_begin,
+                               Iterator index_end);
+
+  // Bulk add hot methods for a single dex, fast since it only has one GetOrAddDexFileData call.
+  template <class Iterator>
+  bool AddHotMethodsForDex(const DexFile* dex_file,
+                           Iterator index_begin,
+                           Iterator index_end);
 
   // Load profile information from the given file descriptor.
   // If the current profile is non-empty the load will fail.
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 6128d82..edce9cd 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -25,12 +25,16 @@
 
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "base/scoped_arena_containers.h"
+#include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
 #include "compiler_filter.h"
+#include "dex_reference_collection.h"
 #include "gc/collector_type.h"
 #include "gc/gc_cause.h"
 #include "gc/scoped_gc_critical_section.h"
+#include "jit/profile_compilation_info-inl.h"
 #include "oat_file_manager.h"
 #include "scoped_thread_state_change-inl.h"
 
@@ -179,33 +183,45 @@
   }
 }
 
+using MethodReferenceCollection = DexReferenceCollection<uint16_t, ScopedArenaAllocatorAdapter>;
+using TypeReferenceCollection = DexReferenceCollection<dex::TypeIndex,
+                                                       ScopedArenaAllocatorAdapter>;
+
 // Get resolved methods that have a profile info or more than kStartupMethodSamples samples.
 // Excludes native methods and classes in the boot image.
-class GetMethodsVisitor : public ClassVisitor {
+class GetClassesAndMethodsVisitor : public ClassVisitor {
  public:
-  GetMethodsVisitor(std::vector<MethodReference>* hot_methods,
-                    std::vector<MethodReference>* sampled_methods,
-                    uint32_t hot_method_sample_threshold)
+  GetClassesAndMethodsVisitor(MethodReferenceCollection* hot_methods,
+                              MethodReferenceCollection* sampled_methods,
+                              TypeReferenceCollection* resolved_classes,
+                              uint32_t hot_method_sample_threshold)
     : hot_methods_(hot_methods),
       sampled_methods_(sampled_methods),
+      resolved_classes_(resolved_classes),
       hot_method_sample_threshold_(hot_method_sample_threshold) {}
 
   virtual bool operator()(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+    if (klass->IsProxyClass() ||
+        klass->IsArrayClass() ||
+        !klass->IsResolved() ||
+        klass->IsErroneousResolved() ||
+        klass->GetClassLoader() == nullptr) {
       return true;
     }
+    DCHECK(klass->GetDexCache() != nullptr) << klass->PrettyClass();
+    resolved_classes_->AddReference(&klass->GetDexFile(), klass->GetDexTypeIndex());
     for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
-      if (!method.IsNative() && !method.IsProxyMethod()) {
+      if (!method.IsNative()) {
+        DCHECK(!method.IsProxyMethod());
         const uint16_t counter = method.GetCounter();
-        MethodReference ref(method.GetDexFile(), method.GetDexMethodIndex());
         // Mark startup methods as hot if they have more than hot_method_sample_threshold_ samples.
         // This means they will get compiled by the compiler driver.
         if (method.GetProfilingInfo(kRuntimePointerSize) != nullptr ||
             (method.GetAccessFlags() & kAccPreviouslyWarm) != 0 ||
             counter >= hot_method_sample_threshold_) {
-          hot_methods_->push_back(ref);
+          hot_methods_->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
         } else if (counter != 0) {
-          sampled_methods_->push_back(ref);
+          sampled_methods_->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
         }
       } else {
         CHECK_EQ(method.GetCounter(), 0u);
@@ -215,85 +231,96 @@
   }
 
  private:
-  std::vector<MethodReference>* const hot_methods_;
-  std::vector<MethodReference>* const sampled_methods_;
+  MethodReferenceCollection* const hot_methods_;
+  MethodReferenceCollection* const sampled_methods_;
+  TypeReferenceCollection* const resolved_classes_;
   uint32_t hot_method_sample_threshold_;
 };
 
 void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
+  const uint64_t start_time = NanoTime();
 
   // Resolve any new registered locations.
   ResolveTrackedLocations();
 
   Thread* const self = Thread::Current();
-  std::vector<MethodReference> hot_methods;
-  std::vector<MethodReference> startup_methods;
-  std::set<DexCacheResolvedClasses> resolved_classes;
+  Runtime* const runtime = Runtime::Current();
+  ArenaStack stack(runtime->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
+  MethodReferenceCollection hot_methods(allocator.Adapter(), allocator.Adapter());
+  MethodReferenceCollection startup_methods(allocator.Adapter(), allocator.Adapter());
+  TypeReferenceCollection resolved_classes(allocator.Adapter(), allocator.Adapter());
+  const bool is_low_ram = runtime->GetHeap()->IsLowMemoryMode();
+  const uint32_t hot_threshold = options_.GetHotStartupMethodSamples(is_low_ram);
   {
     ScopedObjectAccess soa(self);
     gc::ScopedGCCriticalSection sgcs(self,
                                      gc::kGcCauseProfileSaver,
                                      gc::kCollectorTypeCriticalSection);
-
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    resolved_classes = class_linker->GetResolvedClasses(/*ignore boot classes*/ true);
-
     {
       ScopedTrace trace2("Get hot methods");
-      GetMethodsVisitor visitor(&hot_methods,
-                                &startup_methods,
-                                options_.GetHotStartupMethodSamples());
-      class_linker->VisitClasses(&visitor);
-      VLOG(profiler) << "Profile saver recorded " << hot_methods.size() << " hot methods and "
-                     << startup_methods.size() << " startup methods with threshold "
-                     << options_.GetHotStartupMethodSamples();
+      GetClassesAndMethodsVisitor visitor(&hot_methods,
+                                          &startup_methods,
+                                          &resolved_classes,
+                                          hot_threshold);
+      runtime->GetClassLinker()->VisitClasses(&visitor);
     }
   }
+
   MutexLock mu(self, *Locks::profiler_lock_);
   uint64_t total_number_of_profile_entries_cached = 0;
 
   for (const auto& it : tracked_dex_base_locations_) {
-    std::set<DexCacheResolvedClasses> resolved_classes_for_location;
     const std::string& filename = it.first;
-    const std::set<std::string>& locations = it.second;
-    std::vector<ProfileMethodInfo> profile_methods_for_location;
-    std::vector<MethodReference> startup_methods_for_locations;
-    for (const MethodReference& ref : hot_methods) {
-      if (locations.find(ref.dex_file->GetBaseLocation()) != locations.end()) {
-        profile_methods_for_location.emplace_back(ref.dex_file, ref.dex_method_index);
-        // Hot methods are also startup methods since this function is only invoked during startup.
-        startup_methods_for_locations.push_back(ref);
-      }
-    }
-    for (const MethodReference& ref : startup_methods) {
-      if (locations.find(ref.dex_file->GetBaseLocation()) != locations.end()) {
-        startup_methods_for_locations.push_back(ref);
-      }
-    }
-
-    for (const DexCacheResolvedClasses& classes : resolved_classes) {
-      if (locations.find(classes.GetBaseLocation()) != locations.end()) {
-        VLOG(profiler) << "Added " << classes.GetClasses().size() << " classes for location "
-                       << classes.GetBaseLocation() << " (" << classes.GetDexLocation() << ")";
-        resolved_classes_for_location.insert(classes);
-      } else {
-        VLOG(profiler) << "Location not found " << classes.GetBaseLocation()
-                       << " (" << classes.GetDexLocation() << ")";
-      }
-    }
     auto info_it = profile_cache_.Put(
         filename,
         new ProfileCompilationInfo(Runtime::Current()->GetArenaPool()));
-
     ProfileCompilationInfo* cached_info = info_it->second;
-    cached_info->AddMethodsAndClasses(profile_methods_for_location, resolved_classes_for_location);
-    cached_info->AddSampledMethods(/*startup*/ true, startup_methods_for_locations);
+
+    const std::set<std::string>& locations = it.second;
+    for (const auto& pair : hot_methods.GetMap()) {
+      const DexFile* const dex_file = pair.first;
+      if (locations.find(dex_file->GetBaseLocation()) != locations.end()) {
+        cached_info->AddSampledMethodsForDex(/*startup*/ true,
+                                             dex_file,
+                                             pair.second.begin(),
+                                             pair.second.end());
+        // Adding hot methods is a bit slow, TODO: optimize.
+        cached_info->AddHotMethodsForDex(dex_file, pair.second.begin(), pair.second.end());
+      }
+    }
+    for (const auto& pair : startup_methods.GetMap()) {
+      const DexFile* const dex_file = pair.first;
+      if (locations.find(dex_file->GetBaseLocation()) != locations.end()) {
+        cached_info->AddSampledMethodsForDex(/*startup*/ true,
+                                             dex_file,
+                                             pair.second.begin(),
+                                             pair.second.end());
+      }
+    }
+    for (const auto& pair : resolved_classes.GetMap()) {
+      const DexFile* const dex_file = pair.first;
+      if (locations.find(dex_file->GetBaseLocation()) != locations.end()) {
+        const TypeReferenceCollection::IndexVector& classes = pair.second;
+        VLOG(profiler) << "Added " << classes.size() << " classes for location "
+                       << dex_file->GetBaseLocation()
+                       << " (" << dex_file->GetLocation() << ")";
+        cached_info->AddClassesForDex(dex_file, classes.begin(), classes.end());
+        // One cached profile entry per tracked dex file that contributed resolved classes.
+        ++total_number_of_profile_entries_cached;
+      } else {
+        VLOG(profiler) << "Location not found " << dex_file->GetBaseLocation()
+                       << " (" << dex_file->GetLocation() << ")";
+      }
+    }
-    total_number_of_profile_entries_cached += resolved_classes_for_location.size();
   }
   max_number_of_profile_entries_cached_ = std::max(
       max_number_of_profile_entries_cached_,
       total_number_of_profile_entries_cached);
+  VLOG(profiler) << "Profile saver recorded " << hot_methods.NumReferences() << " hot methods and "
+                 << startup_methods.NumReferences() << " startup methods with threshold "
+                 << hot_threshold << " in " << PrettyDuration(NanoTime() - start_time);
 }
 
 bool ProfileSaver::ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods) {
diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h
index 455bc1a..44550f4 100644
--- a/runtime/jit/profile_saver_options.h
+++ b/runtime/jit/profile_saver_options.h
@@ -24,16 +24,18 @@
   static constexpr uint32_t kSaveResolvedClassesDelayMs = 5 * 1000;  // 5 seconds
   // Minimum number of JIT samples during launch to mark a method as hot in the profile.
   static constexpr uint32_t kHotStartupMethodSamples = 1;
+  static constexpr uint32_t kHotStartupMethodSamplesLowRam = 256;
   static constexpr uint32_t kMinMethodsToSave = 10;
   static constexpr uint32_t kMinClassesToSave = 10;
   static constexpr uint32_t kMinNotificationBeforeWake = 10;
   static constexpr uint32_t kMaxNotificationBeforeWake = 50;
+  static constexpr uint32_t kHotStartupMethodSamplesNotSet = std::numeric_limits<uint32_t>::max();
 
   ProfileSaverOptions() :
     enabled_(false),
     min_save_period_ms_(kMinSavePeriodMs),
     save_resolved_classes_delay_ms_(kSaveResolvedClassesDelayMs),
-    hot_startup_method_samples_(kHotStartupMethodSamples),
+    hot_startup_method_samples_(kHotStartupMethodSamplesNotSet),
     min_methods_to_save_(kMinMethodsToSave),
     min_classes_to_save_(kMinClassesToSave),
     min_notification_before_wake_(kMinNotificationBeforeWake),
@@ -73,8 +75,12 @@
   uint32_t GetSaveResolvedClassesDelayMs() const {
     return save_resolved_classes_delay_ms_;
   }
-  uint32_t GetHotStartupMethodSamples() const {
-    return hot_startup_method_samples_;
+  uint32_t GetHotStartupMethodSamples(bool is_low_ram) const {
+    uint32_t ret = hot_startup_method_samples_;
+    if (ret == kHotStartupMethodSamplesNotSet) {
+      ret = is_low_ram ? kHotStartupMethodSamplesLowRam : kHotStartupMethodSamples;
+    }
+    return ret;
   }
   uint32_t GetMinMethodsToSave() const {
     return min_methods_to_save_;
@@ -107,6 +113,8 @@
   bool enabled_;
   uint32_t min_save_period_ms_;
   uint32_t save_resolved_classes_delay_ms_;
+  // Do not access hot_startup_method_samples_ directly for reading since it may be set to the
+  // placeholder default.
   uint32_t hot_startup_method_samples_;
   uint32_t min_methods_to_save_;
   uint32_t min_classes_to_save_;
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 8b94404..c847942 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -193,7 +193,7 @@
     *error_msg = StringPrintf("Failed to build process map");
     return false;
   }
-  ScopedBacktraceMapIteratorLock(map.get());
+  ScopedBacktraceMapIteratorLock lock(map.get());
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
     if ((begin >= it->start && begin < it->end)      // start of new within old
         || (end > it->start && end < it->end)        // end of new within old
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 7287a92..99565c6 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -188,6 +188,16 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(PrimitiveArray);
 };
 
+// Declare the different primitive arrays. Instantiations will be in array.cc.
+extern template class PrimitiveArray<uint8_t>;   // BooleanArray
+extern template class PrimitiveArray<int8_t>;    // ByteArray
+extern template class PrimitiveArray<uint16_t>;  // CharArray
+extern template class PrimitiveArray<double>;    // DoubleArray
+extern template class PrimitiveArray<float>;     // FloatArray
+extern template class PrimitiveArray<int32_t>;   // IntArray
+extern template class PrimitiveArray<int64_t>;   // LongArray
+extern template class PrimitiveArray<int16_t>;   // ShortArray
+
 // Either an IntArray or a LongArray.
 class PointerArray : public Array {
  public:
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 06ee3d3..e4b5320 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -1143,9 +1143,7 @@
 dex::TypeIndex Class::FindTypeIndexInOtherDexFile(const DexFile& dex_file) {
   std::string temp;
   const DexFile::TypeId* type_id = dex_file.FindTypeId(GetDescriptor(&temp));
-  return (type_id == nullptr)
-      ? dex::TypeIndex(DexFile::kDexNoIndex)
-      : dex_file.GetIndexForTypeId(*type_id);
+  return (type_id == nullptr) ? dex::TypeIndex() : dex_file.GetIndexForTypeId(*type_id);
 }
 
 template <PointerSize kPointerSize, bool kTransactionActive>
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 57b20a1..7560639 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -26,7 +26,6 @@
 #include "common_throws.h"
 #include "gc/heap-inl.h"
 #include "globals.h"
-#include "intern_table.h"
 #include "runtime.h"
 #include "thread.h"
 #include "utf.h"
@@ -161,10 +160,6 @@
   const int32_t offset_;
 };
 
-inline ObjPtr<String> String::Intern() {
-  return Runtime::Current()->GetInternTable()->InternWeak(this);
-}
-
 inline uint16_t String::CharAt(int32_t index) {
   int32_t count = GetLength();
   if (UNLIKELY((index < 0) || (index >= count))) {
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 80745d2..82ff6dd 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -19,6 +19,7 @@
 #include "arch/memcmp16.h"
 #include "array.h"
 #include "base/array_ref.h"
+#include "base/stl_util.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc_root-inl.h"
@@ -420,5 +421,9 @@
   return PrettyDescriptor(ToModifiedUtf8().c_str());
 }
 
+ObjPtr<String> String::Intern() {
+  return Runtime::Current()->GetInternTable()->InternWeak(this);
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 267a975..21e20e9 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -23,6 +23,7 @@
 
 #include "arch/instruction_set_features.h"
 #include "base/bit_utils.h"
+#include "base/strlcpy.h"
 
 namespace art {
 
@@ -520,9 +521,9 @@
     SafeMap<std::string, std::string>::const_iterator it = key_value_store->begin();
     SafeMap<std::string, std::string>::const_iterator end = key_value_store->end();
     for ( ; it != end; ++it) {
-      strcpy(data_ptr, it->first.c_str());
+      strlcpy(data_ptr, it->first.c_str(), it->first.length() + 1);
       data_ptr += it->first.length() + 1;
-      strcpy(data_ptr, it->second.c_str());
+      strlcpy(data_ptr, it->second.c_str(), it->second.length() + 1);
       data_ptr += it->second.length() + 1;
     }
   }
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index f0912cf..2e2e8c3 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -24,6 +24,7 @@
 #include "android-base/strings.h"
 
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "compiler_filter.h"
 #include "class_linker.h"
 #include "exec_utils.h"
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 45773fd..3ec5b32 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -1731,6 +1731,7 @@
 }
 
 extern "C" bool ArtPlugin_Deinitialize() {
+  gEventHandler.Shutdown();
   PhaseUtil::Unregister();
   ThreadUtil::Unregister();
   ClassUtil::Unregister();
diff --git a/runtime/openjdkjvmti/art_jvmti.h b/runtime/openjdkjvmti/art_jvmti.h
index 2a2aa4c..af85fb0 100644
--- a/runtime/openjdkjvmti/art_jvmti.h
+++ b/runtime/openjdkjvmti/art_jvmti.h
@@ -41,6 +41,7 @@
 #include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "base/strlcpy.h"
 #include "events.h"
 #include "java_vm_ext.h"
 #include "jni_env_ext.h"
@@ -187,7 +188,7 @@
   size_t len = strlen(src) + 1;
   JvmtiUniquePtr<char[]> ret = AllocJvmtiUniquePtr<char[]>(env, len, error);
   if (ret != nullptr) {
-    strcpy(ret.get(), src);
+    strlcpy(ret.get(), src, len);
   }
   return ret;
 }
@@ -217,8 +218,8 @@
     .can_redefine_any_class                          = 0,
     .can_get_current_thread_cpu_time                 = 0,
     .can_get_thread_cpu_time                         = 0,
-    .can_generate_method_entry_events                = 0,
-    .can_generate_method_exit_events                 = 0,
+    .can_generate_method_entry_events                = 1,
+    .can_generate_method_exit_events                 = 1,
     .can_generate_all_class_hook_events              = 0,
     .can_generate_compiled_method_load_events        = 0,
     .can_generate_monitor_events                     = 0,
diff --git a/runtime/openjdkjvmti/events-inl.h b/runtime/openjdkjvmti/events-inl.h
index 57abf31..cb7e6a9 100644
--- a/runtime/openjdkjvmti/events-inl.h
+++ b/runtime/openjdkjvmti/events-inl.h
@@ -20,6 +20,7 @@
 #include <array>
 
 #include "events.h"
+#include "ScopedLocalRef.h"
 
 #include "art_jvmti.h"
 
@@ -135,6 +136,8 @@
       continue;
     }
     if (ShouldDispatch<kEvent>(env, thread)) {
+      ScopedLocalRef<jthrowable> thr(jnienv, jnienv->ExceptionOccurred());
+      jnienv->ExceptionClear();
       jint new_len = 0;
       unsigned char* new_data = nullptr;
       auto callback = impl::GetCallback<kEvent>(env);
@@ -148,6 +151,9 @@
                current_class_data,
                &new_len,
                &new_data);
+      if (thr.get() != nullptr && !jnienv->ExceptionCheck()) {
+        jnienv->Throw(thr.get());
+      }
       if (new_data != nullptr && new_data != current_class_data) {
         // Destroy the data the last transformer made. We skip this if the previous state was the
         // initial one since we don't know here which jvmtiEnv allocated it.
@@ -180,6 +186,25 @@
   }
 }
 
+// Dispatches kEvent to every non-null env in `envs`. Before each callback any pending exception
+// is stashed and cleared (the callback gets the JNIEnv and may throw); afterwards it is rethrown
+// only if the callback left none pending. In accordance with the JVMTI specification, an exception
+// from an event thus overwrites the current one, including one from an earlier env's callback.
+// TODO: Add the overwritten exceptions to the suppressed exceptions list of the newest exception.
+template <ArtJvmtiEvent kEvent, typename ...Args>
+inline void EventHandler::DispatchEvent(art::Thread* thread, JNIEnv* jnienv, Args... args) const {
+  for (ArtJvmTiEnv* env : envs) {
+    if (env != nullptr) {
+      ScopedLocalRef<jthrowable> thr(jnienv, jnienv->ExceptionOccurred());
+      jnienv->ExceptionClear();
+      DispatchEvent<kEvent, JNIEnv*, Args...>(env, thread, jnienv, args...);
+      if (thr.get() != nullptr && !jnienv->ExceptionCheck()) {
+        jnienv->Throw(thr.get());
+      }
+    }
+  }
+}
+
 template <ArtJvmtiEvent kEvent, typename ...Args>
 inline void EventHandler::DispatchEvent(ArtJvmTiEnv* env, art::Thread* thread, Args... args) const {
   using FnType = void(jvmtiEnv*, Args...);
diff --git a/runtime/openjdkjvmti/events.cc b/runtime/openjdkjvmti/events.cc
index 320c59c..90bc122 100644
--- a/runtime/openjdkjvmti/events.cc
+++ b/runtime/openjdkjvmti/events.cc
@@ -32,19 +32,24 @@
 #include "events-inl.h"
 
 #include "art_jvmti.h"
+#include "art_method-inl.h"
 #include "base/logging.h"
 #include "gc/allocation_listener.h"
 #include "gc/gc_pause_listener.h"
 #include "gc/heap.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "handle_scope-inl.h"
 #include "instrumentation.h"
 #include "jni_env_ext-inl.h"
+#include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-current-inl.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+#include "ti_phase.h"
 
 namespace openjdkjvmti {
 
@@ -294,6 +299,222 @@
   }
 }
 
+template<typename Type>
+static Type AddLocalRef(art::JNIEnvExt* e, art::mirror::Object* obj)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  return (obj == nullptr) ? nullptr : e->AddLocalReference<Type>(obj);  // Null stays null.
+}
+
+class JvmtiMethodTraceListener FINAL : public art::instrumentation::InstrumentationListener {
+ public:
+  explicit JvmtiMethodTraceListener(EventHandler* handler) : event_handler_(handler) {}
+
+  template<ArtJvmtiEvent kEvent, typename ...Args>
+  void RunEventCallback(art::Thread* self, art::JNIEnvExt* jnienv, Args... args)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    ScopedLocalRef<jthread> thread_jni(jnienv, AddLocalRef<jthread>(jnienv, self->GetPeer()));
+    // Run the callback inside its own JNI local frame; a capacity of 100 should be plenty.
+    jnienv->PushFrame(100);
+    {
+      // Agent callbacks are dispatched with the thread in kNative state (scoped to these braces).
+      art::ScopedThreadSuspension sts(self, art::ThreadState::kNative);
+      event_handler_->DispatchEvent<kEvent>(self,
+                                            static_cast<JNIEnv*>(jnienv),
+                                            thread_jni.get(),
+                                            args...);
+    }
+    jnienv->PopFrame();
+  }
+
+  // Call-back for when a method is entered; reports kMethodEntry for non-runtime methods.
+  void MethodEntered(art::Thread* self,
+                     art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                     art::ArtMethod* method,
+                     uint32_t dex_pc ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodEntry)) {
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      RunEventCallback<ArtJvmtiEvent::kMethodEntry>(self,
+                                                    jnienv,
+                                                    art::jni::EncodeArtMethod(method));
+    }
+  }
+
+  // Call-back for when a method is exited with a reference return value (sent as a local ref).
+  void MethodExited(art::Thread* self,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    art::Handle<art::mirror::Object> return_value)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodExit)) {
+      DCHECK_EQ(method->GetReturnTypePrimitive(), art::Primitive::kPrimNot)
+          << method->PrettyMethod();
+      DCHECK(!self->IsExceptionPending());
+      jvalue val;
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      ScopedLocalRef<jobject> return_jobj(jnienv, AddLocalRef<jobject>(jnienv, return_value.Get()));
+      val.l = return_jobj.get();
+      RunEventCallback<ArtJvmtiEvent::kMethodExit>(
+          self,
+          jnienv,
+          art::jni::EncodeArtMethod(method),
+          /*was_popped_by_exception*/ static_cast<jboolean>(JNI_FALSE),
+          val);
+    }
+  }
+
+  // Call-back for when a method is exited with a non-reference return value.
+  void MethodExited(art::Thread* self,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    const art::JValue& return_value)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodExit)) {
+      DCHECK_NE(method->GetReturnTypePrimitive(), art::Primitive::kPrimNot)
+          << method->PrettyMethod();
+      DCHECK(!self->IsExceptionPending());
+      jvalue val;
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      // A 64-bit integer is the largest member of the jvalue union, so copying the raw jlong
+      // transfers any non-reference return value.
+      val.j = return_value.GetJ();
+      RunEventCallback<ArtJvmtiEvent::kMethodExit>(
+          self,
+          jnienv,
+          art::jni::EncodeArtMethod(method),
+          /*was_popped_by_exception*/ static_cast<jboolean>(JNI_FALSE),
+          val);
+    }
+  }
+
+  // Call-back for when a method is popped due to an exception throw. A method will either cause a
+  // MethodExited call-back or a MethodUnwind call-back when its activation is removed.
+  void MethodUnwind(art::Thread* self,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodExit)) {
+      jvalue val;
+      // There is no return value on unwind; store -1 (all bits set) so val is not uninitialized.
+      val.j = static_cast<jlong>(-1);
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      art::StackHandleScope<1> hs(self);
+      art::Handle<art::mirror::Throwable> old_exception(hs.NewHandle(self->GetException()));
+      CHECK(!old_exception.IsNull());
+      self->ClearException();
+      RunEventCallback<ArtJvmtiEvent::kMethodExit>(
+          self,
+          jnienv,
+          art::jni::EncodeArtMethod(method),
+          /*was_popped_by_exception*/ static_cast<jboolean>(JNI_TRUE),
+          val);
+      // Match RI (reference implementation) behavior: a newly thrown exception replaces the old.
+      if (LIKELY(!self->IsExceptionPending())) {
+        self->SetException(old_exception.Get());
+      }
+    }
+  }
+
+  // Call-back for when the dex pc moves in a method. We don't currently have any events associated
+  // with this, so it is a no-op.
+  void DexPcMoved(art::Thread* self ATTRIBUTE_UNUSED,
+                  art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                  art::ArtMethod* method ATTRIBUTE_UNUSED,
+                  uint32_t new_dex_pc ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we read from a field. Currently a no-op.
+  void FieldRead(art::Thread* self ATTRIBUTE_UNUSED,
+                 art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                 art::ArtMethod* method ATTRIBUTE_UNUSED,
+                 uint32_t dex_pc ATTRIBUTE_UNUSED,
+                 art::ArtField* field ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we write into a field. Currently a no-op.
+  void FieldWritten(art::Thread* self ATTRIBUTE_UNUSED,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    art::ArtField* field ATTRIBUTE_UNUSED,
+                    const art::JValue& field_value ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back when an exception is caught. Currently a no-op.
+  void ExceptionCaught(art::Thread* self ATTRIBUTE_UNUSED,
+                       art::Handle<art::mirror::Throwable> exception_object ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we execute a branch. Currently a no-op.
+  void Branch(art::Thread* self ATTRIBUTE_UNUSED,
+              art::ArtMethod* method ATTRIBUTE_UNUSED,
+              uint32_t dex_pc ATTRIBUTE_UNUSED,
+              int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we get an invokevirtual or an invokeinterface. Currently a no-op.
+  void InvokeVirtualOrInterface(art::Thread* self ATTRIBUTE_UNUSED,
+                                art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                                art::ArtMethod* caller ATTRIBUTE_UNUSED,
+                                uint32_t dex_pc ATTRIBUTE_UNUSED,
+                                art::ArtMethod* callee ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+ private:
+  EventHandler* const event_handler_;  // Handler that the events are dispatched through.
+};
+
+static uint32_t GetInstrumentationEventsFor(ArtJvmtiEvent event) {  // Event -> listener bitmask.
+  switch (event) {
+    case ArtJvmtiEvent::kMethodEntry:
+      return art::instrumentation::Instrumentation::kMethodEntered;
+    case ArtJvmtiEvent::kMethodExit:  // Exit covers both normal return and exception unwind.
+      return art::instrumentation::Instrumentation::kMethodExited |
+             art::instrumentation::Instrumentation::kMethodUnwind;
+    default:  // Any other event is a caller bug; name the offending value in the fatal log.
+      LOG(FATAL) << "Unknown event " << static_cast<uint32_t>(event);
+      return 0;
+  }
+}
+
+static void SetupMethodTraceListener(JvmtiMethodTraceListener* listener,
+                                     ArtJvmtiEvent event,
+                                     bool enable) {  // true: add listener; false: remove it.
+  uint32_t new_events = GetInstrumentationEventsFor(event);  // Listener mask for `event`.
+  art::instrumentation::Instrumentation* instr = art::Runtime::Current()->GetInstrumentation();
+  art::gc::ScopedGCCriticalSection gcs(art::Thread::Current(),
+                                       art::gc::kGcCauseInstrumentation,
+                                       art::gc::kCollectorTypeInstrumentation);
+  art::ScopedSuspendAll ssa("jvmti method tracing installation");
+  if (enable) {
+    if (!instr->AreAllMethodsDeoptimized()) {  // Turn tracing on only if not yet deoptimized.
+      instr->EnableMethodTracing("jvmti-tracing", /*needs_interpreter*/true);
+    }
+    instr->AddListener(listener, new_events);
+  } else {  // Only removes the listener; nothing here turns method tracing back off.
+    instr->RemoveListener(listener, new_events);
+  }
+}
+
 // Handle special work for the given event type, if necessary.
 void EventHandler::HandleEventType(ArtJvmtiEvent event, bool enable) {
   switch (event) {
@@ -306,6 +527,11 @@
       SetupGcPauseTracking(gc_pause_listener_.get(), event, enable);
       return;
 
+    case ArtJvmtiEvent::kMethodEntry:
+    case ArtJvmtiEvent::kMethodExit:
+      SetupMethodTraceListener(method_trace_listener_.get(), event, enable);
+      return;
+
     default:
       break;
   }
@@ -419,9 +645,21 @@
   return ERR(NONE);
 }
 
+void EventHandler::Shutdown() {
+  // Unregister method_trace_listener_ from the runtime's instrumentation, if it was ever added.
+  art::Thread* self = art::Thread::Current();
+  art::gc::ScopedGCCriticalSection gcs(self,
+                                       art::gc::kGcCauseInstrumentation,
+                                       art::gc::kCollectorTypeInstrumentation);
+  art::ScopedSuspendAll ssa("jvmti method tracing uninstallation");
+  // ~0 has every bit set, so the listener is removed for every possible event.
+  art::Runtime::Current()->GetInstrumentation()->RemoveListener(method_trace_listener_.get(), ~0);
+}
+
 EventHandler::EventHandler() {
   alloc_listener_.reset(new JvmtiAllocationListener(this));
   gc_pause_listener_.reset(new JvmtiGcPauseListener(this));
+  method_trace_listener_.reset(new JvmtiMethodTraceListener(this));
 }
 
 EventHandler::~EventHandler() {
diff --git a/runtime/openjdkjvmti/events.h b/runtime/openjdkjvmti/events.h
index b9e3cf0..5f37dcf 100644
--- a/runtime/openjdkjvmti/events.h
+++ b/runtime/openjdkjvmti/events.h
@@ -29,6 +29,7 @@
 struct ArtJvmTiEnv;
 class JvmtiAllocationListener;
 class JvmtiGcPauseListener;
+class JvmtiMethodTraceListener;
 
 // an enum for ArtEvents. This differs from the JVMTI events only in that we distinguish between
 // retransformation capable and incapable loading
@@ -137,6 +138,9 @@
   EventHandler();
   ~EventHandler();
 
+  // Do cleanup for the event handler: removes the method trace listener from instrumentation.
+  void Shutdown();
+
   // Register an env. It is assumed that this happens on env creation, that is, no events are
   // enabled, yet.
   void RegisterArtJvmTiEnv(ArtJvmTiEnv* env);
@@ -160,6 +164,12 @@
   template <ArtJvmtiEvent kEvent, typename ...Args>
   ALWAYS_INLINE
   inline void DispatchEvent(art::Thread* thread, Args... args) const;
+  // Dispatch event to all registered environments stashing exceptions as needed. This works since
+  // JNIEnv* is always the second argument if it is passed to an event. Needed since C++ does not
+  // allow partial template function specialization.
+  template <ArtJvmtiEvent kEvent, typename ...Args>
+  ALWAYS_INLINE
+  void DispatchEvent(art::Thread* thread, JNIEnv* jnienv, Args... args) const;
   // Dispatch event to the given environment, only.
   template <ArtJvmtiEvent kEvent, typename ...Args>
   ALWAYS_INLINE
@@ -211,6 +221,7 @@
 
   std::unique_ptr<JvmtiAllocationListener> alloc_listener_;
   std::unique_ptr<JvmtiGcPauseListener> gc_pause_listener_;
+  std::unique_ptr<JvmtiMethodTraceListener> method_trace_listener_;
 };
 
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_class.cc b/runtime/openjdkjvmti/ti_class.cc
index 0aa93df..ed54cd1 100644
--- a/runtime/openjdkjvmti/ti_class.cc
+++ b/runtime/openjdkjvmti/ti_class.cc
@@ -103,7 +103,8 @@
     return nullptr;
   }
   uint32_t checksum = reinterpret_cast<const art::DexFile::Header*>(map->Begin())->checksum_;
-  std::unique_ptr<const art::DexFile> dex_file(art::DexFile::Open(map->GetName(),
+  std::string map_name = map->GetName();
+  std::unique_ptr<const art::DexFile> dex_file(art::DexFile::Open(map_name,
                                                                   checksum,
                                                                   std::move(map),
                                                                   /*verify*/true,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 743d6f4..c11e4bd 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -57,6 +57,7 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "asm_support.h"
+#include "asm_support_check.h"
 #include "atomic.h"
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
diff --git a/runtime/ti/agent.cc b/runtime/ti/agent.cc
index 86f5282..82b9af3 100644
--- a/runtime/ti/agent.cc
+++ b/runtime/ti/agent.cc
@@ -18,6 +18,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "base/strlcpy.h"
 #include "java_vm_ext.h"
 #include "runtime.h"
 
@@ -57,7 +58,7 @@
   }
   // Need to let the function fiddle with the array.
   std::unique_ptr<char[]> copied_args(new char[args_.size() + 1]);
-  strcpy(copied_args.get(), args_.c_str());
+  strlcpy(copied_args.get(), args_.c_str(), args_.size() + 1);
   // TODO Need to do some checks that we are at a good spot etc.
   *call_res = callback(Runtime::Current()->GetJavaVM(),
                        copied_args.get(),
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 20a53b7..c4b0441 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -303,7 +303,7 @@
   if (NeedsEscaping(ch)) {
     StringAppendF(&result, "\\u%04x", ch);
   } else {
-    result += ch;
+    result += static_cast<std::string::value_type>(ch);
   }
   result += '\'';
   return result;
@@ -330,7 +330,7 @@
       if (NeedsEscaping(leading)) {
         StringAppendF(&result, "\\u%04x", leading);
       } else {
-        result += leading;
+        result += static_cast<std::string::value_type>(leading);
       }
 
       const uint32_t trailing = GetTrailingUtf16Char(ch);
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index cb208f4..46fdc54 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -24,7 +24,6 @@
 #include "base/arena_allocator.h"
 #include "base/macros.h"
 #include "base/scoped_arena_containers.h"
-#include "base/stl_util.h"
 #include "base/value_object.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
diff --git a/test/596-app-images/app_images.cc b/test/596-app-images/app_images.cc
index 42211f7..fa9c902 100644
--- a/test/596-app-images/app_images.cc
+++ b/test/596-app-images/app_images.cc
@@ -63,6 +63,12 @@
   return JNI_FALSE;
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkInitialized(JNIEnv*, jclass, jclass c) {
+  ScopedObjectAccess soa(Thread::Current());  // Scope used to decode `c` below.
+  ObjPtr<mirror::Class> klass_ptr = soa.Decode<mirror::Class>(c);
+  return klass_ptr->IsInitialized();  // Native side of Main.checkInitialized(Class).
+}
+
 }  // namespace
 
 }  // namespace art
diff --git a/test/596-app-images/src/Main.java b/test/596-app-images/src/Main.java
index 75b31b8..8ee3c88 100644
--- a/test/596-app-images/src/Main.java
+++ b/test/596-app-images/src/Main.java
@@ -16,7 +16,11 @@
 
 class Main {
   static class Inner {
-    public static int abc = 0;
+    final public static int abc = 10;
+  }
+
+  static class Nested {
+
   }
 
   public static void main(String[] args) {
@@ -26,8 +30,44 @@
     } else if (!checkAppImageContains(Inner.class)) {
       System.out.println("App image does not contain Inner!");
     }
+
+    if (!checkInitialized(Inner.class))
+      System.out.println("Inner class is not initialized!");
+
+    if (!checkInitialized(Nested.class))
+      System.out.println("Nested class is not initialized!");
+
+    if (!checkInitialized(StaticFields.class))
+      System.out.println("StaticFields class is not initialized!");
+
+    if (!checkInitialized(StaticFieldsInitSub.class))
+      System.out.println("StaticFieldsInitSub class is not initialized!");
+
+    if (!checkInitialized(StaticFieldsInit.class))
+      System.out.println("StaticFieldsInit class is not initialized!");
+
+    if (checkInitialized(StaticInternString.class))
+      System.out.println("StaticInternString class is initialized!");
   }
 
   public static native boolean checkAppImageLoaded();
   public static native boolean checkAppImageContains(Class<?> klass);
+  public static native boolean checkInitialized(Class<?> klass);
 }
+
+class StaticFields{  // Main.main prints an error unless this class reports as initialized.
+  public static int abc;
+}
+
+class StaticFieldsInitSub extends StaticFieldsInit {  // Main.main expects this to be initialized.
+  final public static int def = 10;
+}
+
+class StaticFieldsInit{  // Main.main prints an error unless this class reports as initialized.
+  final public static int abc = 10;
+}
+
+class StaticInternString {  // Main.main prints an error if this class IS already initialized.
+  final public static String intern = "java.abc.Action";
+}
+
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index fe45807..4711214 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -33,6 +33,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -57,6 +64,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMinUnsigned(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -78,6 +92,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -102,6 +123,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMaxUnsigned(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
index e2998da..79795ee 100644
--- a/test/651-checker-char-simd-minmax/src/Main.java
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -33,6 +33,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -54,6 +61,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java
index cf04f85..23a6d54 100644
--- a/test/651-checker-double-simd-minmax/src/Main.java
+++ b/test/651-checker-double-simd-minmax/src/Main.java
@@ -27,6 +27,7 @@
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   // TODO x86: 0.0 vs -0.0?
+  // TODO MIPS64: min(x, NaN)?
   //
   /// CHECK-START-ARM64: void Main.doitMin(double[], double[], double[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
@@ -49,6 +50,7 @@
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   // TODO x86: 0.0 vs -0.0?
+  // TODO MIPS64: max(x, NaN)?
   //
   /// CHECK-START-ARM64: void Main.doitMax(double[], double[], double[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/651-checker-float-simd-minmax/src/Main.java b/test/651-checker-float-simd-minmax/src/Main.java
index bd412e0..3959c82 100644
--- a/test/651-checker-float-simd-minmax/src/Main.java
+++ b/test/651-checker-float-simd-minmax/src/Main.java
@@ -27,6 +27,7 @@
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   // TODO x86: 0.0 vs -0.0?
+  // TODO MIPS64: min(x, NaN)?
   //
   /// CHECK-START-ARM64: void Main.doitMin(float[], float[], float[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
@@ -49,6 +50,7 @@
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   // TODO x86: 0.0 vs -0.0?
+  // TODO MIPS64: max(x, NaN)?
   //
   /// CHECK-START-ARM64: void Main.doitMax(float[], float[], float[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java
index 6cee7b5..2a97009 100644
--- a/test/651-checker-int-simd-minmax/src/Main.java
+++ b/test/651-checker-int-simd-minmax/src/Main.java
@@ -32,6 +32,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(int[] x, int[] y, int[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -52,6 +59,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(int[] x, int[] y, int[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-long-simd-minmax/src/Main.java b/test/651-checker-long-simd-minmax/src/Main.java
index 51cf67e..6289a1e 100644
--- a/test/651-checker-long-simd-minmax/src/Main.java
+++ b/test/651-checker-long-simd-minmax/src/Main.java
@@ -28,8 +28,16 @@
   //
   // Not directly supported for longs.
   //
-  /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-START-ARM64: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
   /// CHECK-NOT: VecMin
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+
   private static void doitMin(long[] x, long[] y, long[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -46,8 +54,15 @@
   //
   // Not directly supported for longs.
   //
-  /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-START-ARM64: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
   /// CHECK-NOT: VecMax
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(long[] x, long[] y, long[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
index 09485a2..3bd1305 100644
--- a/test/651-checker-short-simd-minmax/src/Main.java
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -33,6 +33,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -57,6 +64,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMinUnsigned(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -78,6 +92,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -102,6 +123,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMaxUnsigned(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
diff --git a/test/906-iterate-heap/expected.txt b/test/906-iterate-heap/expected.txt
index b6af843..73b7129 100644
--- a/test/906-iterate-heap/expected.txt
+++ b/test/906-iterate-heap/expected.txt
@@ -18,14 +18,14 @@
 2
 1@0 (32, 2xD '0000000000000000000000000000f03f')
 2
+doTestPrimitiveFieldsClasses
 10000@0 (static, int, index=3) 0000000000000000
 10001
 10000@0 (static, int, index=11) 0000000000000000
 10001
-10000@0 (static, int, index=0) 0000000000000000
 10001
-10000@0 (static, int, index=1) 0000000000000000
 10001
+doTestPrimitiveFieldsIntegral
 10000@0 (instance, int, index=2) 0000000000000000
 10001@0 (instance, byte, index=4) 0000000000000001
 10002@0 (instance, char, index=5) 0000000000000061
@@ -33,6 +33,7 @@
 10004@0 (instance, long, index=7) 0000000000000004
 10005@0 (instance, short, index=9) 0000000000000002
 10006
+doTestPrimitiveFieldsFloat
 10000@0 (instance, int, index=3) 0000000000000000
 10001@0 (instance, byte, index=5) 0000000000000001
 10002@0 (instance, char, index=6) 0000000000000061
diff --git a/test/906-iterate-heap/iterate_heap.cc b/test/906-iterate-heap/iterate_heap.cc
index 6534b4c..02ac699 100644
--- a/test/906-iterate-heap/iterate_heap.cc
+++ b/test/906-iterate-heap/iterate_heap.cc
@@ -408,5 +408,15 @@
   return env->NewStringUTF(ffc.data.c_str());
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test906_checkInitialized(
+    JNIEnv* env, jclass, jclass c) {  // Native side of Test906.checkInitialized(Class).
+  jint status;  // Out-parameter for GetClassStatus.
+  jvmtiError error = jvmti_env->GetClassStatus(c, &status);
+  if (JvmtiErrorToException(env, jvmti_env, error)) {
+    return false;  // Error path; the helper presumably raised a Java exception (confirm).
+  }
+  return (status & JVMTI_CLASS_STATUS_INITIALIZED) != 0;  // Is the INITIALIZED bit set?
+}
+
 }  // namespace Test906IterateHeap
 }  // namespace art
diff --git a/test/906-iterate-heap/src/art/Test906.java b/test/906-iterate-heap/src/art/Test906.java
index fe18e38..65c2c8c 100644
--- a/test/906-iterate-heap/src/art/Test906.java
+++ b/test/906-iterate-heap/src/art/Test906.java
@@ -142,6 +142,7 @@
   }
 
   private static void doTestPrimitiveFieldsClasses() {
+    System.out.println("doTestPrimitiveFieldsClasses");
     setTag(IntObject.class, 10000);
     System.out.println(iterateThroughHeapPrimitiveFields(10000));
     System.out.println(getTag(IntObject.class));
@@ -152,18 +153,40 @@
     System.out.println(getTag(FloatObject.class));
     setTag(FloatObject.class, 0);
 
+    boolean correctHeapValue = false;
     setTag(Inf1.class, 10000);
-    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    String heapTrace = iterateThroughHeapPrimitiveFields(10000);
+
+    if (!checkInitialized(Inf1.class)) {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000000");
+    } else {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000001");
+    }
+
+    if (!correctHeapValue)
+      System.out.println("Heap Trace for Inf1 is not as expected:\n" + heapTrace);
+
     System.out.println(getTag(Inf1.class));
     setTag(Inf1.class, 0);
 
     setTag(Inf2.class, 10000);
-    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    heapTrace = iterateThroughHeapPrimitiveFields(10000);
+
+    if (!checkInitialized(Inf2.class)) {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000000");
+    } else {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000001");
+    }
+
+    if (!correctHeapValue)
+      System.out.println("Heap Trace for Inf2 is not as expected:\n" + heapTrace);
     System.out.println(getTag(Inf2.class));
+
     setTag(Inf2.class, 0);
   }
 
   private static void doTestPrimitiveFieldsIntegral() {
+    System.out.println("doTestPrimitiveFieldsIntegral");
     IntObject intObject = new IntObject();
     setTag(intObject, 10000);
     System.out.println(iterateThroughHeapPrimitiveFields(10000));
@@ -171,6 +194,7 @@
   }
 
   private static void doTestPrimitiveFieldsFloat() {
+    System.out.println("doTestPrimitiveFieldsFloat");
     FloatObject floatObject = new FloatObject();
     setTag(floatObject, 10000);
     System.out.println(iterateThroughHeapPrimitiveFields(10000));
@@ -265,6 +289,7 @@
     return Main.getTag(o);
   }
 
+  private static native boolean checkInitialized(Class<?> klass);
   private static native int iterateThroughHeapCount(int heapFilter,
       Class<?> klassFilter, int stopAfter);
   private static native int iterateThroughHeapData(int heapFilter,
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index b128d1c..80f8b9e 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -140,9 +140,7 @@
 10001
 10000@0 (static, int, index=11) 0000000000000000
 10001
-10000@0 (static, int, index=0) 0000000000000000
 10001
-10000@0 (static, int, index=1) 0000000000000000
 10001
 10000@0 (instance, int, index=2) 0000000000000000
 10001@0 (instance, byte, index=4) 0000000000000001
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index ec36ceb..bf3f7b6 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -1078,5 +1078,14 @@
   CHECK(gFoundExt);
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test913_checkInitialized(JNIEnv* env, jclass, jclass c) {
+  jint status;  // Out-parameter for GetClassStatus; queried for class `c`.
+  jvmtiError error = jvmti_env->GetClassStatus(c, &status);
+  if (JvmtiErrorToException(env, jvmti_env, error)) {
+    return false;  // Error path; the helper presumably raised a Java exception (confirm).
+  }
+  return (status & JVMTI_CLASS_STATUS_INITIALIZED) != 0;  // Is the INITIALIZED bit set?
+}
+
 }  // namespace Test913Heaps
 }  // namespace art
diff --git a/test/913-heaps/src/art/Test913.java b/test/913-heaps/src/art/Test913.java
index 97f48ee..b999001 100644
--- a/test/913-heaps/src/art/Test913.java
+++ b/test/913-heaps/src/art/Test913.java
@@ -195,13 +195,33 @@
     System.out.println(getTag(FloatObject.class));
     setTag(FloatObject.class, 0);
 
+    boolean correctHeapValue = false;
     setTag(Inf1.class, 10000);
-    System.out.println(followReferencesPrimitiveFields(Inf1.class));
+    String heapTrace = followReferencesPrimitiveFields(Inf1.class);
+
+    if (!checkInitialized(Inf1.class)) {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000000");
+    } else {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000001");
+    }
+
+    if (!correctHeapValue)
+      System.out.println("Heap Trace for Inf1 is not as expected:\n" + heapTrace);
+
     System.out.println(getTag(Inf1.class));
     setTag(Inf1.class, 0);
 
     setTag(Inf2.class, 10000);
-    System.out.println(followReferencesPrimitiveFields(Inf2.class));
+    heapTrace = followReferencesPrimitiveFields(Inf2.class);
+
+    if (!checkInitialized(Inf2.class)) {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000000");
+    } else {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000001");
+    }
+
+    if (!correctHeapValue)
+      System.out.println("Heap Trace for Inf2 is not as expected:\n" + heapTrace);
     System.out.println(getTag(Inf2.class));
     setTag(Inf2.class, 0);
   }
@@ -712,6 +732,7 @@
     return Main.getTag(o);
   }
 
+  private static native boolean checkInitialized(Class<?> klass);
   private static native void setupGcCallback();
   private static native void enableGcTracking(boolean enable);
   private static native int getGcStarts();
diff --git a/test/988-method-trace/expected.txt b/test/988-method-trace/expected.txt
new file mode 100644
index 0000000..8c67d66
--- /dev/null
+++ b/test/988-method-trace/expected.txt
@@ -0,0 +1,276 @@
+<= public static native void art.Trace.enableMethodTracing(java.lang.Class,java.lang.reflect.Method,java.lang.reflect.Method,java.lang.Thread) -> <null: null>
+=> art.Test988$IterOp()
+.=> public java.lang.Object()
+.<= public java.lang.Object() -> <null: null>
+<= art.Test988$IterOp() -> <null: null>
+=> public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator)
+.=> public int art.Test988$IterOp.applyAsInt(int)
+..=> static int art.Test988.iter_fibonacci(int)
+..<= static int art.Test988.iter_fibonacci(int) -> <class java.lang.Integer: 832040>
+.<= public int art.Test988$IterOp.applyAsInt(int) -> <class java.lang.Integer: 832040>
+.=> public art.Test988$FibResult(java.lang.String,int,int)
+..=> public java.lang.Object()
+..<= public java.lang.Object() -> <null: null>
+.<= public art.Test988$FibResult(java.lang.String,int,int) -> <null: null>
+.=> public boolean java.util.ArrayList.add(java.lang.Object)
+..=> private void java.util.ArrayList.ensureCapacityInternal(int)
+...=> private void java.util.ArrayList.ensureExplicitCapacity(int)
+...<= private void java.util.ArrayList.ensureExplicitCapacity(int) -> <null: null>
+..<= private void java.util.ArrayList.ensureCapacityInternal(int) -> <null: null>
+fibonacci(30)=832040
+.<= public boolean java.util.ArrayList.add(java.lang.Object) -> <class java.lang.Boolean: true>
+<= public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator) -> <null: null>
+=> art.Test988$RecurOp()
+.=> public java.lang.Object()
+.<= public java.lang.Object() -> <null: null>
+<= art.Test988$RecurOp() -> <null: null>
+=> public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator)
+.=> public int art.Test988$RecurOp.applyAsInt(int)
+..=> static int art.Test988.fibonacci(int)
+...=> static int art.Test988.fibonacci(int)
+....=> static int art.Test988.fibonacci(int)
+.....=> static int art.Test988.fibonacci(int)
+......=> static int art.Test988.fibonacci(int)
+......<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+......=> static int art.Test988.fibonacci(int)
+......<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 0>
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 2>
+....=> static int art.Test988.fibonacci(int)
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 0>
+....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+...<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 3>
+...=> static int art.Test988.fibonacci(int)
+....=> static int art.Test988.fibonacci(int)
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 0>
+....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+....=> static int art.Test988.fibonacci(int)
+....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+...<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 2>
+..<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 5>
+.<= public int art.Test988$RecurOp.applyAsInt(int) -> <class java.lang.Integer: 5>
+.=> public art.Test988$FibResult(java.lang.String,int,int)
+..=> public java.lang.Object()
+..<= public java.lang.Object() -> <null: null>
+.<= public art.Test988$FibResult(java.lang.String,int,int) -> <null: null>
+.=> public boolean java.util.ArrayList.add(java.lang.Object)
+..=> private void java.util.ArrayList.ensureCapacityInternal(int)
+...=> private void java.util.ArrayList.ensureExplicitCapacity(int)
+...<= private void java.util.ArrayList.ensureExplicitCapacity(int) -> <null: null>
+..<= private void java.util.ArrayList.ensureCapacityInternal(int) -> <null: null>
+fibonacci(5)=5
+.<= public boolean java.util.ArrayList.add(java.lang.Object) -> <class java.lang.Boolean: true>
+<= public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator) -> <null: null>
+=> art.Test988$IterOp()
+.=> public java.lang.Object()
+.<= public java.lang.Object() -> <null: null>
+<= art.Test988$IterOp() -> <null: null>
+=> public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator)
+.=> public int art.Test988$IterOp.applyAsInt(int)
+..=> static int art.Test988.iter_fibonacci(int)
+...=> public java.lang.StringBuilder()
+....=> java.lang.AbstractStringBuilder(int)
+.....=> public java.lang.Object()
+.....<= public java.lang.Object() -> <null: null>
+....<= java.lang.AbstractStringBuilder(int) -> <null: null>
+...<= public java.lang.StringBuilder() -> <null: null>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String)
+.....=> public int java.lang.String.length()
+.....<= public int java.lang.String.length() -> <class java.lang.Integer: 14>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> public void java.lang.String.getChars(int,int,char[],int)
+......=> public int java.lang.String.length()
+......<= public int java.lang.String.length() -> <class java.lang.Integer: 14>
+......=> native void java.lang.String.getCharsNoCheck(int,int,char[],int)
+......<= native void java.lang.String.getCharsNoCheck(int,int,char[],int) -> <null: null>
+.....<= public void java.lang.String.getChars(int,int,char[],int) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(int)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(int)
+.....=> static int java.lang.Integer.stringSize(int)
+.....<= static int java.lang.Integer.stringSize(int) -> <class java.lang.Integer: 2>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+......=> private int java.lang.AbstractStringBuilder.newCapacity(int)
+......<= private int java.lang.AbstractStringBuilder.newCapacity(int) -> <class java.lang.Integer: 34>
+......=> public static char[] java.util.Arrays.copyOf(char[],int)
+.......=> public static int java.lang.Math.min(int,int)
+.......<= public static int java.lang.Math.min(int,int) -> <class java.lang.Integer: 16>
+.......=> public static void java.lang.System.arraycopy(char[],int,char[],int,int)
+.......<= public static void java.lang.System.arraycopy(char[],int,char[],int,int) -> <null: null>
+......<= public static char[] java.util.Arrays.copyOf(char[],int) -> <class [C: [B, a, d,  , a, r, g, u, m, e, n, t, :,  , -, 1, 9,  , <,  , 0, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>]>
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> static void java.lang.Integer.getChars(int,int,char[])
+.....<= static void java.lang.Integer.getChars(int,int,char[]) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(int) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(int) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String)
+.....=> public int java.lang.String.length()
+.....<= public int java.lang.String.length() -> <class java.lang.Integer: 4>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> public void java.lang.String.getChars(int,int,char[],int)
+......=> public int java.lang.String.length()
+......<= public int java.lang.String.length() -> <class java.lang.Integer: 4>
+......=> native void java.lang.String.getCharsNoCheck(int,int,char[],int)
+......<= native void java.lang.String.getCharsNoCheck(int,int,char[],int) -> <null: null>
+.....<= public void java.lang.String.getChars(int,int,char[],int) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.String java.lang.StringBuilder.toString()
+....=> static native java.lang.String java.lang.StringFactory.newStringFromChars(int,int,char[])
+....<= static native java.lang.String java.lang.StringFactory.newStringFromChars(int,int,char[]) -> <class java.lang.String: Bad argument: -19 < 0>
+...<= public java.lang.String java.lang.StringBuilder.toString() -> <class java.lang.String: Bad argument: -19 < 0>
+...=> public java.lang.Error(java.lang.String)
+....=> public java.lang.Throwable(java.lang.String)
+.....=> public java.lang.Object()
+.....<= public java.lang.Object() -> <null: null>
+.....=> public static final java.util.List java.util.Collections.emptyList()
+.....<= public static final java.util.List java.util.Collections.emptyList() -> <class java.util.Collections$EmptyList: []>
+.....=> public synchronized java.lang.Throwable java.lang.Throwable.fillInStackTrace()
+......=> private static native java.lang.Object java.lang.Throwable.nativeFillInStackTrace()
+......<= private static native java.lang.Object java.lang.Throwable.nativeFillInStackTrace() -> <class [Ljava.lang.Object;: <non-deterministic>>
+.....<= public synchronized java.lang.Throwable java.lang.Throwable.fillInStackTrace() -> <class java.lang.Error: java.lang.Error: Bad argument: -19 < 0
+	at art.Test988.iter_fibonacci(Test988.java:203)
+	at art.Test988$IterOp.applyAsInt(Test988.java:198)
+	at art.Test988.doFibTest(Test988.java:291)
+	at art.Test988.run(Test988.java:261)
+	at Main.main(Main.java:19)
+>
+....<= public java.lang.Throwable(java.lang.String) -> <null: null>
+...<= public java.lang.Error(java.lang.String) -> <null: null>
+..<= static int art.Test988.iter_fibonacci(int) EXCEPTION
+.<= public int art.Test988$IterOp.applyAsInt(int) EXCEPTION
+.=> public art.Test988$FibThrow(java.lang.String,int,java.lang.Throwable)
+..=> public java.lang.Object()
+..<= public java.lang.Object() -> <null: null>
+.<= public art.Test988$FibThrow(java.lang.String,int,java.lang.Throwable) -> <null: null>
+.=> public boolean java.util.ArrayList.add(java.lang.Object)
+..=> private void java.util.ArrayList.ensureCapacityInternal(int)
+...=> private void java.util.ArrayList.ensureExplicitCapacity(int)
+...<= private void java.util.ArrayList.ensureExplicitCapacity(int) -> <null: null>
+..<= private void java.util.ArrayList.ensureCapacityInternal(int) -> <null: null>
+fibonacci(-19) -> java.lang.Error: Bad argument: -19 < 0
+	at art.Test988.iter_fibonacci(Test988.java:203)
+	at art.Test988$IterOp.applyAsInt(Test988.java:198)
+	at art.Test988.doFibTest(Test988.java:291)
+	at art.Test988.run(Test988.java:261)
+	at Main.main(Main.java:19)
+
+.<= public boolean java.util.ArrayList.add(java.lang.Object) -> <class java.lang.Boolean: true>
+<= public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator) -> <null: null>
+=> art.Test988$RecurOp()
+.=> public java.lang.Object()
+.<= public java.lang.Object() -> <null: null>
+<= art.Test988$RecurOp() -> <null: null>
+=> public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator)
+.=> public int art.Test988$RecurOp.applyAsInt(int)
+..=> static int art.Test988.fibonacci(int)
+...=> public java.lang.StringBuilder()
+....=> java.lang.AbstractStringBuilder(int)
+.....=> public java.lang.Object()
+.....<= public java.lang.Object() -> <null: null>
+....<= java.lang.AbstractStringBuilder(int) -> <null: null>
+...<= public java.lang.StringBuilder() -> <null: null>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String)
+.....=> public int java.lang.String.length()
+.....<= public int java.lang.String.length() -> <class java.lang.Integer: 14>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> public void java.lang.String.getChars(int,int,char[],int)
+......=> public int java.lang.String.length()
+......<= public int java.lang.String.length() -> <class java.lang.Integer: 14>
+......=> native void java.lang.String.getCharsNoCheck(int,int,char[],int)
+......<= native void java.lang.String.getCharsNoCheck(int,int,char[],int) -> <null: null>
+.....<= public void java.lang.String.getChars(int,int,char[],int) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(int)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(int)
+.....=> static int java.lang.Integer.stringSize(int)
+.....<= static int java.lang.Integer.stringSize(int) -> <class java.lang.Integer: 2>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+......=> private int java.lang.AbstractStringBuilder.newCapacity(int)
+......<= private int java.lang.AbstractStringBuilder.newCapacity(int) -> <class java.lang.Integer: 34>
+......=> public static char[] java.util.Arrays.copyOf(char[],int)
+.......=> public static int java.lang.Math.min(int,int)
+.......<= public static int java.lang.Math.min(int,int) -> <class java.lang.Integer: 16>
+.......=> public static void java.lang.System.arraycopy(char[],int,char[],int,int)
+.......<= public static void java.lang.System.arraycopy(char[],int,char[],int,int) -> <null: null>
+......<= public static char[] java.util.Arrays.copyOf(char[],int) -> <class [C: [B, a, d,  , a, r, g, u, m, e, n, t, :,  , -, 1, 9,  , <,  , 0, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>]>
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> static void java.lang.Integer.getChars(int,int,char[])
+.....<= static void java.lang.Integer.getChars(int,int,char[]) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(int) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(int) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String)
+.....=> public int java.lang.String.length()
+.....<= public int java.lang.String.length() -> <class java.lang.Integer: 4>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> public void java.lang.String.getChars(int,int,char[],int)
+......=> public int java.lang.String.length()
+......<= public int java.lang.String.length() -> <class java.lang.Integer: 4>
+......=> native void java.lang.String.getCharsNoCheck(int,int,char[],int)
+......<= native void java.lang.String.getCharsNoCheck(int,int,char[],int) -> <null: null>
+.....<= public void java.lang.String.getChars(int,int,char[],int) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.String java.lang.StringBuilder.toString()
+....=> static native java.lang.String java.lang.StringFactory.newStringFromChars(int,int,char[])
+....<= static native java.lang.String java.lang.StringFactory.newStringFromChars(int,int,char[]) -> <class java.lang.String: Bad argument: -19 < 0>
+...<= public java.lang.String java.lang.StringBuilder.toString() -> <class java.lang.String: Bad argument: -19 < 0>
+...=> public java.lang.Error(java.lang.String)
+....=> public java.lang.Throwable(java.lang.String)
+.....=> public java.lang.Object()
+.....<= public java.lang.Object() -> <null: null>
+.....=> public static final java.util.List java.util.Collections.emptyList()
+.....<= public static final java.util.List java.util.Collections.emptyList() -> <class java.util.Collections$EmptyList: []>
+.....=> public synchronized java.lang.Throwable java.lang.Throwable.fillInStackTrace()
+......=> private static native java.lang.Object java.lang.Throwable.nativeFillInStackTrace()
+......<= private static native java.lang.Object java.lang.Throwable.nativeFillInStackTrace() -> <class [Ljava.lang.Object;: <non-deterministic>>
+.....<= public synchronized java.lang.Throwable java.lang.Throwable.fillInStackTrace() -> <class java.lang.Error: java.lang.Error: Bad argument: -19 < 0
+	at art.Test988.fibonacci(Test988.java:225)
+	at art.Test988$RecurOp.applyAsInt(Test988.java:220)
+	at art.Test988.doFibTest(Test988.java:291)
+	at art.Test988.run(Test988.java:262)
+	at Main.main(Main.java:19)
+>
+....<= public java.lang.Throwable(java.lang.String) -> <null: null>
+...<= public java.lang.Error(java.lang.String) -> <null: null>
+..<= static int art.Test988.fibonacci(int) EXCEPTION
+.<= public int art.Test988$RecurOp.applyAsInt(int) EXCEPTION
+.=> public art.Test988$FibThrow(java.lang.String,int,java.lang.Throwable)
+..=> public java.lang.Object()
+..<= public java.lang.Object() -> <null: null>
+.<= public art.Test988$FibThrow(java.lang.String,int,java.lang.Throwable) -> <null: null>
+.=> public boolean java.util.ArrayList.add(java.lang.Object)
+..=> private void java.util.ArrayList.ensureCapacityInternal(int)
+...=> private void java.util.ArrayList.ensureExplicitCapacity(int)
+...<= private void java.util.ArrayList.ensureExplicitCapacity(int) -> <null: null>
+..<= private void java.util.ArrayList.ensureCapacityInternal(int) -> <null: null>
+fibonacci(-19) -> java.lang.Error: Bad argument: -19 < 0
+	at art.Test988.fibonacci(Test988.java:225)
+	at art.Test988$RecurOp.applyAsInt(Test988.java:220)
+	at art.Test988.doFibTest(Test988.java:291)
+	at art.Test988.run(Test988.java:262)
+	at Main.main(Main.java:19)
+
+.<= public boolean java.util.ArrayList.add(java.lang.Object) -> <class java.lang.Boolean: true>
+<= public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator) -> <null: null>
+=> public static native java.lang.Thread java.lang.Thread.currentThread()
+<= public static native java.lang.Thread java.lang.Thread.currentThread() -> <class java.lang.Thread: <non-deterministic>>
+=> public static native void art.Trace.disableMethodTracing(java.lang.Thread)
diff --git a/test/988-method-trace/info.txt b/test/988-method-trace/info.txt
new file mode 100644
index 0000000..f0a200d
--- /dev/null
+++ b/test/988-method-trace/info.txt
@@ -0,0 +1,15 @@
+Tests method tracing in JVMTI
+
+This test is sensitive to the internal implementations of:
+ * java.lang.Error
+ * java.lang.Integer
+ * java.lang.Math
+ * java.lang.String
+ * java.lang.System
+ * java.util.ArrayList
+ * java.util.Arrays
+ * java.lang.StringBuilder
+ * all super-classes and super-interfaces of the above types.
+
+Changes to the internal implementation of these classes might (or might not)
+change the output of this test.
diff --git a/test/988-method-trace/run b/test/988-method-trace/run
new file mode 100755
index 0000000..51875a7
--- /dev/null
+++ b/test/988-method-trace/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run with the --jvmti option; this test exercises JVMTI method tracing.
+./default-run "$@" --jvmti
diff --git a/test/988-method-trace/src/Main.java b/test/988-method-trace/src/Main.java
new file mode 100644
index 0000000..9dd1142
--- /dev/null
+++ b/test/988-method-trace/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    art.Test988.run();
+  }
+}
diff --git a/test/988-method-trace/src/art/Test988.java b/test/988-method-trace/src/art/Test988.java
new file mode 100644
index 0000000..6ac7b11
--- /dev/null
+++ b/test/988-method-trace/src/art/Test988.java
@@ -0,0 +1,297 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.Arrays;
+import java.lang.reflect.Method;
+import java.util.List;
+import java.util.Set;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.function.IntUnaryOperator;
+import java.util.function.Function;
+
+public class Test988 {
+
+    // Methods with non-deterministic output that should not be printed.
+    static Set<Method> NON_DETERMINISTIC_OUTPUT_METHODS = new HashSet<>();
+
+    static {  // Methods listed here get their return value masked by MethodReturn.Print().
+      try {
+        NON_DETERMINISTIC_OUTPUT_METHODS.add(
+            Throwable.class.getDeclaredMethod("nativeFillInStackTrace"));
+      } catch (Exception e) {}  // Lookup failed: the method is simply left unmasked.
+      try {
+        NON_DETERMINISTIC_OUTPUT_METHODS.add(Thread.class.getDeclaredMethod("currentThread"));
+      } catch (Exception e) {}  // Lookup failed: the method is simply left unmasked.
+    }
+
+    static interface Printable {
+        public void Print();
+    }
+
+    static final class MethodEntry implements Printable {
+        private Object m;
+        private int cnt;
+        public MethodEntry(Object m, int cnt) {
+            this.m = m;
+            this.cnt = cnt;
+        }
+        @Override
+        public void Print() {
+            System.out.println(whitespace(cnt) + "=> " + m);
+        }
+    }
+
+    private static String genericToString(Object val) {  // String form used when printing.
+      if (val == null) {
+        return "null";
+      } else if (val.getClass().isArray()) {
+        return arrayToString(val);
+      } else if (val instanceof Throwable) {
+        StringWriter w = new StringWriter();
+        ((Throwable) val).printStackTrace(new PrintWriter(w));
+        return w.toString();  // Full stack trace text, so the result spans several lines.
+      } else {
+        return val.toString();
+      }
+    }
+
+    private static String charArrayToString(char[] src) {  // One list entry per char.
+      String[] res = new String[src.length];
+      for (int i = 0; i < src.length; i++) {
+        if (Character.isISOControl(src[i])) {  // Control chars are shown by their Unicode name.
+          res[i] = Character.getName(src[i]);
+        } else {
+          res[i] = Character.toString(src[i]);
+        }
+      }
+      return Arrays.toString(res);
+    }
+
+    private static String arrayToString(Object val) {
+      Class<?> klass = val.getClass();
+      if ((new Object[0]).getClass().isAssignableFrom(klass)) {
+        return Arrays.toString(
+            Arrays.stream((Object[])val).map(Test988::genericToString).toArray());
+      } else if ((new byte[0]).getClass().isAssignableFrom(klass)) {
+        return Arrays.toString((byte[])val);
+      } else if ((new char[0]).getClass().isAssignableFrom(klass)) {
+        return charArrayToString((char[])val);
+      } else if ((new short[0]).getClass().isAssignableFrom(klass)) {
+        return Arrays.toString((short[])val);
+      } else if ((new int[0]).getClass().isAssignableFrom(klass)) {
+        return Arrays.toString((int[])val);
+      } else if ((new long[0]).getClass().isAssignableFrom(klass)) {
+        return Arrays.toString((long[])val);
+      } else if ((new float[0]).getClass().isAssignableFrom(klass)) {
+        return Arrays.toString((float[])val);
+      } else if ((new double[0]).getClass().isAssignableFrom(klass)) {
+        return Arrays.toString((double[])val);
+      } else {
+        throw new Error("Unknown type " + klass);
+      }
+    }
+
+    static final class MethodReturn implements Printable {
+        private Object m;
+        private Object val;
+        private int cnt;
+        public MethodReturn(Object m, Object val, int cnt) {
+            this.m = m;
+            this.val = val;
+            this.cnt = cnt;
+        }
+        @Override
+        public void Print() {
+            String print;
+            if (NON_DETERMINISTIC_OUTPUT_METHODS.contains(m)) {
+                print = "<non-deterministic>";
+            } else {
+                print = genericToString(val);
+            }
+            Class<?> klass = null;
+            if (val != null) {
+              klass = val.getClass();
+            }
+            System.out.println(
+                whitespace(cnt) + "<= " + m + " -> <" + klass + ": " + print + ">");
+        }
+    }
+
+    static final class MethodThrownThrough implements Printable {
+        private Object m;
+        private int cnt;
+        public MethodThrownThrough(Object m, int cnt) {
+            this.m = m;
+            this.cnt = cnt;
+        }
+        @Override
+        public void Print() {
+            System.out.println(whitespace(cnt) + "<= " + m + " EXCEPTION");
+        }
+    }
+
+    private static String whitespace(int n) {  // Returns n '.' chars (depth prefix), not spaces.
+      String out = "";
+      while (n > 0) {  // n <= 0 yields the empty string.
+        n--;
+        out += ".";
+      }
+      return out;
+    }
+
+    static final class FibThrow implements Printable {
+        private String format;
+        private int arg;
+        private Throwable res;
+        public FibThrow(String format, int arg, Throwable res) {
+            this.format = format;
+            this.arg = arg;
+            this.res = res;
+        }
+
+        @Override
+        public void Print() {
+            System.out.printf(format, arg, genericToString(res));
+        }
+    }
+
+    static final class FibResult implements Printable {
+        private String format;
+        private int arg;
+        private int res;
+        public FibResult(String format, int arg, int res) {
+            this.format = format;
+            this.arg = arg;
+            this.res = res;
+        }
+
+        @Override
+        public void Print() {
+            System.out.printf(format, arg, res);
+        }
+    }
+
+    private static List<Printable> results = new ArrayList<>();
+    private static int cnt = 1;
+
+    // Iterative version
+    static final class IterOp implements IntUnaryOperator {
+      public int applyAsInt(int x) {
+        return iter_fibonacci(x);
+      }
+    }
+    static int iter_fibonacci(int n) {  // Iterative fib(n); throws Error for negative n.
+        if (n < 0) {
+            throw new Error("Bad argument: " + n + " < 0");
+        } else if (n == 0) {
+            return 0;
+        }
+        int x = 1;  // fib(i - 2) at the top of each iteration; starts as fib(1).
+        int y = 1;  // fib(i - 1) at the top of each iteration; starts as fib(2).
+        for (int i = 3; i <= n; i++) {  // Never entered for n == 1 or 2, which both return 1.
+            int z = x + y;
+            x = y;
+            y = z;
+        }
+        return y;
+    }
+
+    // Recursive version
+    static final class RecurOp implements IntUnaryOperator {
+      public int applyAsInt(int x) {
+        return fibonacci(x);  // Line 220 in expected.txt stack traces; do not move.
+      }
+    }
+    static int fibonacci(int n) {  // Naive recursive fib(n); throws Error for negative n.
+        if (n < 0) {
+            throw new Error("Bad argument: " + n + " < 0");  // Line 225 in expected.txt traces.
+        } else if ((n == 0) || (n == 1)) {
+            return n;
+        } else {
+            return fibonacci(n - 1) + (fibonacci(n - 2));
+        }
+    }
+
+    public static void notifyMethodEntry(Object m) {
+        // Called by native code when a method is entered. This method is ignored by the native
+        // entry and exit hooks.
+        results.add(new MethodEntry(m, cnt));  // cnt is the current depth, printed as dots.
+        cnt++;  // Anything entered before the matching exit is recorded one level deeper.
+    }
+
+    public static void notifyMethodExit(Object m, boolean exception, Object result) {
+        cnt--;  // Back to the depth at which the matching entry was recorded.
+        if (exception) {
+            results.add(new MethodThrownThrough(m, cnt));  // result is not recorded in this case.
+        } else {
+            results.add(new MethodReturn(m, result, cnt));
+        }
+    }
+
+    public static void run() throws Exception {
+        // call this here so it is linked. It doesn't actually do anything here.
+        loadAllClasses();
+        Trace.disableMethodTracing(Thread.currentThread());
+        Trace.enableMethodTracing(
+            Test988.class,
+            Test988.class.getDeclaredMethod("notifyMethodEntry", Object.class),
+            Test988.class.getDeclaredMethod(
+                "notifyMethodExit", Object.class, Boolean.TYPE, Object.class),
+            Thread.currentThread());
+        doFibTest(30, new IterOp());
+        doFibTest(5, new RecurOp());
+        doFibTest(-19, new IterOp());
+        doFibTest(-19, new RecurOp());  // Line 262 in expected.txt stack traces; do not move.
+        // Turn off method tracing so we don't have to deal with print internals.
+        Trace.disableMethodTracing(Thread.currentThread());
+        printResults();  // Results are only buffered while tracing; all printing happens here.
+    }
+
+    // This ensures that all classes we touch are loaded before we start recording traces. This
+    // eliminates a major source of divergence between the RI and ART.
+    public static void loadAllClasses() {
+      MethodThrownThrough.class.toString();
+      MethodEntry.class.toString();
+      MethodReturn.class.toString();
+      FibResult.class.toString();
+      FibThrow.class.toString();
+      Printable.class.toString();
+      ArrayList.class.toString();
+      RecurOp.class.toString();
+      IterOp.class.toString();
+      StringBuilder.class.toString();
+    }
+
+    public static void printResults() {
+        for (Printable p : results) {
+            p.Print();
+        }
+    }
+
+    public static void doFibTest(int x, IntUnaryOperator op) {
+      try {
+        int y = op.applyAsInt(x);  // Line 291 in expected.txt stack traces; do not move.
+        results.add(new FibResult("fibonacci(%d)=%d\n", x, y));
+      } catch (Throwable t) {  // Record the failure as a result instead of propagating it.
+        results.add(new FibThrow("fibonacci(%d) -> %s\n", x, t));
+      }
+    }
+}
diff --git a/test/988-method-trace/src/art/Trace.java b/test/988-method-trace/src/art/Trace.java
new file mode 100644
index 0000000..3370996
--- /dev/null
+++ b/test/988-method-trace/src/art/Trace.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.reflect.Method;
+
+public class Trace {
+  public static native void enableMethodTracing(
+      Class<?> methodClass, Method entryMethod, Method exitMethod, Thread thr);
+  public static native void disableMethodTracing(Thread thr);
+}
diff --git a/test/989-method-trace-throw/expected.txt b/test/989-method-trace-throw/expected.txt
new file mode 100644
index 0000000..0911bc3
--- /dev/null
+++ b/test/989-method-trace-throw/expected.txt
@@ -0,0 +1,188 @@
+Normal: Entering public static void art.Test989.doNothing()
+Normal: Leaving public static void art.Test989.doNothing() returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$doNothingClass].
+Normal: Entering public static native void art.Test989.doNothingNative()
+Normal: Leaving public static native void art.Test989.doNothingNative() returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$doNothingNativeClass].
+Normal: Entering public static void art.Test989.throwA()
+Normal: Leaving public static void art.Test989.throwA() returned <exception>
+Received expected error for test[class art.Test989$NormalTracer, class art.Test989$throwAClass] - art.Test989$ErrorA: Throwing Error A
+Normal: Entering public static native void art.Test989.throwANative()
+Normal: Leaving public static native void art.Test989.throwANative() returned <exception>
+Received expected error for test[class art.Test989$NormalTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorA: Throwing Error A
+Normal: Entering public static java.lang.Object art.Test989.returnValue()
+Normal: Leaving public static java.lang.Object art.Test989.returnValue() returned TestObject(0)
+returnValue returned: TestObject(0)
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnValueClass].
+Normal: Entering public static native java.lang.Object art.Test989.returnValueNative()
+Normal: Leaving public static native java.lang.Object art.Test989.returnValueNative() returned TestObject(1)
+returnValueNative returned: TestObject(1)
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnValueNativeClass].
+Normal: Entering public static void art.Test989.acceptValue(java.lang.Object)
+Recieved TestObject(2)
+Normal: Leaving public static void art.Test989.acceptValue(java.lang.Object) returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$acceptValueClass].
+Normal: Entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+Recieved TestObject(3)
+Normal: Leaving public static native void art.Test989.acceptValueNative(java.lang.Object) returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$acceptValueNativeClass].
+Normal: Entering public static void art.Test989.tryCatchExit()
+Normal: Leaving public static void art.Test989.tryCatchExit() returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$tryCatchExitClass].
+Normal: Entering public static float art.Test989.returnFloat()
+Normal: Leaving public static float art.Test989.returnFloat() returned 1.618
+returnFloat returned: 1.618
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnFloatClass].
+Normal: Entering public static native float art.Test989.returnFloatNative()
+Normal: Leaving public static native float art.Test989.returnFloatNative() returned 1.618
+returnFloatNative returned: 1.618
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnFloatNativeClass].
+Normal: Entering public static double art.Test989.returnDouble()
+Normal: Leaving public static double art.Test989.returnDouble() returned 3.14159628
+returnDouble returned: 3.14159628
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnDoubleClass].
+Normal: Entering public static native double art.Test989.returnDoubleNative()
+Normal: Leaving public static native double art.Test989.returnDoubleNative() returned 3.14159628
+returnDoubleNative returned: 3.14159628
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnDoubleNativeClass].
+ThrowEnter: Entering public static void art.Test989.doNothing()
+ThrowEnter: Leaving public static void art.Test989.doNothing() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$doNothingClass] - art.Test989$ErrorB: Throwing error while entering public static void art.Test989.doNothing()
+ThrowEnter: Entering public static native void art.Test989.doNothingNative()
+ThrowEnter: Leaving public static native void art.Test989.doNothingNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$doNothingNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native void art.Test989.doNothingNative()
+ThrowEnter: Entering public static void art.Test989.throwA()
+ThrowEnter: Leaving public static void art.Test989.throwA() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$throwAClass] - art.Test989$ErrorB: Throwing error while entering public static void art.Test989.throwA()
+ThrowEnter: Entering public static native void art.Test989.throwANative()
+ThrowEnter: Leaving public static native void art.Test989.throwANative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorB: Throwing error while entering public static native void art.Test989.throwANative()
+ThrowEnter: Entering public static java.lang.Object art.Test989.returnValue()
+ThrowEnter: Leaving public static java.lang.Object art.Test989.returnValue() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnValueClass] - art.Test989$ErrorB: Throwing error while entering public static java.lang.Object art.Test989.returnValue()
+ThrowEnter: Entering public static native java.lang.Object art.Test989.returnValueNative()
+ThrowEnter: Leaving public static native java.lang.Object art.Test989.returnValueNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnValueNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native java.lang.Object art.Test989.returnValueNative()
+ThrowEnter: Entering public static void art.Test989.acceptValue(java.lang.Object)
+ThrowEnter: Leaving public static void art.Test989.acceptValue(java.lang.Object) returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$acceptValueClass] - art.Test989$ErrorB: Throwing error while entering public static void art.Test989.acceptValue(java.lang.Object)
+ThrowEnter: Entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+ThrowEnter: Leaving public static native void art.Test989.acceptValueNative(java.lang.Object) returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$acceptValueNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+ThrowEnter: Entering public static void art.Test989.tryCatchExit()
+ThrowEnter: Leaving public static void art.Test989.tryCatchExit() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$tryCatchExitClass] - art.Test989$ErrorB: Throwing error while entering public static void art.Test989.tryCatchExit()
+ThrowEnter: Entering public static float art.Test989.returnFloat()
+ThrowEnter: Leaving public static float art.Test989.returnFloat() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnFloatClass] - art.Test989$ErrorB: Throwing error while entering public static float art.Test989.returnFloat()
+ThrowEnter: Entering public static native float art.Test989.returnFloatNative()
+ThrowEnter: Leaving public static native float art.Test989.returnFloatNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnFloatNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native float art.Test989.returnFloatNative()
+ThrowEnter: Entering public static double art.Test989.returnDouble()
+ThrowEnter: Leaving public static double art.Test989.returnDouble() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnDoubleClass] - art.Test989$ErrorB: Throwing error while entering public static double art.Test989.returnDouble()
+ThrowEnter: Entering public static native double art.Test989.returnDoubleNative()
+ThrowEnter: Leaving public static native double art.Test989.returnDoubleNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnDoubleNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native double art.Test989.returnDoubleNative()
+ThrowExit: Entering public static void art.Test989.doNothing()
+ThrowExit: Leaving public static void art.Test989.doNothing() returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$doNothingClass] - art.Test989$ErrorB: Throwing error while exit public static void art.Test989.doNothing() returned null
+ThrowExit: Entering public static native void art.Test989.doNothingNative()
+ThrowExit: Leaving public static native void art.Test989.doNothingNative() returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$doNothingNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native void art.Test989.doNothingNative() returned null
+ThrowExit: Entering public static void art.Test989.throwA()
+ThrowExit: Leaving public static void art.Test989.throwA() returned <exception>
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$throwAClass] - art.Test989$ErrorB: Throwing error while exit public static void art.Test989.throwA() returned <exception>
+ThrowExit: Entering public static native void art.Test989.throwANative()
+ThrowExit: Leaving public static native void art.Test989.throwANative() returned <exception>
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorB: Throwing error while exit public static native void art.Test989.throwANative() returned <exception>
+ThrowExit: Entering public static java.lang.Object art.Test989.returnValue()
+ThrowExit: Leaving public static java.lang.Object art.Test989.returnValue() returned TestObject(7)
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnValueClass] - art.Test989$ErrorB: Throwing error while exit public static java.lang.Object art.Test989.returnValue() returned TestObject(7)
+ThrowExit: Entering public static native java.lang.Object art.Test989.returnValueNative()
+ThrowExit: Leaving public static native java.lang.Object art.Test989.returnValueNative() returned TestObject(8)
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnValueNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native java.lang.Object art.Test989.returnValueNative() returned TestObject(8)
+ThrowExit: Entering public static void art.Test989.acceptValue(java.lang.Object)
+Recieved TestObject(9)
+ThrowExit: Leaving public static void art.Test989.acceptValue(java.lang.Object) returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$acceptValueClass] - art.Test989$ErrorB: Throwing error while exit public static void art.Test989.acceptValue(java.lang.Object) returned null
+ThrowExit: Entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+Recieved TestObject(10)
+ThrowExit: Leaving public static native void art.Test989.acceptValueNative(java.lang.Object) returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$acceptValueNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native void art.Test989.acceptValueNative(java.lang.Object) returned null
+ThrowExit: Entering public static void art.Test989.tryCatchExit()
+ThrowExit: Leaving public static void art.Test989.tryCatchExit() returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$tryCatchExitClass] - art.Test989$ErrorB: Throwing error while exit public static void art.Test989.tryCatchExit() returned null
+ThrowExit: Entering public static float art.Test989.returnFloat()
+ThrowExit: Leaving public static float art.Test989.returnFloat() returned 1.618
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnFloatClass] - art.Test989$ErrorB: Throwing error while exit public static float art.Test989.returnFloat() returned 1.618
+ThrowExit: Entering public static native float art.Test989.returnFloatNative()
+ThrowExit: Leaving public static native float art.Test989.returnFloatNative() returned 1.618
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnFloatNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native float art.Test989.returnFloatNative() returned 1.618
+ThrowExit: Entering public static double art.Test989.returnDouble()
+ThrowExit: Leaving public static double art.Test989.returnDouble() returned 3.14159628
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnDoubleClass] - art.Test989$ErrorB: Throwing error while exit public static double art.Test989.returnDouble() returned 3.14159628
+ThrowExit: Entering public static native double art.Test989.returnDoubleNative()
+ThrowExit: Leaving public static native double art.Test989.returnDoubleNative() returned 3.14159628
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnDoubleNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native double art.Test989.returnDoubleNative() returned 3.14159628
+ThrowBoth: Entering public static void art.Test989.doNothing()
+ThrowBoth: Leaving public static void art.Test989.doNothing() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$doNothingClass] - art.Test989$ErrorC: Throwing error while exit public static void art.Test989.doNothing() returned <exception>
+ThrowBoth: Entering public static native void art.Test989.doNothingNative()
+ThrowBoth: Leaving public static native void art.Test989.doNothingNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$doNothingNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native void art.Test989.doNothingNative() returned <exception>
+ThrowBoth: Entering public static void art.Test989.throwA()
+ThrowBoth: Leaving public static void art.Test989.throwA() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$throwAClass] - art.Test989$ErrorC: Throwing error while exit public static void art.Test989.throwA() returned <exception>
+ThrowBoth: Entering public static native void art.Test989.throwANative()
+ThrowBoth: Leaving public static native void art.Test989.throwANative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorC: Throwing error while exit public static native void art.Test989.throwANative() returned <exception>
+ThrowBoth: Entering public static java.lang.Object art.Test989.returnValue()
+ThrowBoth: Leaving public static java.lang.Object art.Test989.returnValue() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnValueClass] - art.Test989$ErrorC: Throwing error while exit public static java.lang.Object art.Test989.returnValue() returned <exception>
+ThrowBoth: Entering public static native java.lang.Object art.Test989.returnValueNative()
+ThrowBoth: Leaving public static native java.lang.Object art.Test989.returnValueNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnValueNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native java.lang.Object art.Test989.returnValueNative() returned <exception>
+ThrowBoth: Entering public static void art.Test989.acceptValue(java.lang.Object)
+ThrowBoth: Leaving public static void art.Test989.acceptValue(java.lang.Object) returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$acceptValueClass] - art.Test989$ErrorC: Throwing error while exit public static void art.Test989.acceptValue(java.lang.Object) returned <exception>
+ThrowBoth: Entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+ThrowBoth: Leaving public static native void art.Test989.acceptValueNative(java.lang.Object) returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$acceptValueNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native void art.Test989.acceptValueNative(java.lang.Object) returned <exception>
+ThrowBoth: Entering public static void art.Test989.tryCatchExit()
+ThrowBoth: Leaving public static void art.Test989.tryCatchExit() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$tryCatchExitClass] - art.Test989$ErrorC: Throwing error while exit public static void art.Test989.tryCatchExit() returned <exception>
+ThrowBoth: Entering public static float art.Test989.returnFloat()
+ThrowBoth: Leaving public static float art.Test989.returnFloat() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnFloatClass] - art.Test989$ErrorC: Throwing error while exit public static float art.Test989.returnFloat() returned <exception>
+ThrowBoth: Entering public static native float art.Test989.returnFloatNative()
+ThrowBoth: Leaving public static native float art.Test989.returnFloatNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnFloatNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native float art.Test989.returnFloatNative() returned <exception>
+ThrowBoth: Entering public static double art.Test989.returnDouble()
+ThrowBoth: Leaving public static double art.Test989.returnDouble() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnDoubleClass] - art.Test989$ErrorC: Throwing error while exit public static double art.Test989.returnDouble() returned <exception>
+ThrowBoth: Entering public static native double art.Test989.returnDoubleNative()
+ThrowBoth: Leaving public static native double art.Test989.returnDoubleNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnDoubleNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native double art.Test989.returnDoubleNative() returned <exception>
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$doNothingClass].
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$doNothingNativeClass].
+Received expected error for test[class art.Test989$ForceGCTracer, class art.Test989$throwAClass] - art.Test989$ErrorA: Throwing Error A
+Received expected error for test[class art.Test989$ForceGCTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorA: Throwing Error A
+returnValue returned: TestObject(14)
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnValueClass].
+returnValueNative returned: TestObject(15)
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnValueNativeClass].
+Recieved TestObject(16)
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$acceptValueClass].
+Recieved TestObject(17)
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$acceptValueNativeClass].
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$tryCatchExitClass].
+returnFloat returned: 1.618
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnFloatClass].
+returnFloatNative returned: 1.618
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnFloatNativeClass].
+returnDouble returned: 3.14159628
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnDoubleClass].
+returnDoubleNative returned: 3.14159628
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnDoubleNativeClass].
+Finished!
diff --git a/test/989-method-trace-throw/info.txt b/test/989-method-trace-throw/info.txt
new file mode 100644
index 0000000..f0a200d
--- /dev/null
+++ b/test/989-method-trace-throw/info.txt
@@ -0,0 +1,15 @@
+Tests method tracing in JVMTI
+
+This test is sensitive to the internal implementations of:
+ * java.lang.Error
+ * java.lang.Integer
+ * java.lang.Math
+ * java.lang.String
+ * java.lang.System
+ * java.util.ArrayList
+ * java.util.Arrays
+ * java.lang.StringBuilder
+ * all super-classes and super-interfaces of the above types.
+
+Changes to the internal implementation of these classes might (or might not)
+change the output of this test.
diff --git a/test/989-method-trace-throw/method_trace.cc b/test/989-method-trace-throw/method_trace.cc
new file mode 100644
index 0000000..554784e
--- /dev/null
+++ b/test/989-method-trace-throw/method_trace.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <memory>
+#include <stdio.h>
+
+#include "android-base/logging.h"
+#include "android-base/stringprintf.h"
+
+#include "jni.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
+
+// Test infrastructure
+#include "jni_binder.h"
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
+#include "ti_macros.h"
+
+namespace art {
+namespace Test989StackTraceThrow {
+
+// Each native below re-enters Java through a static method of art.Test989.
+// GetStaticMethodID returns null and leaves an exception pending when the lookup fails, so the
+// call is skipped in that case and the pending exception surfaces once control returns to Java.
+
+// Returns the result of Test989.doGetFloat().
+extern "C" JNIEXPORT
+jfloat JNICALL Java_art_Test989_returnFloatNative(JNIEnv* env, jclass klass) {
+  jmethodID targetMethod = env->GetStaticMethodID(klass, "doGetFloat", "()F");
+  if (targetMethod == nullptr) {
+    return 0.0f;
+  }
+  return env->CallStaticFloatMethod(klass, targetMethod);
+}
+
+// Returns the result of Test989.doGetDouble().
+extern "C" JNIEXPORT
+jdouble JNICALL Java_art_Test989_returnDoubleNative(JNIEnv* env, jclass klass) {
+  jmethodID targetMethod = env->GetStaticMethodID(klass, "doGetDouble", "()D");
+  if (targetMethod == nullptr) {
+    return 0.0;
+  }
+  return env->CallStaticDoubleMethod(klass, targetMethod);
+}
+
+// Returns the object produced by Test989.mkTestObject().
+extern "C" JNIEXPORT jobject JNICALL Java_art_Test989_returnValueNative(JNIEnv* env, jclass klass) {
+  jmethodID targetMethod = env->GetStaticMethodID(klass, "mkTestObject", "()Ljava/lang/Object;");
+  if (targetMethod == nullptr) {
+    return nullptr;
+  }
+  return env->CallStaticObjectMethod(klass, targetMethod);
+}
+
+// Deliberately empty native method.
+extern "C" JNIEXPORT void JNICALL Java_art_Test989_doNothingNative(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                   jclass klass ATTRIBUTE_UNUSED) {
+  return;
+}
+
+// Calls Test989.doThrowA(); the exception it throws is left pending for the Java caller.
+extern "C" JNIEXPORT void JNICALL Java_art_Test989_throwANative(JNIEnv* env,
+                                                                jclass klass) {
+  jmethodID targetMethod = env->GetStaticMethodID(klass, "doThrowA", "()V");
+  if (targetMethod == nullptr) {
+    return;
+  }
+  env->CallStaticVoidMethod(klass, targetMethod);
+}
+
+// Passes `arg` to Test989.printObject(Object).
+extern "C" JNIEXPORT void JNICALL Java_art_Test989_acceptValueNative(JNIEnv* env,
+                                                                     jclass klass,
+                                                                     jobject arg) {
+  jmethodID targetMethod = env->GetStaticMethodID(klass, "printObject", "(Ljava/lang/Object;)V");
+  if (targetMethod == nullptr) {
+    return;
+  }
+  env->CallStaticVoidMethod(klass, targetMethod, arg);
+}
+
+}  // namespace Test989StackTraceThrow
+}  // namespace art
+
diff --git a/test/989-method-trace-throw/run b/test/989-method-trace-throw/run
new file mode 100755
index 0000000..51875a7
--- /dev/null
+++ b/test/989-method-trace-throw/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run the test through default-run with the --jvmti option.
+./default-run "$@" --jvmti
diff --git a/test/989-method-trace-throw/src/Main.java b/test/989-method-trace-throw/src/Main.java
new file mode 100644
index 0000000..29b9de1
--- /dev/null
+++ b/test/989-method-trace-throw/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    art.Test989.run();
+  }
+}
diff --git a/test/989-method-trace-throw/src/art/Test989.java b/test/989-method-trace-throw/src/art/Test989.java
new file mode 100644
index 0000000..18421bd
--- /dev/null
+++ b/test/989-method-trace-throw/src/art/Test989.java
@@ -0,0 +1,465 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.reflect.Method;
+import java.util.Set;
+import java.util.HashSet;
+
+public class Test989 {
+  static boolean PRINT_STACK_TRACE = false;
+  static Set<Method> testMethods = new HashSet<>();
+
+  static MethodTracer currentTracer = new MethodTracer() {
+    public void methodEntry(Object o) { return; }
+    public void methodExited(Object o, boolean e, Object r) { return; }
+  };
+
+  private static boolean DISABLE_TRACING = false;
+
+  static {
+    try {
+      testMethods.add(Test989.class.getDeclaredMethod("doNothing"));
+      testMethods.add(Test989.class.getDeclaredMethod("doNothingNative"));
+      testMethods.add(Test989.class.getDeclaredMethod("throwA"));
+      testMethods.add(Test989.class.getDeclaredMethod("throwANative"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnFloat"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnFloatNative"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnDouble"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnDoubleNative"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnValue"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnValueNative"));
+      testMethods.add(Test989.class.getDeclaredMethod("acceptValue", Object.class));
+      testMethods.add(Test989.class.getDeclaredMethod("acceptValueNative", Object.class));
+      testMethods.add(Test989.class.getDeclaredMethod("tryCatchExit"));
+    } catch (Exception e) {
+      throw new Error("Bad static!", e);
+    }
+  }
+
+  // Disables tracing only on RI. Used to work around an annoying piece of behavior where in the
+  // RI throwing an exception in an exit hook causes the exit hook to be re-executed. This leads
+  // to an infinite loop on the RI.
+  private static void disableTraceForRI() {
+    if (!System.getProperty("java.vm.name").equals("Dalvik")) {
+      Trace.disableMethodTracing(Thread.currentThread());
+    }
+  }
+
+  private static String getInfo(Object m, boolean exception, Object result) {
+    String out = m.toString() + " returned ";
+    if (exception) {
+      out += "<exception>";
+    } else {
+      out += result;
+    }
+    return out;
+  }
+
+  public static interface MethodTracer {
+    public void methodEntry(Object m);
+    public void methodExited(Object m, boolean exception, Object result);
+    public default Class<?> entryException() { return null; }
+    public default Class<?> exitException() { return null; }
+  }
+
+  public static class NormalTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (testMethods.contains(m)) {
+        System.out.println("Normal: Entering " + m);
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (testMethods.contains(m)) {
+        System.out.println("Normal: Leaving " + getInfo(m, exception, result));
+      }
+    }
+  }
+
+  public static class ThrowEnterTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (testMethods.contains(m)) {
+        System.out.println("ThrowEnter: Entering " + m);
+        throw new ErrorB("Throwing error while entering " + m);
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (testMethods.contains(m)) {
+        System.out.println("ThrowEnter: Leaving " + getInfo(m, exception, result));
+      }
+    }
+    public Class<?> entryException() { return ErrorB.class; }
+  }
+
+  public static class ThrowExitTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (testMethods.contains(m)) {
+        System.out.println("ThrowExit: Entering " + m);
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (testMethods.contains(m)) {
+        // The RI goes into an infinite loop if we throw exceptions in an ExitHook. See
+        // disableTraceForRI for explanation.
+        disableTraceForRI();
+        System.out.println("ThrowExit: Leaving " + getInfo(m, exception, result));
+        throw new ErrorB("Throwing error while exit " + getInfo(m, exception, result));
+      }
+    }
+    public Class<?> exitException() { return ErrorB.class; }
+  }
+
+  public static class ThrowBothTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (testMethods.contains(m)) {
+        System.out.println("ThrowBoth: Entering " + m);
+        throw new ErrorB("Throwing error while entering " + m);
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (testMethods.contains(m)) {
+        // The RI goes into an infinite loop if we throw exceptions in an ExitHook. See
+        // disableTraceForRI for explanation.
+        disableTraceForRI();
+        System.out.println("ThrowBoth: Leaving " + getInfo(m, exception, result));
+        throw new ErrorC("Throwing error while exit " + getInfo(m, exception, result));
+      }
+    }
+    public Class<?> entryException() { return ErrorB.class; }
+    public Class<?> exitException() { return ErrorC.class; }
+  }
+
+  public static class ForceGCTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (System.getProperty("java.vm.name").equals("Dalvik")) {
+        System.gc();
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (System.getProperty("java.vm.name").equals("Dalvik")) {
+        System.gc();
+      }
+    }
+  }
+
+  private static void maybeDisableTracing() throws Exception {
+    if (DISABLE_TRACING) {
+      Trace.disableMethodTracing(Thread.currentThread());
+    }
+  }
+
+  public static void baseNotifyMethodEntry(Object o) {
+    currentTracer.methodEntry(o);
+  }
+  public static void baseNotifyMethodExit(Object o, boolean exception, Object res) {
+    currentTracer.methodExited(o, exception, res);
+  }
+
+  private static void setupTracing() throws Exception {
+    Trace.enableMethodTracing(
+        Test989.class,
+        Test989.class.getDeclaredMethod("baseNotifyMethodEntry", Object.class),
+        Test989.class.getDeclaredMethod(
+            "baseNotifyMethodExit", Object.class, Boolean.TYPE, Object.class),
+        Thread.currentThread());
+  }
+  private static void setEntry(MethodTracer type) throws Exception {
+    if (DISABLE_TRACING || !System.getProperty("java.vm.name").equals("Dalvik")) {
+      Trace.disableMethodTracing(Thread.currentThread());
+      setupTracing();
+    }
+    currentTracer = type;
+  }
+
+  private static String testDescription(MethodTracer type, Runnable test) {
+    return "test[" + type.getClass() + ", " + test.getClass() + "]";
+  }
+
+  private static Class<?> getExpectedError(MethodTracer t, MyRunnable r) {
+    if (t.exitException() != null) {
+      return t.exitException();
+    } else if (t.entryException() != null) {
+      return t.entryException();
+    } else {
+      return r.expectedThrow();
+    }
+  }
+
+  private static void doTest(MethodTracer type, MyRunnable test) throws Exception {
+    Class<?> expected = getExpectedError(type, test);
+
+    setEntry(type);
+    try {
+      test.run();
+      // Disabling method tracing just makes this test somewhat faster.
+      maybeDisableTracing();
+      if (expected == null) {
+        System.out.println(
+            "Received no exception as expected for " + testDescription(type, test) + ".");
+        return;
+      }
+    } catch (Error t) {
+      // Disabling method tracing just makes this test somewhat faster.
+      maybeDisableTracing();
+      if (expected == null) {
+        throw new Error("Unexpected error occured: " + t + " for " + testDescription(type, test), t);
+      } else if (!expected.isInstance(t)) {
+        throw new Error("Expected error of type " + expected + " not " + t +
+            " for " + testDescription(type, test), t);
+      } else {
+        System.out.println(
+            "Received expected error for " + testDescription(type, test) + " - " + t);
+        if (PRINT_STACK_TRACE) {
+          t.printStackTrace();
+        }
+        return;
+      }
+    }
+    System.out.println("Expected an error of type " + expected + " but got no exception for "
+        + testDescription(type, test));
+    // throw new Error("Expected an error of type " + expected + " but got no exception for "
+    //     + testDescription(type, test));
+  }
+
+  public static interface MyRunnable extends Runnable {
+    public default Class<?> expectedThrow() {
+      return null;
+    }
+  }
+
+  public static void run() throws Exception {
+    MyRunnable[] testCases = new MyRunnable[] {
+      new doNothingClass(),
+      new doNothingNativeClass(),
+      new throwAClass(),
+      new throwANativeClass(),
+      new returnValueClass(),
+      new returnValueNativeClass(),
+      new acceptValueClass(),
+      new acceptValueNativeClass(),
+      new tryCatchExitClass(),
+      new returnFloatClass(),
+      new returnFloatNativeClass(),
+      new returnDoubleClass(),
+      new returnDoubleNativeClass(),
+    };
+    MethodTracer[] tracers = new MethodTracer[] {
+      new NormalTracer(),
+      new ThrowEnterTracer(),
+      new ThrowExitTracer(),
+      new ThrowBothTracer(),
+      new ForceGCTracer(),
+    };
+
+    setupTracing();
+    for (MethodTracer t : tracers) {
+      for (MyRunnable r : testCases) {
+        doTest(t, r);
+      }
+    }
+
+    maybeDisableTracing();
+    System.out.println("Finished!");
+    Trace.disableMethodTracing(Thread.currentThread());
+  }
+
+  private static final class throwAClass implements MyRunnable {
+    public void run() {
+      throwA();
+    }
+    @Override
+    public Class<?> expectedThrow() {
+      return ErrorA.class;
+    }
+  }
+
+  private static final class throwANativeClass implements MyRunnable {
+    public void run() {
+      throwANative();
+    }
+    @Override
+    public Class<?> expectedThrow() {
+      return ErrorA.class;
+    }
+  }
+
+  private static final class tryCatchExitClass implements MyRunnable {
+    public void run() {
+      tryCatchExit();
+    }
+  }
+
+  private static final class doNothingClass implements MyRunnable {
+    public void run() {
+      doNothing();
+    }
+  }
+
+  private static final class doNothingNativeClass implements MyRunnable {
+    public void run() {
+      doNothingNative();
+    }
+  }
+
+  private static final class acceptValueClass implements MyRunnable {
+    public void run() {
+      acceptValue(mkTestObject());
+    }
+  }
+
+  private static final class acceptValueNativeClass implements MyRunnable {
+    public void run() {
+      acceptValueNative(mkTestObject());
+    }
+  }
+
+  private static final class returnValueClass implements MyRunnable {
+    public void run() {
+      Object o = returnValue();
+      System.out.println("returnValue returned: " + o);
+    }
+  }
+
+  private static final class returnValueNativeClass implements MyRunnable {
+    public void run() {
+      Object o = returnValueNative();
+      System.out.println("returnValueNative returned: " + o);
+    }
+  }
+
+  private static final class returnFloatClass implements MyRunnable {
+    public void run() {
+      float d = returnFloat();
+      System.out.println("returnFloat returned: " + d);
+    }
+  }
+
+  private static final class returnFloatNativeClass implements MyRunnable {
+    public void run() {
+      float d = returnFloatNative();
+      System.out.println("returnFloatNative returned: " + d);
+    }
+  }
+
+  private static final class returnDoubleClass implements MyRunnable {
+    public void run() {
+      double d = returnDouble();
+      System.out.println("returnDouble returned: " + d);
+    }
+  }
+
+  private static final class returnDoubleNativeClass implements MyRunnable {
+    public void run() {
+      double d = returnDoubleNative();
+      System.out.println("returnDoubleNative returned: " + d);
+    }
+  }
+
+  private static class ErrorA extends Error {
+    private static final long serialVersionUID = 0;
+    public ErrorA(String s) { super(s); }
+  }
+
+  private static class ErrorB extends Error {
+    private static final long serialVersionUID = 1;
+    public ErrorB(String s) { super(s); }
+  }
+
+  private static class ErrorC extends Error {
+    private static final long serialVersionUID = 2;
+    public ErrorC(String s) { super(s); }
+  }
+
+  // Does nothing.
+  public static void doNothing() { }
+
+  public static void tryCatchExit() {
+    try {
+      Object o = mkTestObject();
+      return;
+    } catch (ErrorB b) {
+      System.out.println("ERROR: Caught " + b);
+      b.printStackTrace();
+    } catch (ErrorC c) {
+      System.out.println("ERROR: Caught " + c);
+      c.printStackTrace();
+    }
+  }
+
+  public static float returnFloat() {
+    return doGetFloat();
+  }
+
+  public static double returnDouble() {
+    return doGetDouble();
+  }
+
+  // Throws an ErrorA.
+  public static void throwA() {
+    doThrowA();
+  }
+
+  public static void doThrowA() {
+    throw new ErrorA("Throwing Error A");
+  }
+
+  static final class TestObject {
+    private int idx;
+    public TestObject(int v) {
+      this.idx = v;
+    }
+    @Override
+    public String toString() {
+      return "TestObject(" + idx + ")";
+    }
+  }
+
+  static int counter = 0;
+  public static Object mkTestObject() {
+    return new TestObject(counter++);
+  }
+
+  public static void printObject(Object o) {
+    System.out.println("Recieved " + o);
+  }
+
+  // Returns a newly allocated value.
+  public static Object returnValue() {
+    return mkTestObject();
+  }
+
+  public static void acceptValue(Object o) {
+    printObject(o);
+  }
+
+  public static float doGetFloat() {
+    return 1.618f;
+  }
+
+  public static double doGetDouble() {
+    return 3.14159628;
+  }
+
+  // Calls mkTestObject from native code and returns it.
+  public static native Object returnValueNative();
+  // Calls printObject from native code.
+  public static native void acceptValueNative(Object t);
+  public static native void doNothingNative();
+  public static native void throwANative();
+  public static native float returnFloatNative();
+  public static native double returnDoubleNative();
+}
diff --git a/test/989-method-trace-throw/src/art/Trace.java b/test/989-method-trace-throw/src/art/Trace.java
new file mode 100644
index 0000000..3370996
--- /dev/null
+++ b/test/989-method-trace-throw/src/art/Trace.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.reflect.Method;
+
+public class Trace {
+  public static native void enableMethodTracing(
+      Class<?> methodClass, Method entryMethod, Method exitMethod, Thread thr);
+  public static native void disableMethodTracing(Thread thr);
+}
diff --git a/test/Android.bp b/test/Android.bp
index 599b011..35c3d9c 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -278,6 +278,7 @@
         "984-obsolete-invoke/obsolete_invoke.cc",
         "986-native-method-bind/native_bind.cc",
         "987-agent-bind/agent_bind.cc",
+        "989-method-trace-throw/method_trace.cc",
     ],
     shared_libs: [
         "libbase",
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 7677025..8aacc8c 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -63,6 +63,8 @@
 TEST_IS_NDEBUG="n"
 APP_IMAGE="y"
 JVMTI_STRESS="n"
+JVMTI_TRACE_STRESS="n"
+JVMTI_REDEFINE_STRESS="n"
 VDEX_FILTER=""
 PROFILE="n"
 RANDOM_PROFILE="n"
@@ -151,10 +153,15 @@
     elif [ "x$1" = "x--prebuild" ]; then
         PREBUILD="y"
         shift
-    elif [ "x$1" = "x--jvmti-stress" ]; then
-        # APP_IMAGE doesn't really work with jvmti-torture
+    elif [ "x$1" = "x--jvmti-redefine-stress" ]; then
+        # APP_IMAGE doesn't really work with jvmti redefine stress
         APP_IMAGE="n"
         JVMTI_STRESS="y"
+        JVMTI_REDEFINE_STRESS="y"
+        shift
+    elif [ "x$1" = "x--jvmti-trace-stress" ]; then
+        JVMTI_STRESS="y"
+        JVMTI_TRACE_STRESS="y"
         shift
     elif [ "x$1" = "x--no-app-image" ]; then
         APP_IMAGE="n"
@@ -397,13 +404,25 @@
     plugin=libopenjdkjvmti.so
   fi
 
-  file_1=$(mktemp --tmpdir=${DEX_LOCATION})
-  file_2=$(mktemp --tmpdir=${DEX_LOCATION})
+  # Just give it a default start so we can always add ',' to it.
+  agent_args="jvmti-stress"
+  if [[ "$JVMTI_REDEFINE_STRESS" = "y" ]]; then
+    # We really cannot do this on RI so don't bother passing it in that case.
+    if [[ "$USE_JVM" = "n" ]]; then
+      file_1=$(mktemp --tmpdir=${DEX_LOCATION})
+      file_2=$(mktemp --tmpdir=${DEX_LOCATION})
+      # TODO Remove need for DEXTER_BINARY!
+      agent_args="${agent_args},redefine,${DEXTER_BINARY},${file_1},${file_2}"
+    fi
+  fi
+  if [[ "$JVMTI_TRACE_STRESS" = "y" ]]; then
+    agent_args="${agent_args},trace"
+  fi
+  # Future stress modes should append their arguments to agent_args here.
   if [[ "$USE_JVM" = "y" ]]; then
-    FLAGS="${FLAGS} -agentpath:${ANDROID_HOST_OUT}/nativetest64/${agent}=/bin/false,${file_1},${file_2}"
+    FLAGS="${FLAGS} -agentpath:${ANDROID_HOST_OUT}/nativetest64/${agent}=${agent_args}"
   else
-    # TODO Remove need for DEXTER_BINARY!
-    FLAGS="${FLAGS} -agentpath:${agent}=${DEXTER_BINARY},${file_1},${file_2}"
+    FLAGS="${FLAGS} -agentpath:${agent}=${agent_args}"
     if [ "$IS_JVMTI_TEST" = "n" ]; then
       FLAGS="${FLAGS} -Xplugin:${plugin}"
       FLAGS="${FLAGS} -Xcompiler-option --debuggable"
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 8ab741a..41e41d7 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -511,7 +511,7 @@
             "645-checker-abs-simd",
             "706-checker-scheduler"],
         "description": ["Checker tests are not compatible with jvmti."],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress | trace-stress"
     },
     {
         "tests": [
@@ -519,7 +519,7 @@
             "964-default-iface-init-gen"
         ],
         "description": ["Tests that just take too long with jvmti-stress"],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress | trace-stress"
     },
     {
         "tests": [
@@ -539,7 +539,7 @@
             "dexter/slicer."
         ],
         "bug": "b/37272822",
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress"
     },
     {
         "tests": [
@@ -550,7 +550,7 @@
             "981-dedup-original-dex"
         ],
         "description": ["Tests that require exact knowledge of the number of plugins and agents."],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress | trace-stress"
     },
     {
         "tests": [
@@ -564,7 +564,7 @@
         "description": [
             "Tests that use illegal dex files or otherwise break dexter assumptions"
         ],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress"
     },
     {
         "tests": [
@@ -581,7 +581,7 @@
             "Tests that use custom class loaders or other features not supported ",
             "by our JVMTI implementation"
         ],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress"
     },
     {
         "tests": [
@@ -592,7 +592,7 @@
             "Tests that use annotations and debug data that is not kept around by dexter."
         ],
         "bug": "b/37239009",
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress"
     },
     {
         "tests": [
@@ -701,6 +701,11 @@
         "env_vars": {"SANITIZE_HOST": "address"}
     },
     {
+        "tests": ["988-method-trace"],
+        "variant": "redefine-stress | jvmti-stress",
+        "description": "Test disabled due to redefine-stress disabling intrinsics which changes the trace output slightly."
+    },
+    {
         "tests": "137-cfi",
         "description": [ "ASan is reporting out-of-bounds reads in libunwind."],
         "variant": "host",
diff --git a/test/run-test b/test/run-test
index 7db37a4..41a0dc2 100755
--- a/test/run-test
+++ b/test/run-test
@@ -137,7 +137,8 @@
 basic_verify="false"
 gc_verify="false"
 gc_stress="false"
-jvmti_stress="false"
+jvmti_trace_stress="false"
+jvmti_redefine_stress="false"
 strace="false"
 always_clean="no"
 never_clean="no"
@@ -234,8 +235,11 @@
         basic_verify="true"
         gc_stress="true"
         shift
-    elif [ "x$1" = "x--jvmti-stress" ]; then
-        jvmti_stress="true"
+    elif [ "x$1" = "x--jvmti-redefine-stress" ]; then
+        jvmti_redefine_stress="true"
+        shift
+    elif [ "x$1" = "x--jvmti-trace-stress" ]; then
+        jvmti_trace_stress="true"
         shift
     elif [ "x$1" = "x--suspend-timeout" ]; then
         shift
@@ -447,8 +451,11 @@
 if [ "$gc_stress" = "true" ]; then
   run_args="${run_args} --gc-stress --runtime-option -Xgc:gcstress --runtime-option -Xms2m --runtime-option -Xmx16m"
 fi
-if [ "$jvmti_stress" = "true" ]; then
-    run_args="${run_args} --no-app-image --jvmti-stress"
+if [ "$jvmti_redefine_stress" = "true" ]; then
+    run_args="${run_args} --no-app-image --jvmti-redefine-stress"
+fi
+if [ "$jvmti_trace_stress" = "true" ]; then
+    run_args="${run_args} --no-app-image --jvmti-trace-stress"
 fi
 if [ "$trace" = "true" ]; then
     run_args="${run_args} --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file-size:2000000"
@@ -658,7 +665,9 @@
         echo "    --stream              Run method tracing in streaming mode (requires --trace)"
         echo "    --gcstress            Run with gc stress testing"
         echo "    --gcverify            Run with gc verification"
-        echo "    --jvmti-stress        Run with jvmti stress testing"
+        echo "    --jvmti-trace-stress  Run with jvmti method tracing stress testing"
+        echo "    --jvmti-redefine-stress"
+        echo "                          Run with jvmti method redefinition stress testing"
         echo "    --always-clean        Delete the test files even if the test fails."
         echo "    --never-clean         Keep the test files even if the test succeeds."
         echo "    --android-root [path] The path on target for the android root. (/system by default)."
@@ -728,8 +737,8 @@
 # Checker when compiled with Optimizing on host.
 if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then
   if [ "$runtime" = "art" -a "$image_suffix" = "" -a "$USE_JACK" = "true" ]; then
-    # In no-prebuild mode, the compiler only quickens so disable the checker.
-    if [ "$prebuild_mode" = "yes" ]; then
+    # In no-prebuild or no-image mode, the compiler only quickens so disable the checker.
+    if [ "$prebuild_mode" = "yes" -a "$have_image" = "yes" ]; then
       run_checker="yes"
 
       if [ "$target_mode" = "no" ]; then
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index 77ef25a..3445071 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -147,7 +147,7 @@
   VARIANT_TYPE_DICT['relocate'] = {'relocate-npatchoat', 'relocate', 'no-relocate'}
   VARIANT_TYPE_DICT['jni'] = {'jni', 'forcecopy', 'checkjni'}
   VARIANT_TYPE_DICT['address_sizes'] = {'64', '32'}
-  VARIANT_TYPE_DICT['jvmti'] = {'no-jvmti', 'jvmti-stress'}
+  VARIANT_TYPE_DICT['jvmti'] = {'no-jvmti', 'jvmti-stress', 'redefine-stress', 'trace-stress'}
   VARIANT_TYPE_DICT['compiler'] = {'interp-ac', 'interpreter', 'jit', 'optimizing',
                               'regalloc_gc', 'speed-profile'}
 
@@ -437,7 +437,11 @@
         options_test += ' --debuggable'
 
       if jvmti == 'jvmti-stress':
-        options_test += ' --jvmti-stress'
+        options_test += ' --jvmti-trace-stress --jvmti-redefine-stress'
+      elif jvmti == 'trace-stress':
+        options_test += ' --jvmti-trace-stress'
+      elif jvmti == 'redefine-stress':
+        options_test += ' --jvmti-redefine-stress'
 
       if address_size == '64':
         options_test += ' --64'
@@ -954,6 +958,10 @@
     IMAGE_TYPES.add('multipicimage')
   if options['jvmti_stress']:
     JVMTI_TYPES.add('jvmti-stress')
+  if options['redefine_stress']:
+    JVMTI_TYPES.add('redefine-stress')
+  if options['trace_stress']:
+    JVMTI_TYPES.add('trace-stress')
   if options['no_jvmti']:
     JVMTI_TYPES.add('no-jvmti')
   if options['verbose']:
diff --git a/test/ti-agent/common_helper.cc b/test/ti-agent/common_helper.cc
index bfd4d25..6eaa5c3 100644
--- a/test/ti-agent/common_helper.cc
+++ b/test/ti-agent/common_helper.cc
@@ -69,6 +69,214 @@
   env->ThrowNew(env->FindClass("java/lang/Exception"), message.c_str());
 }
 
+namespace common_trace {
+
+// Taken from art/runtime/modifiers.h
+static constexpr uint32_t kAccStatic =       0x0008;  // field, method, ic
+
+struct TraceData {  // Per-jvmtiEnv tracing state; stored as the environment-local storage.
+  jclass test_klass;  // Global ref to the class whose static enter/exit methods get invoked.
+  jmethodID enter_method;  // Static method called from the MethodEntry callback.
+  jmethodID exit_method;  // Static method called from the MethodExit callback.
+  bool in_callback;  // True while a callback is running; suppresses nested events.
+};
+
+// Wraps `m` in its reflection object via ToReflectedMethod. Yields nullptr as soon as
+// JvmtiErrorToException() reports that one of the JVMTI lookups failed.
+static jobject GetJavaMethod(jvmtiEnv* jvmti, JNIEnv* env, jmethodID m) {
+  jint modifiers = 0;
+  if (JvmtiErrorToException(env, jvmti, jvmti->GetMethodModifiers(m, &modifiers))) {
+    return nullptr;
+  }
+  jclass declaring_class = nullptr;
+  if (JvmtiErrorToException(env, jvmti, jvmti->GetMethodDeclaringClass(m, &declaring_class))) {
+    return nullptr;
+  }
+  jobject reflected = env->ToReflectedMethod(declaring_class, m, (modifiers & kAccStatic) != 0);
+  env->DeleteLocalRef(declaring_class);
+  return reflected;
+}
+
+// Converts `value`, the raw return value of method `m`, into an object for Java code.
+//  - void return type ('V'): nullptr.
+//  - reference or array return type ('L' / '['): value.l, passed through untouched.
+//  - primitive return type: the result of the matching java/lang wrapper's static valueOf().
+// Also returns nullptr when JvmtiErrorToException() reports that GetMethodName failed.
+static jobject GetJavaValue(jvmtiEnv* jvmtienv, JNIEnv* env, jmethodID m, jvalue value) {
+  char *fname, *fsig, *fgen;
+  if (JvmtiErrorToException(env, jvmtienv, jvmtienv->GetMethodName(m, &fname, &fsig, &fgen))) {
+    return nullptr;
+  }
+  // Everything after the ')' of the method signature is the return type descriptor.
+  const std::string signature(fsig);
+  const std::string type = signature.substr(signature.find(")") + 1);
+  // The JVMTI-allocated strings have been copied, so hand them back right away.
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fsig));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fgen));
+
+  const char kind = type[0];
+  if (kind == 'V') {
+    return nullptr;
+  }
+  if (kind == '[' || kind == 'L') {
+    return value.l;
+  }
+
+  // Pick the wrapper class that boxes this primitive kind.
+  std::string name;
+  if (kind == 'Z') {
+    name = "java/lang/Boolean";
+  } else if (kind == 'B') {
+    name = "java/lang/Byte";
+  } else if (kind == 'C') {
+    name = "java/lang/Character";
+  } else if (kind == 'S') {
+    name = "java/lang/Short";
+  } else if (kind == 'I') {
+    name = "java/lang/Integer";
+  } else if (kind == 'J') {
+    name = "java/lang/Long";
+  } else if (kind == 'F') {
+    name = "java/lang/Float";
+  } else if (kind == 'D') {
+    name = "java/lang/Double";
+  } else {
+    LOG(FATAL) << "Unable to figure out type!";
+    return nullptr;
+  }
+
+  // Descriptor of the boxing method, e.g. "(I)Ljava/lang/Integer;".
+  const std::string args = std::string("(") + kind + ")L" + name + ";";
+  jclass target = env->FindClass(name.c_str());
+  jmethodID valueOfMethod = env->GetStaticMethodID(target, "valueOf", args.c_str());
+  CHECK(valueOfMethod != nullptr) << args;
+
+  jobject res = env->CallStaticObjectMethodA(target, valueOfMethod, &value);
+  env->DeleteLocalRef(target);
+  return res;
+}
+
+// JVMTI MethodExit callback: forwards the event to the Java-side exit_method as
+// (reflected method, was_popped_by_exception, boxed return value or null). Events for the
+// enter/exit reporting methods, or raised while a callback is running, are dropped.
+static void methodExitCB(jvmtiEnv* jvmti,
+                         JNIEnv* jnienv,
+                         jthread thr ATTRIBUTE_UNUSED,
+                         jmethodID method,
+                         jboolean was_popped_by_exception,
+                         jvalue return_value) {
+  TraceData* data = nullptr;
+  jvmtiError err = jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data));
+  if (JvmtiErrorToException(jnienv, jvmti, err)) {
+    return;
+  }
+  const bool is_reporting_method = method == data->exit_method || method == data->enter_method;
+  if (is_reporting_method || data->in_callback) {
+    return;
+  }
+  data->in_callback = true;
+  jobject method_arg = GetJavaMethod(jvmti, jnienv, method);
+  jobject result = nullptr;
+  if (!was_popped_by_exception) {
+    result = GetJavaValue(jvmti, jnienv, method, return_value);
+  }
+  if (!jnienv->ExceptionCheck()) {
+    jnienv->CallStaticVoidMethod(
+        data->test_klass, data->exit_method, method_arg, was_popped_by_exception, result);
+    jnienv->DeleteLocalRef(method_arg);
+  }
+  data->in_callback = false;
+}
+
+// JVMTI MethodEntry callback: reports `method` to the Java-side enter_method.
+// Events for enter_method/exit_method, and events raised while a callback is already
+// running, are dropped to prevent an infinite loop.
+static void methodEntryCB(
+    jvmtiEnv* jvmti, JNIEnv* jnienv, jthread thr ATTRIBUTE_UNUSED, jmethodID method) {
+  TraceData* data = nullptr;
+  if (JvmtiErrorToException(jnienv, jvmti,
+                            jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)))) {
+    return;
+  }
+  if (method == data->exit_method || method == data->enter_method || data->in_callback) {
+    return;
+  }
+  data->in_callback = true;
+  jobject method_arg = GetJavaMethod(jvmti, jnienv, method);
+  if (!jnienv->ExceptionCheck()) {
+    jnienv->CallStaticVoidMethod(data->test_klass, data->enter_method, method_arg);
+    jnienv->DeleteLocalRef(method_arg);
+  }
+  // Clear the guard on every path; leaving it set would silence all later trace events.
+  data->in_callback = false;
+}
+
+// Native side of art.Trace.enableMethodTracing: records the reporting class and its
+// enter/exit methods in a freshly allocated TraceData, installs that as the environment-local
+// storage of jvmti_env, and enables MethodEntry/MethodExit events for `thr`.
+// Each JVMTI call is checked with JvmtiErrorToException(); the first failure aborts the setup.
+extern "C" JNIEXPORT void JNICALL Java_art_Trace_enableMethodTracing(
+    JNIEnv* env,
+    jclass trace ATTRIBUTE_UNUSED,
+    jclass klass,
+    jobject enter,
+    jobject exit,
+    jthread thr) {
+  TraceData* data = nullptr;
+  jvmtiError err =
+      jvmti_env->Allocate(sizeof(TraceData), reinterpret_cast<unsigned char**>(&data));
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+  memset(data, 0, sizeof(TraceData));
+  data->test_klass = reinterpret_cast<jclass>(env->NewGlobalRef(klass));
+  data->enter_method = env->FromReflectedMethod(enter);
+  data->exit_method = env->FromReflectedMethod(exit);
+  data->in_callback = false;
+
+  err = jvmti_env->SetEnvironmentLocalStorage(data);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+
+  // Only the two tracing callbacks are installed; every other slot stays zeroed.
+  jvmtiEventCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(callbacks));
+  callbacks.MethodEntry = methodEntryCB;
+  callbacks.MethodExit = methodExitCB;
+  err = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+  err = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_METHOD_ENTRY, thr);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+  err = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_METHOD_EXIT, thr);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+}
+
+// Native side of art.Trace.disableMethodTracing: turns MethodEntry/MethodExit events for
+// `thr` back off. Stops at the first call JvmtiErrorToException() reports as failed.
+// The callbacks and the TraceData installed by enableMethodTracing are left in place.
+extern "C" JNIEXPORT void JNICALL Java_art_Trace_disableMethodTracing(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jthread thr) {
+  jvmtiError err =
+      jvmti_env->SetEventNotificationMode(JVMTI_DISABLE, JVMTI_EVENT_METHOD_ENTRY, thr);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+  err = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE, JVMTI_EVENT_METHOD_EXIT, thr);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+}
+
+}  // namespace common_trace
+
 namespace common_redefine {
 
 static void throwRedefinitionError(jvmtiEnv* jvmti,
diff --git a/test/ti-stress/stress.cc b/test/ti-stress/stress.cc
index e8e3cc7..497db1c 100644
--- a/test/ti-stress/stress.cc
+++ b/test/ti-stress/stress.cc
@@ -20,6 +20,7 @@
 #include <fstream>
 #include <stdio.h>
 #include <sstream>
+#include <strstream>
 
 #include "jvmti.h"
 #include "exec_utils.h"
@@ -35,6 +36,8 @@
   std::string out_temp_dex;
   std::string in_temp_dex;
   bool vm_class_loader_initialized;
+  bool trace_stress;
+  bool redefine_stress;
 };
 
 static void WriteToFile(const std::string& fname, jint data_len, const unsigned char* data) {
@@ -95,7 +98,6 @@
   if (thread == nullptr) {
     info.name = const_cast<char*>("<NULLPTR>");
   } else if (jvmtienv->GetThreadInfo(thread, &info) != JVMTI_ERROR_NONE) {
-    LOG(WARNING) << "Unable to get thread info!";
     info.name = const_cast<char*>("<UNKNOWN THREAD>");
   }
   char *fname, *fsig, *fgen;
@@ -115,8 +117,8 @@
     env->DeleteLocalRef(klass);
     return;
   }
-  LOG(INFO) << "Loading native method \"" << cname << "->" << fname << fsig << "\". Thread is "
-            << info.name;
+  LOG(INFO) << "Loading native method \"" << cname << "->" << fname << fsig << "\". Thread is \""
+            << info.name << "\"";
   jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cname));
   jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cgen));
   jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));
@@ -126,6 +128,151 @@
   return;
 }
 
+// Describes `obj` for log output: its JVMTI class signature, and for java.lang.String
+// instances additionally the string contents. Yields "<UNKNOWN>" if the signature lookup fails.
+static std::string GetName(jvmtiEnv* jvmtienv, JNIEnv* jnienv, jobject obj) {
+  jclass obj_class = jnienv->GetObjectClass(obj);
+  char *signature, *generic;
+  if (jvmtienv->GetClassSignature(obj_class, &signature, &generic) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get class name!";
+    jnienv->DeleteLocalRef(obj_class);
+    return "<UNKNOWN>";
+  }
+  std::string description(signature);
+  if (description == "Ljava/lang/String;") {
+    jstring as_string = reinterpret_cast<jstring>(obj);
+    const char* utf = jnienv->GetStringUTFChars(as_string, nullptr);
+    if (utf != nullptr) {
+      description.append(" (value: \"").append(utf).append("\")");
+      jnienv->ReleaseStringUTFChars(as_string, utf);
+    } else {
+      description += " (unable to get value)";
+    }
+  }
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(signature));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(generic));
+  jnienv->DeleteLocalRef(obj_class);
+  return description;
+}
+
+// Renders `val` as text for the trace log, interpreting it according to the first character
+// of the type descriptor `type`. Unknown descriptors yield an "<ERROR Found type ...>" marker.
+// An empty `type` reads as '\0' and therefore takes that error branch as well.
+static std::string GetValOf(jvmtiEnv* env, JNIEnv* jnienv, std::string type, jvalue val) {
+  std::ostringstream oss;
+  switch (type[0]) {
+    case '[':
+    case 'L':
+      return val.l != nullptr ? GetName(env, jnienv, val.l) : "null";
+    case 'Z':
+      return val.z == JNI_TRUE ? "true" : "false";
+    case 'B':
+      // jbyte is a char type, so streaming it would emit a raw character, not a number.
+      return std::to_string(val.b);
+    case 'C':
+      return std::to_string(val.c);
+    case 'S':
+      return std::to_string(val.s);
+    case 'I':
+      return std::to_string(val.i);
+    case 'J':
+      return std::to_string(val.j);
+    // Floating point values keep going through the stream to retain its default formatting.
+    case 'F':
+      oss << val.f;
+      return oss.str();
+    case 'D':
+      oss << val.d;
+      return oss.str();
+    case 'V':
+      return "<void>";
+    default:
+      return "<ERROR Found type " + type + ">";
+  }
+}
+
+// MethodExit event callback: logs the method being left, the thread's name and whether
+// it returned normally (with the rendered value) or was popped by an exception.
+void JNICALL MethodExitHook(jvmtiEnv* jvmtienv, JNIEnv* env, jthread thread, jmethodID m,
+                            jboolean was_popped_by_exception, jvalue val) {
+  jvmtiThreadInfo info;
+  if (thread == nullptr) {
+    info.name = const_cast<char*>("<NULLPTR>");
+  } else if (jvmtienv->GetThreadInfo(thread, &info) != JVMTI_ERROR_NONE) {
+    info.name = const_cast<char*>("<UNKNOWN THREAD>");
+  }
+  char *fname, *fsig, *fgen, *cname, *cgen;
+  jclass klass = nullptr;
+  if (jvmtienv->GetMethodDeclaringClass(m, &klass) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method declaring class!";
+    return;
+  }
+  if (jvmtienv->GetMethodName(m, &fname, &fsig, &fgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method name!";
+    env->DeleteLocalRef(klass);
+    return;
+  }
+  if (jvmtienv->GetClassSignature(klass, &cname, &cgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get class name!";
+    // The method name strings were already allocated above; release them before bailing out.
+    jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));
+    jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fsig));
+    jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fgen));
+    env->DeleteLocalRef(klass);
+    return;
+  }
+  std::string type(fsig);
+  type = type.substr(type.find(")") + 1);
+  std::string out_val(was_popped_by_exception ? "" : GetValOf(jvmtienv, env, type, val));
+  LOG(INFO) << "Leaving method \"" << cname << "->" << fname << fsig << "\". Thread is \""
+            << info.name << "\"." << std::endl
+            << "    Cause: " << (was_popped_by_exception ? "exception" : "return ")
+            << out_val << ".";
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cgen));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fsig));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fgen));
+  env->DeleteLocalRef(klass);
+}
+
+// MethodEntry event callback: logs the method being entered and the thread's name.
+void JNICALL MethodEntryHook(jvmtiEnv* jvmtienv, JNIEnv* env, jthread thread, jmethodID m) {
+  jvmtiThreadInfo info;
+  if (thread == nullptr) {
+    info.name = const_cast<char*>("<NULLPTR>");
+  } else if (jvmtienv->GetThreadInfo(thread, &info) != JVMTI_ERROR_NONE) {
+    info.name = const_cast<char*>("<UNKNOWN THREAD>");
+  }
+  char *fname, *fsig, *fgen, *cname, *cgen;
+  jclass klass = nullptr;
+  if (jvmtienv->GetMethodDeclaringClass(m, &klass) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method declaring class!";
+    return;
+  }
+  if (jvmtienv->GetMethodName(m, &fname, &fsig, &fgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method name!";
+    env->DeleteLocalRef(klass);
+    return;
+  }
+  if (jvmtienv->GetClassSignature(klass, &cname, &cgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get class name!";
+    jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));  // From GetMethodName.
+    jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fsig));
+    jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fgen));
+    env->DeleteLocalRef(klass);
+    return;
+  }
+  LOG(INFO) << "Entering method \"" << cname << "->" << fname << fsig << "\". Thread is \""
+            << info.name << "\"";
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cgen));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fsig));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fgen));
+  env->DeleteLocalRef(klass);
+}
+
 // The hook we are using.
 void JNICALL ClassFileLoadHookSecretNoOp(jvmtiEnv* jvmti,
                                          JNIEnv* jni_env ATTRIBUTE_UNUSED,
@@ -163,27 +310,57 @@
   }
 }
 
-// Options are ${DEXTER_BINARY},${TEMP_FILE_1},${TEMP_FILE_2}
-static void ReadOptions(StressData* data, char* options) {
-  std::string ops(options);
-  data->dexter_cmd = ops.substr(0, ops.find(','));
-  ops = ops.substr(ops.find(',') + 1);
-  data->in_temp_dex = ops.substr(0, ops.find(','));
-  ops = ops.substr(ops.find(',') + 1);
-  data->out_temp_dex = ops;
+static std::string AdvanceOption(const std::string& ops) {
+  return ops.substr(ops.find(',') + 1);  // Text after the first ','; all of ops if there is none.
 }
 
-// We need to make sure that VMClassLoader is initialized before we start redefining anything since
-// it can give (non-fatal) error messages if it's initialized after we've redefined BCP classes.
-// These error messages are expected and no problem but they will mess up our testing
-// infrastructure.
-static void JNICALL EnsureVMClassloaderInitializedCB(jvmtiEnv *jvmti_env,
-                                                     JNIEnv* jni_env,
-                                                     jthread thread ATTRIBUTE_UNUSED) {
+static bool HasNextOption(const std::string& ops) {
+  return ops.find(',') != std::string::npos;  // True while at least one ',' remains in ops.
+}
+
+static std::string GetOption(const std::string& in) {
+  return in.substr(0, in.find(','));  // Leading option: text before the first ',' (or all of in).
+}
+
+// Parses the agent option string into `data`. Expected layout:
+// jvmti-stress,[redefine,${DEXTER_BINARY},${TEMP_FILE_1},${TEMP_FILE_2},][trace]
+static void ReadOptions(StressData* data, char* options) {
+  std::string remaining(options);
+  CHECK_EQ(GetOption(remaining), "jvmti-stress") << "Options should start with jvmti-stress";
+  for (bool more = true; more; more = HasNextOption(remaining)) {
+    remaining = AdvanceOption(remaining);
+    const std::string option = GetOption(remaining);
+    if (option == "redefine") {
+      data->redefine_stress = true;
+      remaining = AdvanceOption(remaining);
+      data->dexter_cmd = GetOption(remaining);
+      remaining = AdvanceOption(remaining);
+      data->in_temp_dex = GetOption(remaining);
+      remaining = AdvanceOption(remaining);
+      data->out_temp_dex = GetOption(remaining);
+    } else if (option == "trace") {
+      data->trace_stress = true;
+    } else {
+      LOG(FATAL) << "Unknown option: " << GetOption(remaining);
+    }
+  }
+}
+
+// Do final setup during the VMInit callback. By this time most things are already set up.
+static void JNICALL PerformFinalSetupVMInit(jvmtiEnv *jvmti_env,
+                                            JNIEnv* jni_env,
+                                            jthread thread ATTRIBUTE_UNUSED) {
   // Load the VMClassLoader class. We will get a ClassNotFound exception because we don't have
   // visibility but the class will be loaded behind the scenes.
   LOG(INFO) << "manual load & initialization of class java/lang/VMClassLoader!";
   jclass klass = jni_env->FindClass("java/lang/VMClassLoader");
+  StressData* data = nullptr;
+  CHECK_EQ(jvmti_env->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)),
+           JVMTI_ERROR_NONE);
+  // We need to make sure that VMClassLoader is initialized before we start redefining anything
+  // since it can give (non-fatal) error messages if it's initialized after we've redefined BCP
+  // classes. These error messages are expected and no problem but they will mess up our testing
+  // infrastructure.
   if (klass == nullptr) {
     // Probably on RI. Clear the exception so we can continue but don't mark vmclassloader as
     // initialized.
@@ -193,11 +370,20 @@
     // GetMethodID is spec'd to cause the class to be initialized.
     jni_env->GetMethodID(klass, "hashCode", "()I");
     jni_env->DeleteLocalRef(klass);
-    StressData* data = nullptr;
-    CHECK_EQ(jvmti_env->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)),
-             JVMTI_ERROR_NONE);
     data->vm_class_loader_initialized = true;
   }
+  if (data->trace_stress) {
+    if (jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                            JVMTI_EVENT_METHOD_ENTRY,
+                                            nullptr) != JVMTI_ERROR_NONE) {
+      LOG(ERROR) << "Unable to enable JVMTI_EVENT_METHOD_ENTRY event!";
+    }
+    if (jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                        JVMTI_EVENT_METHOD_EXIT,
+                                        nullptr) != JVMTI_ERROR_NONE) {
+      LOG(ERROR) << "Unable to enable JVMTI_EVENT_METHOD_EXIT event!";
+    }
+  }
 }
 
 extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm,
@@ -233,7 +419,9 @@
   memset(&cb, 0, sizeof(cb));
   cb.ClassFileLoadHook = ClassFileLoadHookSecretNoOp;
   cb.NativeMethodBind = doJvmtiMethodBind;
-  cb.VMInit = EnsureVMClassloaderInitializedCB;
+  cb.VMInit = PerformFinalSetupVMInit;
+  cb.MethodEntry = MethodEntryHook;
+  cb.MethodExit = MethodExitHook;
   if (jvmti->SetEventCallbacks(&cb, sizeof(cb)) != JVMTI_ERROR_NONE) {
     LOG(ERROR) << "Unable to set class file load hook cb!";
     return 1;
@@ -250,11 +438,13 @@
     LOG(ERROR) << "Unable to enable JVMTI_EVENT_VM_INIT event!";
     return 1;
   }
-  if (jvmti->SetEventNotificationMode(JVMTI_ENABLE,
-                                      JVMTI_EVENT_CLASS_FILE_LOAD_HOOK,
-                                      nullptr) != JVMTI_ERROR_NONE) {
-    LOG(ERROR) << "Unable to enable CLASS_FILE_LOAD_HOOK event!";
-    return 1;
+  if (data->redefine_stress) {
+    if (jvmti->SetEventNotificationMode(JVMTI_ENABLE,
+                                        JVMTI_EVENT_CLASS_FILE_LOAD_HOOK,
+                                        nullptr) != JVMTI_ERROR_NONE) {
+      LOG(ERROR) << "Unable to enable CLASS_FILE_LOAD_HOOK event!";
+      return 1;
+    }
   }
   return 0;
 }