Merge "Improve profile processing"
diff --git a/Android.mk b/Android.mk
index 34022ae..4f73127 100644
--- a/Android.mk
+++ b/Android.mk
@@ -400,6 +400,35 @@
 	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 
+####################################################################################################
+# Fake packages to ensure generation of libopenjdkd when one builds with mm/mmm/mmma.
+#
+# The library is required for starting a runtime in debug mode, but libartd does not depend on it
+# (that would create a dependency cycle).
+#
+# Note: * As the package is phony and only exists to create a dependency, its name is irrelevant.
+#       * We set MULTILIB explicitly to "both" to state that we want both libraries on 64-bit
+#         systems, even though that is the default.
+
+# ART on the host.
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+include $(CLEAR_VARS)
+LOCAL_MODULE := art-libartd-libopenjdkd-host-dependency
+LOCAL_MULTILIB := both
+LOCAL_REQUIRED_MODULES := libopenjdkd
+LOCAL_IS_HOST_MODULE := true
+include $(BUILD_PHONY_PACKAGE)
+endif
+
+# ART on the target.
+ifeq ($(ART_BUILD_TARGET_DEBUG),true)
+include $(CLEAR_VARS)
+LOCAL_MODULE := art-libartd-libopenjdkd-target-dependency
+LOCAL_MULTILIB := both
+LOCAL_REQUIRED_MODULES := libopenjdkd
+include $(BUILD_PHONY_PACKAGE)
+endif
+
 ########################################################################
 # "m build-art" for quick minimal build
 .PHONY: build-art
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index 953cfc0..a06f45a 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -16,10 +16,14 @@
 
 include art/build/Android.common_build.mk
 
-ART_CPPLINT := art/tools/cpplint.py
+ART_CPPLINT := $(LOCAL_PATH)/tools/cpplint.py
 ART_CPPLINT_FILTER := --filter=-whitespace/line_length,-build/include,-readability/function,-readability/streams,-readability/todo,-runtime/references,-runtime/sizeof,-runtime/threadsafe_fn,-runtime/printf
 ART_CPPLINT_FLAGS := --quiet
-ART_CPPLINT_SRC := $(shell find art -name "*.h" -o -name "*$(ART_CPP_EXTENSION)" | grep -v art/compiler/llvm/generated/ | grep -v art/runtime/elf\.h)
+# This:
+#  1) Gets a list of all .h and $(ART_CPP_EXTENSION) files in the art directory.
+#  2) Prepends $(LOCAL_PATH)/ to each of them to form the full path.
+#  3) Removes $(LOCAL_PATH)/runtime/elf.h from the list.
+ART_CPPLINT_SRC := $(filter-out $(LOCAL_PATH)/runtime/elf.h, $(addprefix $(LOCAL_PATH)/, $(call all-subdir-named-files,*.h) $(call all-subdir-named-files,*$(ART_CPP_EXTENSION))))
 
 # "mm cpplint-art" to verify we aren't regressing
 .PHONY: cpplint-art
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 2fec791..d38cd63 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -446,7 +446,7 @@
     $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_TARGET_GTEST_$(file)_DEX)) \
     $$(ART_TARGET_NATIVETEST_OUT)/$$(TARGET_$(2)ARCH)/$(1) \
     $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so \
-    $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libopenjdk.so \
+    $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libopenjdkd.so \
     $$(TARGET_OUT_JAVA_LIBRARIES)/core-libart-testdex.jar \
     $$(TARGET_OUT_JAVA_LIBRARIES)/core-oj-testdex.jar
 
@@ -490,7 +490,7 @@
   # Dependencies for all host gtests.
   gtest_deps := $$(HOST_CORE_DEX_LOCATIONS) \
     $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$$(ART_HOST_SHLIB_EXTENSION) \
-    $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$$(ART_HOST_SHLIB_EXTENSION) \
+    $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$$(ART_HOST_SHLIB_EXTENSION) \
     $$(gtest_exe) \
     $$(ART_GTEST_$(1)_HOST_DEPS) \
     $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_HOST_GTEST_$(file)_DEX))
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 9e69312..afc8463 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -210,8 +210,6 @@
                                             2,
                                             true,
                                             true,
-                                            "",
-                                            false,
                                             timer_.get(),
                                             -1,
                                             /* dex_to_oat_map */ nullptr,
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 32d7518..4617668 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -110,9 +110,9 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsDouble], "AbsDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxInt], "MinMaxInt must be static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble_must_be_static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCos], "Cos must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSin], "Sin must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAcos], "Acos must be static");
@@ -153,7 +153,7 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicPeek], "Peek must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPoke], "Poke must be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCas], "Cas must not be static");
-static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet_must_not_be_static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index 12568a4..c5df134 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -69,6 +69,8 @@
       false,
       nullptr,
       nullptr,
+      false,
+      "",
       false);
     VerificationResults verification_results(&compiler_options);
     DexFileToMethodInlinerMap method_inliner_map;
@@ -88,8 +90,6 @@
                           0,
                           false,
                           false,
-                          "",
-                          false,
                           0,
                           -1,
                           nullptr,
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 3260a7a..ebc9a2c 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -520,7 +520,12 @@
 // In the rare cases we compile experimental opcodes, the runtime has an option to enable it,
 // which will force scanning for any unsupported opcodes.
 static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
-  if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
+  Runtime* runtime = Runtime::Current();
+  if (UNLIKELY(runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods))) {
+    // We always need to scan opcodes if we have default methods, since invoke-super for
+    // interface methods is never going to be supported in the quick compiler.
+    return false;
+  } else if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
     // All opcodes are supported no matter what. Usually not the case
     // since experimental opcodes are not implemented in the quick compiler.
     return true;
@@ -538,8 +543,28 @@
   }
 }
 
+bool QuickCompiler::CanCompileInstruction(const MIR* mir,
+                                          const DexFile& dex_file) const {
+  switch (mir->dalvikInsn.opcode) {
+    // The quick compiler does not support the new invoke-super semantics that target an
+    // interface method.
+    case Instruction::INVOKE_SUPER:  // Fall-through
+    case Instruction::INVOKE_SUPER_RANGE: {
+      DCHECK(mir->dalvikInsn.IsInvoke());
+      uint32_t invoke_method_idx = mir->dalvikInsn.vB;
+      const DexFile::MethodId& method_id = dex_file.GetMethodId(invoke_method_idx);
+      const DexFile::ClassDef* class_def = dex_file.FindClassDef(method_id.class_idx_);
+      // False if the declaring class is an interface, i.e. !(java_access_flags & kAccInterface).
+      return class_def != nullptr && ((class_def->GetJavaAccessFlags() & kAccInterface) == 0);
+    }
+    default:
+      return true;
+  }
+}
+
 // Skip the method that we do not support currently.
-bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
+bool QuickCompiler::CanCompileMethod(uint32_t method_idx,
+                                     const DexFile& dex_file,
                                      CompilationUnit* cu) const {
   // This is a limitation in mir_graph. See MirGraph::SetNumSSARegs.
   if (cu->mir_graph->GetNumOfCodeAndTempVRs() > kMaxAllowedDalvikRegisters) {
@@ -580,6 +605,9 @@
               << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
         }
         return false;
+      } else if (!CanCompileInstruction(mir, dex_file)) {
+        VLOG(compiler) << "Cannot compile dalvik opcode : " << mir->dalvikInsn.opcode;
+        return false;
       }
       // Check if it invokes a prototype that we cannot support.
       if (std::find(kInvokeOpcodes, kInvokeOpcodes + arraysize(kInvokeOpcodes), opcode)
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
index d512b25..55f45f1 100644
--- a/compiler/dex/quick/quick_compiler.h
+++ b/compiler/dex/quick/quick_compiler.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_DEX_QUICK_QUICK_COMPILER_H_
 
 #include "compiler.h"
+#include "dex/mir_graph.h"
 
 namespace art {
 
@@ -74,6 +75,8 @@
   explicit QuickCompiler(CompilerDriver* driver);
 
  private:
+  bool CanCompileInstruction(const MIR* mir, const DexFile& dex_file) const;
+
   std::unique_ptr<PassManager> pre_opt_pass_manager_;
   std::unique_ptr<PassManager> post_opt_pass_manager_;
   DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index b39fe4d..d63878d 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -52,6 +52,8 @@
         false,
         nullptr,
         nullptr,
+        false,
+        "",
         false));
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
     method_inliner_map_.reset(new DexFileToMethodInlinerMap());
@@ -69,8 +71,6 @@
         0,
         false,
         false,
-        "",
-        false,
         0,
         -1,
         nullptr,
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index f18fa67..2e2d1f9 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -41,8 +41,6 @@
                         1u,
                         false,
                         false,
-                        "",
-                        false,
                         nullptr,
                         -1,
                         nullptr,
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 043bd93..d021525 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -345,7 +345,6 @@
     std::unordered_set<std::string>* compiled_classes,
     std::unordered_set<std::string>* compiled_methods,
     size_t thread_count, bool dump_stats, bool dump_passes,
-    const std::string& dump_cfg_file_name, bool dump_cfg_append,
     CumulativeLogger* timer, int swap_fd,
     const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
     const ProfileCompilationInfo* profile_compilation_info)
@@ -370,8 +369,6 @@
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
-      dump_cfg_file_name_(dump_cfg_file_name),
-      dump_cfg_append_(dump_cfg_append),
       timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
@@ -1197,15 +1194,18 @@
   if (equals_referrers_class != nullptr) {
     *equals_referrers_class = (method_id.class_idx_ == type_idx);
   }
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform access check, will return true if access is ok or false if we're going to have to
+    // check this at runtime (for example for class loaders).
+    is_accessible = referrer_class->CanAccess(resolved_class);
   }
-  // Perform access check, will return true if access is ok or false if we're going to have to
-  // check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class);
-  if (result) {
+  if (is_accessible) {
     stats_->TypeDoesntNeedAccessCheck();
     if (type_known_final != nullptr) {
       *type_known_final = resolved_class->IsFinal() && !resolved_class->IsArrayClass();
@@ -1216,7 +1216,7 @@
   } else {
     stats_->TypeNeedsAccessCheck();
   }
-  return result;
+  return is_accessible;
 }
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
@@ -1236,14 +1236,18 @@
   }
   *finalizable = resolved_class->IsFinalizable();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform access and instantiable checks, will return true if access is ok or false if we're
+    // going to have to check this at runtime (for example for class loaders).
+    is_accessible = referrer_class->CanAccess(resolved_class);
   }
-  // Perform access and instantiable checks, will return true if access is ok or false if we're
-  // going to have to check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class) && resolved_class->IsInstantiable();
+  bool result = is_accessible && resolved_class->IsInstantiable();
   if (result) {
     stats_->TypeDoesntNeedAccessCheck();
   } else {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 3847c81..6a2f7bf 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -95,7 +95,6 @@
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
                  size_t thread_count, bool dump_stats, bool dump_passes,
-                 const std::string& dump_cfg_file_name, bool dump_cfg_append,
                  CumulativeLogger* timer, int swap_fd,
                  const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
                  const ProfileCompilationInfo* profile_compilation_info);
@@ -122,8 +121,10 @@
       return true;
     }
     auto it1 = dex_file_oat_filename_map_->find(d1);
+    DCHECK(it1 != dex_file_oat_filename_map_->end());
     auto it2 = dex_file_oat_filename_map_->find(d2);
-    return it1 == it2;
+    DCHECK(it2 != dex_file_oat_filename_map_->end());
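+    // Compare the oat filenames the two dex files map to, not the iterators themselves.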
+    return it1->second == it2->second;
   }
 
   void CompileAll(jobject class_loader,
@@ -421,14 +422,6 @@
     return dump_passes_;
   }
 
-  const std::string& GetDumpCfgFileName() const {
-    return dump_cfg_file_name_;
-  }
-
-  bool GetDumpCfgAppend() const {
-    return dump_cfg_append_;
-  }
-
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
@@ -666,8 +659,6 @@
 
   bool dump_stats_;
   const bool dump_passes_;
-  const std::string dump_cfg_file_name_;
-  const bool dump_cfg_append_;
 
   CumulativeLogger* const timings_logger_;
 
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 209bb5a..2644528 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -44,7 +44,9 @@
       verbose_methods_(nullptr),
       pass_manager_options_(),
       abort_on_hard_verifier_failure_(false),
-      init_failure_output_(nullptr) {
+      init_failure_output_(nullptr),
+      dump_cfg_file_name_(""),
+      dump_cfg_append_(false) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -71,7 +73,9 @@
                                  bool compile_pic,
                                  const std::vector<std::string>* verbose_methods,
                                  std::ostream* init_failure_output,
-                                 bool abort_on_hard_verifier_failure
+                                 bool abort_on_hard_verifier_failure,
+                                 const std::string& dump_cfg_file_name,
+                                 bool dump_cfg_append
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -94,7 +98,9 @@
     verbose_methods_(verbose_methods),
     pass_manager_options_(),
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
-    init_failure_output_(init_failure_output) {
+    init_failure_output_(init_failure_output),
+    dump_cfg_file_name_(dump_cfg_file_name),
+    dump_cfg_append_(dump_cfg_append) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
@@ -211,11 +217,9 @@
     generate_debug_info_ = false;
   } else if (option == "--debuggable") {
     debuggable_ = true;
-    generate_debug_info_ = true;
   } else if (option == "--native-debuggable") {
     native_debuggable_ = true;
     debuggable_ = true;
-    generate_debug_info_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
     ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
   } else if (option == "--include-patch-information") {
@@ -240,6 +244,10 @@
     ParsePassOptions(option, Usage);
   } else if (option.starts_with("--dump-init-failures=")) {
     ParseDumpInitFailures(option, Usage);
+  } else if (option.starts_with("--dump-cfg=")) {
+    dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
+  } else if (option.starts_with("--dump-cfg-append")) {
+    dump_cfg_append_ = true;
   } else {
     // Option not recognized.
     return false;
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index f8032bb..d47fc2a 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -50,7 +50,7 @@
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
   static const bool kDefaultNativeDebuggable = false;
-  static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
+  static const bool kDefaultGenerateDebugInfo = false;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
   static const size_t kDefaultInlineMaxCodeUnits = 32;
@@ -83,7 +83,9 @@
                   bool compile_pic,
                   const std::vector<std::string>* verbose_methods,
                   std::ostream* init_failure_output,
-                  bool abort_on_hard_verifier_failure);
+                  bool abort_on_hard_verifier_failure,
+                  const std::string& dump_cfg_file_name,
+                  bool dump_cfg_append);
 
   CompilerFilter GetCompilerFilter() const {
     return compiler_filter_;
@@ -224,6 +226,14 @@
 
   bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
 
+  const std::string& GetDumpCfgFileName() const {
+    return dump_cfg_file_name_;
+  }
+
+  bool GetDumpCfgAppend() const {
+    return dump_cfg_append_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParsePassOptions(const StringPiece& option, UsageFn Usage);
@@ -273,6 +283,9 @@
   // Log initialization of initialization failures to this stream if not null.
   std::unique_ptr<std::ostream> init_failure_output_;
 
+  std::string dump_cfg_file_name_;
+  bool dump_cfg_append_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
index a391e4d..e8ba914 100644
--- a/compiler/dwarf/method_debug_info.h
+++ b/compiler/dwarf/method_debug_info.h
@@ -30,8 +30,8 @@
   uint32_t access_flags_;
   const DexFile::CodeItem* code_item_;
   bool deduped_;
-  uint32_t low_pc_;
-  uint32_t high_pc_;
+  uintptr_t low_pc_;
+  uintptr_t high_pc_;
   CompiledMethod* compiled_method_;
 };
 
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index b67e8dd..35b3e15 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -29,7 +29,7 @@
   // TODO: Arm S0–S31 register mapping is obsolescent.
   //   We should use VFP-v3/Neon D0-D31 mapping instead.
   //   However, D0 is aliased to pair of S0 and S1, so using that
-  //   mapping we can not easily say S0 is spilled and S1 is not.
+  //   mapping we cannot easily say S0 is spilled and S1 is not.
   //   There are ways around this in DWARF but they are complex.
   //   It would be much simpler to always spill whole D registers.
   //   Arm64 mapping is correct since we already do this there.
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bb07cc2..a7461a5 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -148,6 +148,12 @@
       }
     }
 
+    // Returns true if the section was written to disk.
+    // (Used to check whether we have .text when writing JIT debug info)
+    bool Exists() const {
+      return finished_;
+    }
+
     // Get the location of this section in virtual memory.
     Elf_Addr GetAddress() const {
       CHECK(started_);
@@ -247,16 +253,18 @@
     }
 
     // Buffer symbol for this section.  It will be written later.
+    // If the symbol's section is null, it will be considered absolute (SHN_ABS).
+    // (We use this in the JIT to reference code stored outside the debug ELF file.)
     void Add(Elf_Word name, const Section* section,
              Elf_Addr addr, bool is_relative, Elf_Word size,
              uint8_t binding, uint8_t type, uint8_t other = 0) {
-      CHECK(section != nullptr);
       Elf_Sym sym = Elf_Sym();
       sym.st_name = name;
       sym.st_value = addr + (is_relative ? section->GetAddress() : 0);
       sym.st_size = size;
       sym.st_other = other;
-      sym.st_shndx = section->GetSectionIndex();
+      sym.st_shndx = (section != nullptr ? section->GetSectionIndex()
+                                         : static_cast<Elf_Word>(SHN_ABS));
       sym.st_info = (binding << 4) + (type & 0xf);
       symbols_.push_back(sym);
     }
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 2bc8c89..e03614f 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -22,16 +22,20 @@
 #include "base/casts.h"
 #include "base/stl_util.h"
 #include "compiled_method.h"
-#include "driver/compiler_driver.h"
 #include "dex_file-inl.h"
+#include "driver/compiler_driver.h"
 #include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
 #include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
+#include "linker/vector_output_stream.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
 #include "oat_writer.h"
-#include "utils.h"
 #include "stack_map.h"
+#include "utils.h"
 
 namespace art {
 namespace dwarf {
@@ -208,7 +212,7 @@
     case kNone:
       break;
   }
-  LOG(FATAL) << "Can not write CIE frame for ISA " << isa;
+  LOG(FATAL) << "Cannot write CIE frame for ISA " << isa;
   UNREACHABLE();
 }
 
@@ -219,6 +223,10 @@
   CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
+  if (method_infos.empty()) {
+    return;
+  }
+
   std::vector<uint32_t> binary_search_table;
   std::vector<uintptr_t> patch_locations;
   if (format == DW_EH_FRAME_FORMAT) {
@@ -234,7 +242,9 @@
   {
     cfi_section->Start();
     const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
-    const Elf_Addr text_address = builder->GetText()->GetAddress();
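+    // The .text section may be absent (e.g. when writing JIT debug info); use 0 as the base then.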
+    const Elf_Addr text_address = builder->GetText()->Exists()
+        ? builder->GetText()->GetAddress()
+        : 0;
     const Elf_Addr cfi_address = cfi_section->GetAddress();
     const Elf_Addr cie_address = cfi_address;
     Elf_Addr buffer_address = cfi_address;
@@ -305,8 +315,8 @@
   struct CompilationUnit {
     std::vector<const MethodDebugInfo*> methods_;
     size_t debug_line_offset_ = 0;
-    uint32_t low_pc_ = 0xFFFFFFFFU;
-    uint32_t high_pc_ = 0;
+    uintptr_t low_pc_ = std::numeric_limits<uintptr_t>::max();
+    uintptr_t high_pc_ = 0;
   };
 
   typedef std::vector<DexFile::LocalInfo> LocalInfos;
@@ -439,14 +449,17 @@
 
     void Write(const CompilationUnit& compilation_unit) {
       CHECK(!compilation_unit.methods_.empty());
-      const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress();
+      const Elf_Addr text_address = owner_->builder_->GetText()->Exists()
+          ? owner_->builder_->GetText()->GetAddress()
+          : 0;
+      const uintptr_t cu_size = compilation_unit.high_pc_ - compilation_unit.low_pc_;
 
       info_.StartTag(DW_TAG_compile_unit);
       info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
       info_.WriteData1(DW_AT_language, DW_LANG_Java);
       info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
       info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
-      info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
+      info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
       info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
 
       const char* last_dex_class_desc = nullptr;
@@ -464,8 +477,16 @@
           if (last_dex_class_desc != nullptr) {
             EndClassTag(last_dex_class_desc);
           }
-          size_t offset = StartClassTag(dex_class_desc);
-          type_cache_.emplace(dex_class_desc, offset);
+          // Write reference tag for the class we are about to declare.
+          size_t reference_tag_offset = info_.StartTag(DW_TAG_reference_type);
+          type_cache_.emplace(std::string(dex_class_desc), reference_tag_offset);
+          size_t type_attrib_offset = info_.size();
+          info_.WriteRef4(DW_AT_type, 0);
+          info_.EndTag();
+          // Declare the class that owns this method.
+          size_t class_offset = StartClassTag(dex_class_desc);
+          info_.UpdateUint32(type_attrib_offset, class_offset);
+          info_.WriteFlag(DW_AT_declaration, true);
           // Check that each class is defined only once.
           bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
           CHECK(unique) << "Redefinition of " << dex_class_desc;
@@ -476,7 +497,7 @@
         info_.StartTag(DW_TAG_subprogram);
         WriteName(dex->GetMethodName(dex_method));
         info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
-        info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_);
+        info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(mi->high_pc_-mi->low_pc_));
         uint8_t frame_base[] = { DW_OP_call_frame_cfa };
         info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
         WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
@@ -562,6 +583,107 @@
       owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
     }
 
+    void Write(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+      info_.StartTag(DW_TAG_compile_unit);
+      info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+      info_.WriteData1(DW_AT_language, DW_LANG_Java);
+
+      for (mirror::Class* type : types) {
+        if (type->IsPrimitive()) {
+          // For primitive types the definition and the declaration is the same.
+          if (type->GetPrimitiveType() != Primitive::kPrimVoid) {
+            WriteTypeDeclaration(type->GetDescriptor(nullptr));
+          }
+        } else if (type->IsArrayClass()) {
+          mirror::Class* element_type = type->GetComponentType();
+          uint32_t component_size = type->GetComponentSize();
+          uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+          uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+          info_.StartTag(DW_TAG_array_type);
+          std::string descriptor_string;
+          WriteLazyType(element_type->GetDescriptor(&descriptor_string));
+          info_.WriteUdata(DW_AT_data_member_location, data_offset);
+          info_.StartTag(DW_TAG_subrange_type);
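+          // The length offset must fit into DW_OP_lit0 .. DW_OP_lit31 below.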
+          DCHECK_LT(length_offset, 32u);
+          uint8_t count[] = {
+            DW_OP_push_object_address,
+            static_cast<uint8_t>(DW_OP_lit0 + length_offset),
+            DW_OP_plus,
+            DW_OP_deref_size,
+            4  // Array length is always 32-bit wide.
+          };
+          info_.WriteExprLoc(DW_AT_count, &count, sizeof(count));
+          info_.EndTag();  // DW_TAG_subrange_type.
+          info_.EndTag();  // DW_TAG_array_type.
+        } else {
+          std::string descriptor_string;
+          const char* desc = type->GetDescriptor(&descriptor_string);
+          StartClassTag(desc);
+
+          if (!type->IsVariableSize()) {
+            info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
+          }
+
+          // Base class.
+          mirror::Class* base_class = type->GetSuperClass();
+          if (base_class != nullptr) {
+            info_.StartTag(DW_TAG_inheritance);
+            WriteLazyType(base_class->GetDescriptor(&descriptor_string));
+            info_.WriteUdata(DW_AT_data_member_location, 0);
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+            info_.EndTag();  // DW_TAG_inheritance.
+          }
+
+          // Member variables.
+          for (uint32_t i = 0, count = type->NumInstanceFields(); i < count; ++i) {
+            ArtField* field = type->GetInstanceField(i);
+            info_.StartTag(DW_TAG_member);
+            WriteName(field->GetName());
+            WriteLazyType(field->GetTypeDescriptor());
+            info_.WriteUdata(DW_AT_data_member_location, field->GetOffset().Uint32Value());
+            uint32_t access_flags = field->GetAccessFlags();
+            if (access_flags & kAccPublic) {
+              info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+            } else if (access_flags & kAccProtected) {
+              info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_protected);
+            } else if (access_flags & kAccPrivate) {
+              info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+            }
+            info_.EndTag();  // DW_TAG_member.
+          }
+
+          if (type->IsStringClass()) {
+            // Emit debug info about an artificial class member for java.lang.String which represents
+            // the first element of the data stored in a string instance. Consumers of the debug
+            // info will be able to read the content of java.lang.String based on the count (real
+            // field) and based on the location of this data member.
+            info_.StartTag(DW_TAG_member);
+            WriteName("value");
+            // We don't support fields with C-like array types, so we just say its type is Java char.
+            WriteLazyType("C");  // char.
+            info_.WriteUdata(DW_AT_data_member_location,
+                             mirror::String::ValueOffset().Uint32Value());
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+            info_.EndTag();  // DW_TAG_member.
+          }
+
+          EndClassTag(desc);
+        }
+      }
+
+      CHECK_EQ(info_.Depth(), 1);
+      FinishLazyTypes();
+      info_.EndTag();  // DW_TAG_compile_unit.
+      std::vector<uint8_t> buffer;
+      buffer.reserve(info_.data()->size() + KB);
+      const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+      const size_t debug_abbrev_offset =
+          owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+      WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+      owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+    }
+
     // Write table into .debug_loc which describes location of dex register.
     // The dex register might be valid only at some points and it might
     // move between machine registers and stack.
@@ -715,14 +837,14 @@
     // just define all types lazily at the end of compilation unit.
     void WriteLazyType(const char* type_descriptor) {
       if (type_descriptor != nullptr && type_descriptor[0] != 'V') {
-        lazy_types_.emplace(type_descriptor, info_.size());
+        lazy_types_.emplace(std::string(type_descriptor), info_.size());
         info_.WriteRef4(DW_AT_type, 0);
       }
     }
 
     void FinishLazyTypes() {
       for (const auto& lazy_type : lazy_types_) {
-        info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first));
+        info_.UpdateUint32(lazy_type.second, WriteTypeDeclaration(lazy_type.first));
       }
       lazy_types_.clear();
     }
@@ -747,30 +869,41 @@
 
     // Convert dex type descriptor to DWARF.
     // Returns offset in the compilation unit.
-    size_t WriteType(const char* desc) {
+    size_t WriteTypeDeclaration(const std::string& desc) {
+      DCHECK(!desc.empty());
       const auto& it = type_cache_.find(desc);
       if (it != type_cache_.end()) {
         return it->second;
       }
 
       size_t offset;
-      if (*desc == 'L') {
+      if (desc[0] == 'L') {
         // Class type. For example: Lpackage/name;
-        offset = StartClassTag(desc);
+        size_t class_offset = StartClassTag(desc.c_str());
         info_.WriteFlag(DW_AT_declaration, true);
-        EndClassTag(desc);
-      } else if (*desc == '[') {
+        EndClassTag(desc.c_str());
+        // Reference to the class type.
+        offset = info_.StartTag(DW_TAG_reference_type);
+        info_.WriteRef(DW_AT_type, class_offset);
+        info_.EndTag();
+      } else if (desc[0] == '[') {
         // Array type.
-        size_t element_type = WriteType(desc + 1);
-        offset = info_.StartTag(DW_TAG_array_type);
+        size_t element_type = WriteTypeDeclaration(desc.substr(1));
+        size_t array_type = info_.StartTag(DW_TAG_array_type);
+        info_.WriteFlag(DW_AT_declaration, true);
         info_.WriteRef(DW_AT_type, element_type);
         info_.EndTag();
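+        // Wrap the array in a reference type, since Java arrays are accessed by reference.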
+        offset = info_.StartTag(DW_TAG_reference_type);
+        info_.WriteRef4(DW_AT_type, array_type);
+        info_.EndTag();
       } else {
         // Primitive types.
+        DCHECK_EQ(desc.size(), 1u);
+
         const char* name;
         uint32_t encoding;
         uint32_t byte_size;
-        switch (*desc) {
+        switch (desc[0]) {
         case 'B':
           name = "byte";
           encoding = DW_ATE_signed;
@@ -815,7 +948,7 @@
           LOG(FATAL) << "Void type should not be encoded";
           UNREACHABLE();
         default:
-          LOG(FATAL) << "Unknown dex type descriptor: " << desc;
+          LOG(FATAL) << "Unknown dex type descriptor: \"" << desc << "\"";
           UNREACHABLE();
         }
         offset = info_.StartTag(DW_TAG_base_type);
@@ -865,9 +998,10 @@
     // Temporary buffer to create and store the entries.
     DebugInfoEntryWriter<> info_;
     // Cache of already translated type descriptors.
-    std::map<const char*, size_t, CStringLess> type_cache_;  // type_desc -> definition_offset.
+    std::map<std::string, size_t> type_cache_;  // type_desc -> definition_offset.
     // 32-bit references which need to be resolved to a type later.
-    std::multimap<const char*, size_t, CStringLess> lazy_types_;  // type_desc -> patch_offset.
+    // A given type may be used multiple times, so we need a multimap.
+    std::multimap<std::string, size_t> lazy_types_;  // type_desc -> patch_offset.
   };
 
  public:
@@ -883,6 +1017,11 @@
     writer.Write(compilation_unit);
   }
 
+  void WriteTypes(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+    CompilationUnitWriter writer(this);
+    writer.Write(types);
+  }
+
   void End() {
     builder_->GetDebugInfo()->End();
     builder_->WritePatches(".debug_info.oat_patches",
@@ -924,7 +1063,9 @@
   // Returns the number of bytes written.
   size_t WriteCompilationUnit(CompilationUnit& compilation_unit) {
     const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
-    const Elf_Addr text_address = builder_->GetText()->GetAddress();
+    const Elf_Addr text_address = builder_->GetText()->Exists()
+        ? builder_->GetText()->GetAddress()
+        : 0;
 
     compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize();
 
@@ -1103,8 +1244,8 @@
 };
 
 template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos) {
+static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                               const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
@@ -1122,7 +1263,7 @@
   }
 
   // Write .debug_line section.
-  {
+  if (!compilation_units.empty()) {
     DebugLineWriter<ElfTypes> line_writer(builder);
     line_writer.Start();
     for (auto& compilation_unit : compilation_units) {
@@ -1132,7 +1273,7 @@
   }
 
   // Write .debug_info section.
-  {
+  if (!compilation_units.empty()) {
     DebugInfoWriter<ElfTypes> info_writer(builder);
     info_writer.Start();
     for (const auto& compilation_unit : compilation_units) {
@@ -1173,11 +1314,13 @@
       name += " [DEDUPED]";
     }
 
+    const auto* text = builder->GetText()->Exists() ? builder->GetText() : nullptr;
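+    // Without a .text section (JIT case), emit absolute symbols instead of section-relative ones.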
+    const bool is_relative = (text != nullptr);
     uint32_t low_pc = info.low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     low_pc += info.compiled_method_->CodeDelta();
-    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+    symtab->Add(strtab->Write(name), text, low_pc,
+                is_relative, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
@@ -1185,8 +1328,8 @@
     // requires it to match function symbol.  Just address 0 does not work.
     if (info.compiled_method_->GetInstructionSet() == kThumb2) {
       if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
-                    true, 0, STB_LOCAL, STT_NOTYPE);
+        symtab->Add(strtab->Write("$t"), text, info.low_pc_ & ~1,
+                    is_relative, 0, STB_LOCAL, STT_NOTYPE);
         generated_mapping_symbol = true;
       }
     }
@@ -1204,13 +1347,74 @@
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format) {
-  if (!method_infos.empty()) {
-    // Add methods to .symtab.
-    WriteDebugSymbols(builder, method_infos);
-    // Generate CFI (stack unwinding information).
-    WriteCFISection(builder, method_infos, cfi_format);
-    // Write DWARF .debug_* sections.
-    WriteDebugSections(builder, method_infos);
+  // Add methods to .symtab.
+  WriteDebugSymbols(builder, method_infos);
+  // Generate CFI (stack unwinding information).
+  WriteCFISection(builder, method_infos, cfi_format);
+  // Write DWARF .debug_* sections.
+  WriteDebugSections(builder, method_infos);
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForMethodInternal(
+    const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+  WriteDebugInfo(builder.get(),
+                 ArrayRef<const MethodDebugInfo>(&method_info, 1),
+                 DW_DEBUG_FRAME_FORMAT);
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer.  We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForMethodInternal<ElfTypes64>(method_info);
+  } else {
+    return WriteDebugElfFileForMethodInternal<ElfTypes32>(method_info);
+  }
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForClassesInternal(
+    const InstructionSet isa, const ArrayRef<mirror::Class*>& types)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+
+  DebugInfoWriter<ElfTypes> info_writer(builder.get());
+  info_writer.Start();
+  info_writer.WriteTypes(types);
+  info_writer.End();
+
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer.  We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClasses(const InstructionSet isa,
+                                                    const ArrayRef<mirror::Class*>& types) {
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForClassesInternal<ElfTypes64>(isa, types);
+  } else {
+    return WriteDebugElfFileForClassesInternal<ElfTypes32>(isa, types);
   }
 }
 
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 7ec0be1..e4bc856 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,19 +17,30 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include "elf_builder.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "dwarf/dwarf_constants.h"
-#include "oat_writer.h"
+#include "elf_builder.h"
 #include "utils/array_ref.h"
 
 namespace art {
+namespace mirror {
+class Class;
+}
 namespace dwarf {
+struct MethodDebugInfo;
 
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format);
 
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info);
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClasses(const InstructionSet isa,
+                                                    const ArrayRef<mirror::Class*>& types)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
 }  // namespace dwarf
 }  // namespace art
 
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 85216b7..3a3275a 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -22,11 +22,14 @@
 #include "base/stringpiece.h"
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
+#include "base/unix_file/fd_file.h"
 #include "compiler_callbacks.h"
 #include "dex/pass_manager.h"
 #include "dex/quick_compiler_callbacks.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "elf_writer_debug.h"
+#include "jit/debugger_interface.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "oat_file-inl.h"
@@ -42,11 +45,12 @@
   return new JitCompiler();
 }
 
-extern "C" void* jit_load(CompilerCallbacks** callbacks) {
+extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) {
   VLOG(jit) << "loading jit compiler";
   auto* const jit_compiler = JitCompiler::Create();
   CHECK(jit_compiler != nullptr);
   *callbacks = jit_compiler->GetCompilerCallbacks();
+  *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo();
   VLOG(jit) << "Done loading jit compiler";
   return jit_compiler;
 }
@@ -63,6 +67,17 @@
   return jit_compiler->CompileMethod(self, method);
 }
 
+extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle);
+  DCHECK(jit_compiler != nullptr);
+  if (jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo()) {
+    const ArrayRef<mirror::Class*> types_array(types, count);
+    ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForClasses(kRuntimeISA, types_array);
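+    // CreateJITCodeEntry takes ownership of the in-memory ELF file.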
+    CreateJITCodeEntry(std::unique_ptr<const uint8_t[]>(elf_file.data()), elf_file.size());
+  }
+}
+
 // Callers of this method assume it has NO_RETURN.
 NO_RETURN static void Usage(const char* fmt, ...) {
   va_list ap;
@@ -95,7 +110,9 @@
       /* pic */ true,  // TODO: Support non-PIC in optimizing.
       /* verbose_methods */ nullptr,
       /* init_failure_output */ nullptr,
-      /* abort_on_hard_verifier_failure */ false));
+      /* abort_on_hard_verifier_failure */ false,
+      /* dump_cfg_file_name */ "",
+      /* dump_cfg_append */ false));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
@@ -151,8 +168,6 @@
       /* thread_count */ 1,
       /* dump_stats */ false,
       /* dump_passes */ false,
-      /* dump_cfg_file_name */ "",
-      /* dump_cfg_append */ false,
       cumulative_logger_.get(),
       /* swap_fd */ -1,
       /* dex to oat map */ nullptr,
@@ -160,9 +175,28 @@
   // Disable dedupe so we can remove compiled methods.
   compiler_driver_->SetDedupeEnabled(false);
   compiler_driver_->SetSupportBootImageFixup(false);
+
+  if (compiler_options_->GetGenerateDebugInfo()) {
+#ifdef __ANDROID__
+    const char* prefix = GetAndroidData();
+#else
+    const char* prefix = "/tmp";
+#endif
+    DCHECK_EQ(compiler_driver_->GetThreadCount(), 1u)
+        << "Generating debug info only works with one compiler thread";
+    std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map";
+    perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
+    if (perf_file_ == nullptr) {
+      LOG(FATAL) << "Could not create perf file at " << perf_filename;
+    }
+  }
 }
 
 JitCompiler::~JitCompiler() {
+  if (perf_file_ != nullptr) {
+    UNUSED(perf_file_->Flush());
+    UNUSED(perf_file_->Close());
+  }
 }
 
 bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
@@ -188,6 +222,20 @@
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
+    if (success && compiler_options_->GetGenerateDebugInfo()) {
+      const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
+      std::ostringstream stream;
+      stream << std::hex
+             << reinterpret_cast<uintptr_t>(ptr)
+             << " "
+             << code_cache->GetMemorySizeOfCodePointer(ptr)
+             << " "
+             << PrettyMethod(method_to_compile)
+             << std::endl;
+      std::string str = stream.str();
+      bool res = perf_file_->WriteFully(str.c_str(), str.size());
+      CHECK(res);
+    }
   }
 
   // Trim maps to reduce memory usage.
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 913a6d0..037a18a 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -43,6 +43,9 @@
   size_t GetTotalCompileTime() const {
     return total_time_;
   }
+  CompilerOptions* GetCompilerOptions() const {
+    return compiler_options_.get();
+  }
 
  private:
   uint64_t total_time_;
@@ -53,6 +56,7 @@
   std::unique_ptr<CompilerCallbacks> callbacks_;
   std::unique_ptr<CompilerDriver> compiler_driver_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
+  std::unique_ptr<File> perf_file_;
 
   JitCompiler();
 
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index b10cc35..bf8e786 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -47,7 +47,7 @@
         driver_(&compiler_options_, &verification_results_, &inliner_map_,
                 Compiler::kQuick, instruction_set, nullptr,
                 false, nullptr, nullptr, nullptr, 1u,
-                false, false, "", false, nullptr, -1, nullptr, nullptr),
+                false, false, nullptr, -1, nullptr, nullptr),
         error_msg_(),
         instruction_set_(instruction_set),
         features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 9f7ffa5..7a2b74e 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -117,8 +117,6 @@
                                               2,
                                               true,
                                               true,
-                                              "",
-                                              false,
                                               timer_.get(),
                                               -1,
                                               nullptr,
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index dc75ff1..eee6116 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1142,7 +1142,7 @@
           loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) {
         SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
         if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
-          HoistToPreheaderOrDeoptBlock(loop, array_get);
+          HoistToPreHeaderOrDeoptBlock(loop, array_get);
         }
       }
     }
@@ -1280,7 +1280,8 @@
       // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
       // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
       // correctly guard against any possible OOB (including arithmetic wrap-around cases).
-      HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+      TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+      HBasicBlock* block = GetPreHeader(loop, instruction);
       induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
       if (lower != nullptr) {
         InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
@@ -1295,8 +1296,13 @@
    */
   bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) {
     if (loop != nullptr) {
+      // The loop preheader of an irreducible loop does not dominate all the blocks in
+      // the loop. We would need to find the common dominator of all blocks in the loop.
+      if (loop->IsIrreducible()) {
+        return false;
+      }
       // A try boundary preheader is hard to handle.
-      // TODO: remove this restriction
+      // TODO: remove this restriction.
       if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
         return false;
       }
@@ -1353,7 +1359,7 @@
       return true;
     } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
       if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
-        HoistToPreheaderOrDeoptBlock(loop, length);
+        HoistToPreHeaderOrDeoptBlock(loop, length);
         return true;
       }
     }
@@ -1371,7 +1377,8 @@
       HInstruction* array = check->InputAt(0);
       if (loop->IsDefinedOutOfTheLoop(array)) {
         // Generate: if (array == null) deoptimize;
-        HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        HBasicBlock* block = GetPreHeader(loop, check);
         HInstruction* cond =
             new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
         InsertDeopt(loop, block, cond);
@@ -1418,6 +1425,28 @@
     return true;
   }
 
+  /**
+   * Returns the appropriate preheader for the loop, depending on whether the
+   * instruction appears in the loop header or in the loop body proper.
+   */
+  HBasicBlock* GetPreHeader(HLoopInformation* loop, HInstruction* instruction) {
+    // Use the preheader unless there is an earlier generated deoptimization block, since
+    // hoisted expressions may depend on and/or be used by the deoptimization tests.
+    HBasicBlock* header = loop->GetHeader();
+    const uint32_t loop_id = header->GetBlockId();
+    auto it = taken_test_loop_.find(loop_id);
+    if (it != taken_test_loop_.end()) {
+      HBasicBlock* block = it->second;
+      // If always taken, keep it that way by returning the original preheader,
+      // which can be found by following the predecessor of the true-block twice.
+      if (instruction->GetBlock() == header) {
+        return block->GetSinglePredecessor()->GetSinglePredecessor();
+      }
+      return block;
+    }
+    return loop->GetPreHeader();
+  }
+
   /** Inserts a deoptimization test. */
   void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
     HInstruction* suspend = loop->GetSuspendCheck();
@@ -1432,28 +1461,17 @@
   }
 
   /** Hoists instruction out of the loop to preheader or deoptimization block. */
-  void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
-    // Use preheader unless there is an earlier generated deoptimization block since
-    // hoisted expressions may depend on and/or used by the deoptimization tests.
-    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
-    HBasicBlock* preheader = loop->GetPreHeader();
-    HBasicBlock* block = preheader;
-    auto it = taken_test_loop_.find(loop_id);
-    if (it != taken_test_loop_.end()) {
-      block = it->second;
-    }
-    // Hoist the instruction.
+  void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+    HBasicBlock* block = GetPreHeader(loop, instruction);
     DCHECK(!instruction->HasEnvironment());
     instruction->MoveBefore(block->GetLastInstruction());
   }
 
   /**
-   * Adds a new taken-test structure to a loop if needed (and not already done).
+   * Adds a new taken-test structure to a loop if needed and not already done.
    * The taken-test protects range analysis evaluation code to avoid any
    * deoptimization caused by incorrect trip-count evaluation in non-taken loops.
    *
-   * Returns block in which deoptimizations/invariants can be put.
-   *
    *          old_preheader
    *               |
    *            if_block          <- taken-test protects deoptimization block
@@ -1485,16 +1503,11 @@
    *     array[i] = 0;
    *   }
    */
-  HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
-    // Not needed (can use preheader), or already done (can reuse)?
+  void TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
+    // Not needed (can use preheader) or already done (can reuse)?
     const uint32_t loop_id = loop->GetHeader()->GetBlockId();
-    if (!needs_taken_test) {
-      return loop->GetPreHeader();
-    } else {
-      auto it = taken_test_loop_.find(loop_id);
-      if (it != taken_test_loop_.end()) {
-        return it->second;
-      }
+    if (!needs_taken_test || taken_test_loop_.find(loop_id) != taken_test_loop_.end()) {
+      return;
     }
 
     // Generate top test structure.
@@ -1523,7 +1536,6 @@
     if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
 
     taken_test_loop_.Put(loop_id, true_block);
-    return true_block;
   }
 
   /**
@@ -1538,7 +1550,7 @@
    *            \       /
    *           x_1 = phi(x_0, null)   <- synthetic phi
    *               |
-   *             header
+   *          new_preheader
    */
   void InsertPhiNodes() {
     // Scan all new deoptimization blocks.
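
As an aside on the GetPreHeader() change above: a minimal, self-contained sketch of the block shape that TransformLoopForDeoptimizationIfNeeded creates and of the two lookups GetPreHeader() performs on it. Block below is a toy stand-in, not the real HBasicBlock; only the single-predecessor chain from the diagram (old_preheader -> if_block -> true_block) matters here.

    // Toy model (not ART code) of the taken-test block structure.
    #include <cassert>

    struct Block { Block* single_predecessor = nullptr; };

    int main() {
      Block old_preheader, if_block, true_block;
      if_block.single_predecessor = &old_preheader;  // if_block holds the taken-test.
      true_block.single_predecessor = &if_block;     // true_block is the deopt block.

      // Instruction in the proper loop body: hoist into the deopt block itself.
      Block* body_target = &true_block;
      // Instruction in the loop header: hoist into the original preheader, found by
      // following the single predecessor of the true-block twice.
      Block* header_target = true_block.single_predecessor->single_predecessor;

      assert(body_target == &true_block);
      assert(header_target == &old_preheader);
      return 0;
    }
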
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index dbeb1cc..b7c24ff 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -42,7 +42,6 @@
 
   void RunBCE() {
     graph_->BuildDominatorTree();
-    graph_->AnalyzeNaturalLoops();
 
     InstructionSimplifier(graph_).Run();
 
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 1af6846..3721813 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -1271,44 +1271,14 @@
 
   // Add move-result for StringFactory method.
   uint32_t orig_this_reg = is_range ? register_index : args[0];
-  HInstruction* fake_string = LoadLocal(orig_this_reg, Primitive::kPrimNot, invoke->GetDexPc());
-  invoke->SetArgumentAt(argument_index, fake_string);
+  HInstruction* new_instance = LoadLocal(orig_this_reg, Primitive::kPrimNot, invoke->GetDexPc());
+  invoke->SetArgumentAt(argument_index, new_instance);
   current_block_->AddInstruction(invoke);
-  PotentiallySimplifyFakeString(orig_this_reg, invoke->GetDexPc(), invoke);
 
   latest_result_ = invoke;
-
   return true;
 }
 
-void HGraphBuilder::PotentiallySimplifyFakeString(uint16_t original_dex_register,
-                                                  uint32_t dex_pc,
-                                                  HInvoke* actual_string) {
-  if (!graph_->IsDebuggable()) {
-    // Notify that we cannot compile with baseline. The dex registers aliasing
-    // with `original_dex_register` will be handled when we optimize
-    // (see HInstructionSimplifer::VisitFakeString).
-    can_use_baseline_for_string_init_ = false;
-    return;
-  }
-  const VerifiedMethod* verified_method =
-      compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex());
-  if (verified_method != nullptr) {
-    UpdateLocal(original_dex_register, actual_string, dex_pc);
-    const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
-        verified_method->GetStringInitPcRegMap();
-    auto map_it = string_init_map.find(dex_pc);
-    if (map_it != string_init_map.end()) {
-      for (uint32_t reg : map_it->second) {
-        HInstruction* load_local = LoadLocal(original_dex_register, Primitive::kPrimNot, dex_pc);
-        UpdateLocal(reg, load_local, dex_pc);
-      }
-    }
-  } else {
-    can_use_baseline_for_string_init_ = false;
-  }
-}
-
 static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
   const char* type = dex_file.GetFieldTypeDescriptor(field_id);
@@ -2698,18 +2668,10 @@
     }
 
     case Instruction::NEW_INSTANCE: {
-      uint16_t type_index = instruction.VRegB_21c();
-      if (compiler_driver_->IsStringTypeIndex(type_index, dex_file_)) {
-        int32_t register_index = instruction.VRegA();
-        HFakeString* fake_string = new (arena_) HFakeString(dex_pc);
-        current_block_->AddInstruction(fake_string);
-        UpdateLocal(register_index, fake_string, dex_pc);
-      } else {
-        if (!BuildNewInstance(type_index, dex_pc)) {
-          return false;
-        }
-        UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
+      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
+        return false;
       }
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 26bf1cb..1d604e7 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -56,7 +56,6 @@
         return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(compiler_stats),
         interpreter_metadata_(interpreter_metadata),
         dex_cache_(dex_cache) {}
@@ -77,7 +76,6 @@
         return_type_(return_type),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(nullptr),
         interpreter_metadata_(nullptr),
         null_dex_cache_(),
@@ -85,10 +83,6 @@
 
   bool BuildGraph(const DexFile::CodeItem& code);
 
-  bool CanUseBaselineForStringInit() const {
-    return can_use_baseline_for_string_init_;
-  }
-
   static constexpr const char* kBuilderPassName = "builder";
 
   // The number of entries in a packed switch before we use a jump table or specified
@@ -363,11 +357,6 @@
   // used by move-result instructions.
   HInstruction* latest_result_;
 
-  // We need to know whether we have built a graph that has calls to StringFactory
-  // and hasn't gone through the verifier. If the following flag is `false`, then
-  // we cannot compile with baseline.
-  bool can_use_baseline_for_string_init_;
-
   OptimizingCompilerStats* compilation_stats_;
 
   const uint8_t* interpreter_metadata_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 53d3615..a3bbfdb 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -142,23 +142,6 @@
   return pointer_size * index;
 }
 
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
-  Initialize();
-  if (!is_leaf) {
-    MarkNotLeaf();
-  }
-  const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet());
-  InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs()
-                             + GetGraph()->GetTemporariesVRegSlots()
-                             + 1 /* filler */,
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           GetGraph()->GetMaximumNumberOfOutVRegs()
-                             + (is_64_bit ? 2 : 1) /* current method */,
-                           GetGraph()->GetBlocks());
-  CompileInternal(allocator, /* is_baseline */ true);
-}
-
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
@@ -220,8 +203,12 @@
   current_slow_path_ = nullptr;
 }
 
-void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
-  is_baseline_ = is_baseline;
+void CodeGenerator::Compile(CodeAllocator* allocator) {
+  // The register allocator already called `InitializeCodeGeneration`,
+  // where the frame size has been computed.
+  DCHECK(block_order_ != nullptr);
+  Initialize();
+
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   DCHECK_EQ(current_block_index_, 0u);
 
@@ -242,9 +229,6 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       DisassemblyScope disassembly_scope(current, *this);
-      if (is_baseline) {
-        InitLocationsBaseline(current);
-      }
       DCHECK(CheckTypeConsistency(current));
       current->Accept(instruction_visitor);
     }
@@ -254,7 +238,7 @@
 
   // Emit catch stack maps at the end of the stack map stream as expected by the
   // runtime exception handler.
-  if (!is_baseline && graph_->HasTryCatch()) {
+  if (graph_->HasTryCatch()) {
     RecordCatchBlockInfo();
   }
 
@@ -262,14 +246,6 @@
   Finalize(allocator);
 }
 
-void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
-  // The register allocator already called `InitializeCodeGeneration`,
-  // where the frame size has been computed.
-  DCHECK(block_order_ != nullptr);
-  Initialize();
-  CompileInternal(allocator, /* is_baseline */ false);
-}
-
 void CodeGenerator::Finalize(CodeAllocator* allocator) {
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
@@ -282,29 +258,6 @@
   // No linker patches by default.
 }
 
-size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
-  for (size_t i = 0; i < length; ++i) {
-    if (!array[i]) {
-      array[i] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
-size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
-  for (size_t i = 0; i < length - 1; i += 2) {
-    if (!array[i] && !array[i + 1]) {
-      array[i] = true;
-      array[i + 1] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
                                              size_t maximum_number_of_live_core_registers,
                                              size_t maximum_number_of_live_fpu_registers,
@@ -592,123 +545,6 @@
   }
 }
 
-void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
-  LocationSummary* locations = instruction->GetLocations();
-  if (locations == nullptr) return;
-
-  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
-    blocked_core_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    blocked_fpu_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = number_of_register_pairs_; i < e; ++i) {
-    blocked_register_pairs_[i] = false;
-  }
-
-  // Mark all fixed input, temp and output registers as used.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    BlockIfInRegister(locations->InAt(i));
-  }
-
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    BlockIfInRegister(loc);
-  }
-  Location result_location = locations->Out();
-  if (locations->OutputCanOverlapWithInputs()) {
-    BlockIfInRegister(result_location, /* is_out */ true);
-  }
-
-  SetupBlockedRegisters(/* is_baseline */ true);
-
-  // Allocate all unallocated input locations.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    Location loc = locations->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (loc.IsUnallocated()) {
-      if ((loc.GetPolicy() == Location::kRequiresRegister)
-          || (loc.GetPolicy() == Location::kRequiresFpuRegister)) {
-        loc = AllocateFreeRegister(input->GetType());
-      } else {
-        DCHECK_EQ(loc.GetPolicy(), Location::kAny);
-        HLoadLocal* load = input->AsLoadLocal();
-        if (load != nullptr) {
-          loc = GetStackLocation(load);
-        } else {
-          loc = AllocateFreeRegister(input->GetType());
-        }
-      }
-      locations->SetInAt(i, loc);
-    }
-  }
-
-  // Allocate all unallocated temp locations.
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    if (loc.IsUnallocated()) {
-      switch (loc.GetPolicy()) {
-        case Location::kRequiresRegister:
-          // Allocate a core register (large enough to fit a 32-bit integer).
-          loc = AllocateFreeRegister(Primitive::kPrimInt);
-          break;
-
-        case Location::kRequiresFpuRegister:
-          // Allocate a core register (large enough to fit a 64-bit double).
-          loc = AllocateFreeRegister(Primitive::kPrimDouble);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected policy for temporary location "
-                     << loc.GetPolicy();
-      }
-      locations->SetTempAt(i, loc);
-    }
-  }
-  if (result_location.IsUnallocated()) {
-    switch (result_location.GetPolicy()) {
-      case Location::kAny:
-      case Location::kRequiresRegister:
-      case Location::kRequiresFpuRegister:
-        result_location = AllocateFreeRegister(instruction->GetType());
-        break;
-      case Location::kSameAsFirstInput:
-        result_location = locations->InAt(0);
-        break;
-    }
-    locations->UpdateOut(result_location);
-  }
-}
-
-void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
-  AllocateLocations(instruction);
-  if (instruction->GetLocations() == nullptr) {
-    if (instruction->IsTemporary()) {
-      HInstruction* previous = instruction->GetPrevious();
-      Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-      Move(previous, temp_location, instruction);
-    }
-    return;
-  }
-  AllocateRegistersLocally(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    Location location = instruction->GetLocations()->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (location.IsValid()) {
-      // Move the input to the desired location.
-      if (input->GetNext()->IsTemporary()) {
-        // If the input was stored in a temporary, use that temporary to
-        // perform the move.
-        Move(input->GetNext(), location, instruction);
-      } else {
-        Move(input, location, instruction);
-      }
-    }
-  }
-}
-
 void CodeGenerator::AllocateLocations(HInstruction* instruction) {
   instruction->Accept(GetLocationBuilder());
   DCHECK(CheckTypeConsistency(instruction));
@@ -789,132 +625,6 @@
   }
 }
 
-void CodeGenerator::BuildNativeGCMap(
-    ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
-  const std::vector<uint8_t>& gc_map_raw =
-      compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
-          ->GetDexGcMap();
-  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
-
-  uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
-
-  size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
-  GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
-  for (size_t i = 0; i != num_stack_maps; ++i) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    uint32_t native_offset = stack_map_entry.native_pc_offset;
-    uint32_t dex_pc = stack_map_entry.dex_pc;
-    const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
-    builder.AddEntry(native_offset, references);
-  }
-}
-
-void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
-  uint32_t pc2dex_data_size = 0u;
-  uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
-  uint32_t pc2dex_offset = 0u;
-  int32_t pc2dex_dalvik_offset = 0;
-  uint32_t dex2pc_data_size = 0u;
-  uint32_t dex2pc_entries = 0u;
-  uint32_t dex2pc_offset = 0u;
-  int32_t dex2pc_dalvik_offset = 0;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
-    pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  // Walk over the blocks and find which ones correspond to catch block entries.
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      ++dex2pc_entries;
-      dex2pc_data_size += UnsignedLeb128Size(native_pc - dex2pc_offset);
-      dex2pc_data_size += SignedLeb128Size(block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-  uint32_t total_entries = pc2dex_entries + dex2pc_entries;
-  uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
-  uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
-  data->resize(data_size);
-
-  uint8_t* data_ptr = &(*data)[0];
-  uint8_t* write_pos = data_ptr;
-
-  write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
-  write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size);
-  uint8_t* write_pos2 = write_pos + pc2dex_data_size;
-
-  pc2dex_offset = 0u;
-  pc2dex_dalvik_offset = 0u;
-  dex2pc_offset = 0u;
-  dex2pc_dalvik_offset = 0u;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
-    write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
-    write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      write_pos2 = EncodeUnsignedLeb128(write_pos2, native_pc - dex2pc_offset);
-      write_pos2 = EncodeSignedLeb128(write_pos2, block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size);
-  DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size);
-
-  if (kIsDebugBuild) {
-    // Verify the encoded table holds the expected data.
-    MappingTable table(data_ptr);
-    CHECK_EQ(table.TotalSize(), total_entries);
-    CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
-    auto it = table.PcToDexBegin();
-    auto it2 = table.DexToPcBegin();
-    for (size_t i = 0; i < pc2dex_entries; i++) {
-      const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-      CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
-      CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
-      ++it;
-    }
-    for (HBasicBlock* block : graph_->GetBlocks()) {
-      if (block->IsCatchBlock()) {
-        CHECK_EQ(GetAddressOf(block), it2.NativePcOffset());
-        CHECK_EQ(block->GetDexPc(), it2.DexPc());
-        ++it2;
-      }
-    }
-    CHECK(it == table.PcToDexEnd());
-    CHECK(it2 == table.DexToPcEnd());
-  }
-}
-
-void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
-  Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
-  // We currently don't use callee-saved registers.
-  size_t size = 0 + 1 /* marker */ + 0;
-  vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
-  vmap_encoder.PushBackUnsigned(size);
-  vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-}
-
 size_t CodeGenerator::ComputeStackMapsSize() {
   return stack_map_stream_.PrepareForFillIn();
 }
@@ -997,6 +707,12 @@
   stack_map_stream_.EndStackMapEntry();
 }
 
+bool CodeGenerator::HasStackMapAtCurrentPc() {
+  uint32_t pc = GetAssembler()->CodeSize();
+  size_t count = stack_map_stream_.GetNumberOfStackMaps();
+  return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc;
+}
+
 void CodeGenerator::RecordCatchBlockInfo() {
   ArenaAllocator* arena = graph_->GetArena();
 
@@ -1320,12 +1036,6 @@
         << "instruction->DebugName()=" << instruction->DebugName()
         << " slow_path->GetDescription()=" << slow_path->GetDescription();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
-           // Control flow would not come back into the code if a fatal slow
-           // path is taken, so we do not care if it triggers GC.
-           slow_path->IsFatal() ||
-           // HDeoptimize is a special case: we know we are not coming back from
-           // it into the code.
-           instruction->IsDeoptimize() ||
            // When read barriers are enabled, some instructions use a
            // slow path to emit a read barrier, which does not trigger
            // GC, is not fatal, nor is emitted by HDeoptimize
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index eade05d..4f8f146 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -158,10 +158,8 @@
 
 class CodeGenerator {
  public:
-  // Compiles the graph to executable instructions. Returns whether the compilation
-  // succeeded.
-  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
-  void CompileOptimized(CodeAllocator* allocator);
+  // Compiles the graph to executable instructions.
+  void Compile(CodeAllocator* allocator);
   static CodeGenerator* Create(HGraph* graph,
                                InstructionSet instruction_set,
                                const InstructionSetFeatures& isa_features,
@@ -214,7 +212,7 @@
 
   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
-  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
+  virtual void SetupBlockedRegisters() const = 0;
 
   virtual void ComputeSpillMask() {
     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
@@ -269,6 +267,8 @@
 
   // Record native to dex mapping for a suspend point.  Required by runtime.
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
+  // Check whether we have already recorded a mapping at this PC.
+  bool HasStackMapAtCurrentPc();
 
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
@@ -288,17 +288,9 @@
     slow_paths_.push_back(slow_path);
   }
 
-  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
-  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
-  void BuildNativeGCMap(
-      ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
   void BuildStackMaps(MemoryRegion region);
   size_t ComputeStackMapsSize();
 
-  bool IsBaseline() const {
-    return is_baseline_;
-  }
-
   bool IsLeafMethod() const {
     return is_leaf_;
   }
@@ -487,7 +479,6 @@
         fpu_callee_save_mask_(fpu_callee_save_mask),
         stack_map_stream_(graph->GetArena()),
         block_order_(nullptr),
-        is_baseline_(false),
         disasm_info_(nullptr),
         stats_(stats),
         graph_(graph),
@@ -500,15 +491,6 @@
     slow_paths_.reserve(8);
   }
 
-  // Register allocation logic.
-  void AllocateRegistersLocally(HInstruction* instruction) const;
-
-  // Backend specific implementation for allocating a register.
-  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
-
-  static size_t FindFreeEntry(bool* array, size_t length);
-  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);
-
   virtual Location GetStackLocation(HLoadLocal* load) const = 0;
 
   virtual HGraphVisitor* GetLocationBuilder() = 0;
@@ -591,16 +573,11 @@
   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;
 
-  // Whether we are using baseline.
-  bool is_baseline_;
-
   DisassemblyInformation* disasm_info_;
 
  private:
-  void InitLocationsBaseline(HInstruction* instruction);
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void GenerateSlowPaths();
-  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
   void BlockIfInRegister(Location location, bool is_out = false) const;
   void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
 
@@ -611,7 +588,7 @@
 
   ArenaVector<SlowPathCode*> slow_paths_;
 
-  // The current slow path that we're generating code for.
+  // The current slow-path that we're generating code for.
   SlowPathCode* current_slow_path_;
 
   // The current block index in `block_order_` of the block
@@ -672,6 +649,122 @@
   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
 };
 
+/**
+ * A templated class SlowPathGenerator with a templated method NewSlowPath()
+ * that can be used by any code generator to share equivalent slow-paths, with
+ * the objective of reducing generated code size.
+ *
+ * InstructionType:  instruction that requires SlowPathCodeType
+ * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType*)
+ */
+template <typename InstructionType>
+class SlowPathGenerator {
+  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
+                "InstructionType is not a subclass of art::HInstruction");
+
+ public:
+  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
+      : graph_(graph),
+        codegen_(codegen),
+        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
+
+  // Creates and adds a new slow-path if needed, or returns an existing one otherwise.
+  // Templating the method (rather than the whole class) on the slow-path type keeps
+  // this code in a generic, non-architecture-specific place.
+  //
+  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
+  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
+  //       or template the class as a whole on SlowPathType.
+  template <typename SlowPathCodeType>
+  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
+    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
+                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
+    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
+                  "SlowPathCodeType is not constructible from InstructionType*");
+    // Iterate over potential candidates for sharing. Currently, only same-typed
+    // slow-paths with exactly the same dex-pc are viable candidates.
+    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
+    const uint32_t dex_pc = instruction->GetDexPc();
+    auto iter = slow_path_map_.find(dex_pc);
+    if (iter != slow_path_map_.end()) {
+      auto candidates = iter->second;
+      for (const auto& it : candidates) {
+        InstructionType* other_instruction = it.first;
+        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
+        // Determine if the instructions allow for slow-path sharing.
+        if (HaveSameLiveRegisters(instruction, other_instruction) &&
+            HaveSameStackMap(instruction, other_instruction)) {
+          // Can share: reuse existing one.
+          return other_slow_path;
+        }
+      }
+    } else {
+      // First time this dex-pc is seen.
+      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
+    }
+    // Cannot share: create and add new slow-path for this particular dex-pc.
+    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
+    iter->second.emplace_back(std::make_pair(instruction, slow_path));
+    codegen_->AddSlowPath(slow_path);
+    return slow_path;
+  }
+
+ private:
+  // Tests if both instructions have the same set of live physical registers. This ensures
+  // the slow-path has exactly the same preamble for saving these registers to the stack.
+  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
+    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
+    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
+    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
+    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
+    return (((live1->GetCoreRegisters() & core_spill) ==
+             (live2->GetCoreRegisters() & core_spill)) &&
+            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
+             (live2->GetFloatingPointRegisters() & fpu_spill)));
+  }
+
+  // Tests if both instructions have the same stack map. This ensures the interpreter
+  // will find exactly the same dex-registers at the same entries.
+  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
+    DCHECK(i1->HasEnvironment());
+    DCHECK(i2->HasEnvironment());
+    // We conservatively test if the two instructions find exactly the same instruction
+    // and location in each dex-register. This guarantees they will have the same stack map.
+    HEnvironment* e1 = i1->GetEnvironment();
+    HEnvironment* e2 = i2->GetEnvironment();
+    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
+      return false;
+    }
+    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
+      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
+          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  HGraph* const graph_;
+  CodeGenerator* const codegen_;
+
+  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
+  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
+};
+
+class InstructionCodeGenerator : public HGraphVisitor {
+ public:
+  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        deopt_slow_paths_(graph, codegen) {}
+
+ protected:
+  // Add a slow-path generator for each instruction/slow-path combination that desires sharing.
+  // TODO: under the current regime, only deopt sharing makes sense; extend later.
+  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
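
The intended call pattern for the new SlowPathGenerator/InstructionCodeGenerator pair can be seen in the ARM backend change further down; in sketch form, assuming the ART declarations from this header and from the ARM code generator:

    // A backend visitor asks the shared generator for a slow path instead of
    // unconditionally allocating one; equivalent deopts (same dex pc, live
    // registers, and stack map) reuse a single DeoptimizationSlowPathARM.
    void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
      SlowPathCode* slow_path =
          deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
      // Branch to the shared entry label when the guard fires; fall through otherwise.
      GenerateTestAndBranch(deoptimize,
                            /* condition_input_index */ 0,
                            slow_path->GetEntryLabel(),
                            /* false_target */ nullptr);
    }
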
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index b32a81a..2725792 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -47,9 +47,7 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = R0;
 
-// We unconditionally allocate R5 to ensure we can do long operations
-// with baseline.
-static constexpr Register kCoreSavedRegisterForBaseline = R5;
+static constexpr Register kCoreAlwaysSpillRegister = R5;
 static constexpr Register kCoreCalleeSaves[] =
     { R5, R6, R7, R8, R10, R11, LR };
 static constexpr SRegister kFpuCalleeSaves[] =
@@ -350,24 +348,24 @@
 
 class DeoptimizationSlowPathARM : public SlowPathCode {
  public:
-  explicit DeoptimizationSlowPathARM(HInstruction* instruction)
+  explicit DeoptimizationSlowPathARM(HDeoptimize* instruction)
     : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
-    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
 };
 
@@ -728,6 +726,24 @@
   UNREACHABLE();
 }
 
+inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM condition codes can express all the necessary branches; see the
+  // "Meaning (floating-point)" column in table A8-1 of the ARMv7 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return EQ;
+    case kCondNE: return NE /* unordered */;
+    case kCondLT: return gt_bias ? CC : LT /* unordered */;
+    case kCondLE: return gt_bias ? LS : LE /* unordered */;
+    case kCondGT: return gt_bias ? HI /* unordered */ : GT;
+    case kCondGE: return gt_bias ? CS /* unordered */ : GE;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << Register(reg);
 }
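
A small self-contained illustration of what gt_bias encodes at the dex level, and hence why kCondLT above maps to CC (false on unordered) under gt_bias but to LT (true on unordered) otherwise. The cmpg/cmpl functions below are toy models of the dex compare instructions, not ART helpers.

    #include <cassert>
    #include <cmath>

    // cmpg-float yields +1 when a NaN is involved, cmpl-float yields -1.
    int cmpg(float a, float b) { return a < b ? -1 : a > b ? 1 : a == b ? 0 : 1; }
    int cmpl(float a, float b) { return a < b ? -1 : a > b ? 1 : a == b ? 0 : -1; }

    int main() {
      float nan = std::nanf("");
      // "a < b" built from cmpg + if-ltz (gt_bias): not taken on NaN, so the
      // generated branch uses CC, which is false when the VFP compare is unordered.
      assert(!(cmpg(nan, 1.0f) < 0));
      // "a < b" built from cmpl + if-ltz (lt bias): taken on NaN, so the generated
      // branch uses LT, which is true when the VFP compare is unordered.
      assert(cmpl(nan, 1.0f) < 0);
      return 0;
    }
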
@@ -815,58 +831,7 @@
   CodeGenerator::Finalize(allocator);
 }
 
-Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      ArmManagedRegister pair =
-          ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        ArmManagedRegister current =
-            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfSRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimDouble: {
-      int reg = FindTwoFreeConsecutiveAlignedEntries(blocked_fpu_registers_, kNumberOfSRegisters);
-      DCHECK_EQ(reg % 2, 0);
-      return Location::FpuRegisterPairLocation(reg, reg + 1);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[R1_R2] = true;
 
@@ -881,15 +846,7 @@
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    blocked_core_registers_[kCoreSavedRegisterForBaseline] = false;
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
@@ -913,17 +870,16 @@
 }
 
 InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
 void CodeGeneratorARM::ComputeSpillMask() {
   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
-  // Save one extra register for baseline. Note that on thumb2, there is no easy
-  // instruction to restore just the PC, so this actually helps both baseline
-  // and non-baseline to save and restore at least two registers at entry and exit.
-  core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
   DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  // There is no easy instruction to restore just the PC on thumb2. We spill and
+  // restore another arbitrary register.
+  core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister);
   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
   // We use vpush and vpop for saving and restoring floating point registers, which take
   // a SRegister and the number of registers to save/restore after that SRegister. We
@@ -1414,28 +1370,11 @@
 void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) {
-  ShifterOperand operand;
-  if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, right, &operand)) {
-    __ cmp(left, operand);
-  } else {
-    Register temp = IP;
-    __ LoadImmediate(temp, right);
-    __ cmp(left, ShifterOperand(temp));
-  }
-}
-
 void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
                                                   Label* true_label,
-                                                  Label* false_label) {
+                                                  Label* false_label ATTRIBUTE_UNUSED) {
   __ vmstat();  // transfer FP status register to ARM APSR.
-  // TODO: merge into a single branch (except "equal or unordered" and "not equal")
-  if (cond->IsFPConditionTrueIfNaN()) {
-    __ b(true_label, VS);  // VS for unordered.
-  } else if (cond->IsFPConditionFalseIfNaN()) {
-    __ b(false_label, VS);  // VS for unordered.
-  }
-  __ b(true_label, ARMCondition(cond->GetCondition()));
+  __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
 }
 
 void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -1490,7 +1429,7 @@
     int32_t val_low = Low32Bits(value);
     int32_t val_high = High32Bits(value);
 
-    GenerateCompareWithImmediate(left_high, val_high);
+    __ CmpConstant(left_high, val_high);
     if (if_cond == kCondNE) {
       __ b(true_label, ARMCondition(true_high_cond));
     } else if (if_cond == kCondEQ) {
@@ -1500,7 +1439,7 @@
       __ b(false_label, ARMCondition(false_high_cond));
     }
     // Must be equal high, so compare the lows.
-    GenerateCompareWithImmediate(left_low, val_low);
+    __ CmpConstant(left_low, val_low);
   } else {
     Register right_high = right.AsRegisterPairHigh<Register>();
     Register right_low = right.AsRegisterPairLow<Register>();
@@ -1624,7 +1563,7 @@
       __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
     } else {
       DCHECK(right.IsConstant());
-      GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+      __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
     }
     if (true_target == nullptr) {
       __ b(false_target, ARMCondition(condition->GetOppositeCondition()));
@@ -1666,8 +1605,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
@@ -1679,6 +1617,10 @@
 }
 
 void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ nop();
+  }
   codegen_->RecordPcInfo(info, info->GetDexPc());
 }
 
@@ -1731,8 +1673,8 @@
         __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>()));
       } else {
         DCHECK(right.IsConstant());
-        GenerateCompareWithImmediate(left.AsRegister<Register>(),
-                                     CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+        __ CmpConstant(left.AsRegister<Register>(),
+                       CodeGenerator::GetInt32ValueOf(right.GetConstant()));
       }
       __ it(ARMCondition(cond->GetCondition()), kItElse);
       __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1),
@@ -1980,9 +1922,9 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
                                          codegen_->GetAssembler(),
@@ -2012,9 +1954,9 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -2902,8 +2844,7 @@
   Register dividend = locations->InAt(0).AsRegister<Register>();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
-  uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   if (ctz_imm == 1) {
@@ -2979,7 +2920,7 @@
     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
   } else if (imm == 1 || imm == -1) {
     DivRemOneOrMinusOne(instruction);
-  } else if (IsPowerOfTwo(std::abs(imm))) {
+  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
     DivRemByPowerOfTwo(instruction);
   } else {
     DCHECK(imm <= -2 || imm >= 2);
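
The switch from std::abs to AbsOrMin in these hunks matters for the most negative dividend constant: std::abs(INT32_MIN) is undefined behavior, while AbsOrMin is assumed here to hand back the minimum value itself, whose unsigned reinterpretation is still a power of two. A hedged, self-contained sketch with a stand-in helper:

    #include <cassert>
    #include <cstdint>
    #include <limits>

    // Stand-in with the assumed contract of AbsOrMin (not ART's implementation):
    // |v| for every value except the minimum, which is returned unchanged.
    int32_t AbsOrMinSketch(int32_t v) {
      return v == std::numeric_limits<int32_t>::min() ? v : (v < 0 ? -v : v);
    }

    int main() {
      int32_t imm = std::numeric_limits<int32_t>::min();
      uint32_t abs_imm = static_cast<uint32_t>(AbsOrMinSketch(imm));
      assert(abs_imm == 0x80000000u);
      assert((abs_imm & (abs_imm - 1)) == 0);  // Single bit set: CTZ(abs_imm) == 31.
      return 0;
    }
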
@@ -3008,12 +2949,12 @@
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-        int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue());
-        if (abs_imm <= 1) {
+        int32_t value = div->InputAt(1)->AsIntConstant()->GetValue();
+        if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
         } else {
           locations->AddTemp(Location::RequiresRegister());
-          if (!IsPowerOfTwo(abs_imm)) {
+          if (!IsPowerOfTwo(AbsOrMin(value))) {
             locations->AddTemp(Location::RequiresRegister());
           }
         }
@@ -3134,12 +3075,12 @@
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-        int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue());
-        if (abs_imm <= 1) {
+        int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue();
+        if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
         } else {
           locations->AddTemp(Location::RequiresRegister());
-          if (!IsPowerOfTwo(abs_imm)) {
+          if (!IsPowerOfTwo(AbsOrMin(value))) {
             locations->AddTemp(Location::RequiresRegister());
           }
         }
@@ -3660,20 +3601,34 @@
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(Location::RegisterLocation(R0));
 }
 
 void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize);
+    __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
+    __ LoadFromOffset(kLoadWord, LR, temp, code_offset.Int32Value());
+    __ blx(LR);
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
@@ -3798,6 +3753,7 @@
 
   Label less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
+  Condition less_cond;
   switch (type) {
     case Primitive::kPrimLong: {
       __ cmp(left.AsRegisterPairHigh<Register>(),
@@ -3808,6 +3764,7 @@
       __ LoadImmediate(out, 0);
       __ cmp(left.AsRegisterPairLow<Register>(),
              ShifterOperand(right.AsRegisterPairLow<Register>()));  // Unsigned compare.
+      less_cond = LO;
       break;
     }
     case Primitive::kPrimFloat:
@@ -3820,14 +3777,15 @@
                  FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
       }
       __ vmstat();  // transfer FP status register to ARM APSR.
-      __ b(compare->IsGtBias() ? &greater : &less, VS);  // VS for unordered.
+      less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
       break;
     }
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
+      UNREACHABLE();
   }
   __ b(&done, EQ);
-  __ b(&less, LO);  // LO is for both: unsigned compare for longs and 'less than' for floats.
+  __ b(&less, less_cond);
 
   __ Bind(&greater);
   __ LoadImmediate(out, 1);
@@ -5525,7 +5483,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5735,8 +5693,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6022,6 +5980,7 @@
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
       codegen_->AddSlowPath(slow_path);
 
+      // IP = Thread::Current()->GetIsGcMarking()
       __ LoadFromOffset(
           kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
       __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
@@ -6100,11 +6059,8 @@
   //   }
   //
   // Note: the original implementation in ReadBarrier::Barrier is
-  // slightly more complex as:
-  // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
-  // - it performs additional checks that we do not do here for
-  //   performance reasons.
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
 
   Register ref_reg = ref.AsRegister<Register>();
   Register temp_reg = temp.AsRegister<Register>();
@@ -6234,8 +6190,16 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) {
-  if (desired_dispatch_info.code_ptr_location ==
-      HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  if (GetGraph()->HasIrreducibleLoops() &&
+      (dispatch_info.method_load_kind ==
+          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+  }
+
+  if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
     const DexFile& outer_dex_file = GetGraph()->GetDexFile();
     if (&outer_dex_file != target_method.dex_file) {
       // Calls across dex files are more likely to exceed the available BL range,
@@ -6246,14 +6210,14 @@
           ? HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup
           : HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
       return HInvokeStaticOrDirect::DispatchInfo {
-        desired_dispatch_info.method_load_kind,
+        dispatch_info.method_load_kind,
         code_ptr_location,
-        desired_dispatch_info.method_load_data,
+        dispatch_info.method_load_data,
         0u
       };
     }
   }
-  return desired_dispatch_info;
+  return dispatch_info;
 }
 
 Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
@@ -6495,18 +6459,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderARM::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorARM::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
@@ -6550,7 +6502,7 @@
     }
     if (num_entries - last_index == 2) {
       // The last missing case_value.
-      GenerateCompareWithImmediate(temp_reg, 1);
+      __ CmpConstant(temp_reg, 1);
       __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
     }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index f9c49a5..d45ea97 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -188,7 +188,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
 
-class InstructionCodeGeneratorARM : public HGraphVisitor {
+class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen);
 
@@ -273,7 +273,6 @@
                              size_t condition_input_index,
                              Label* true_target,
                              Label* false_target);
-  void GenerateCompareWithImmediate(Register left, int32_t right);
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
                                     Label* false_target);
@@ -341,9 +340,7 @@
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -445,7 +442,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -453,7 +450,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b49f42b..2cb2741 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -93,6 +93,24 @@
   UNREACHABLE();
 }
 
+inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM64 condition codes can express all the necessary branches; see the
+  // "Meaning (floating-point)" column of table C1-1 in the ARMv8 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return eq;
+    case kCondNE: return ne /* unordered */;
+    case kCondLT: return gt_bias ? cc : lt /* unordered */;
+    case kCondLE: return gt_bias ? ls : le /* unordered */;
+    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
 Location ARM64ReturnLocation(Primitive::Type return_type) {
   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
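
The gt_bias flag consumed by ARM64FPCondition() encodes the dex cmpl/cmpg distinction for floating-point comparisons: with gt_bias an unordered compare (a NaN operand) must come out as "greater", without it as "less". A minimal reference model of that semantics, in plain C++ with an illustrative helper name; the Cset/Cneg lowering of HCompare and the FP branch changes later in this file pick condition codes that reproduce exactly this behavior:

#include <cstdint>

// Assumed semantics behind gt_bias: cmpg-style compares (gt_bias == true) treat an
// unordered result as "greater", cmpl-style compares treat it as "less".
static int32_t CompareFloatsSketch(float a, float b, bool gt_bias) {
  if (a == b) return 0;
  if (a < b) return -1;
  if (a > b) return 1;
  return gt_bias ? 1 : -1;  // Unordered: at least one operand is NaN.
}
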
@@ -477,24 +495,24 @@
 
 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
+  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
       : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
 };
 
@@ -604,30 +622,13 @@
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
             instruction_->GetLocations()->Intrinsified()));
+    // The read barrier instrumentation does not support the
+    // HArm64IntermediateAddress instruction yet.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
 
     __ Bind(GetEntryLabel());
 
-    // Note: In the case of a HArrayGet instruction, when the base
-    // address is a HArm64IntermediateAddress instruction, it does not
-    // point to the array object itself, but to an offset within this
-    // object. However, the read barrier entry point needs the array
-    // object address to be passed as first argument. So we
-    // temporarily set back `obj_` to that address, and restore its
-    // initial value later.
-    if (instruction_->IsArrayGet() &&
-        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* intermediate_address =
-            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
-        uint32_t intermediate_address_offset =
-            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
-        DCHECK_EQ(intermediate_address_offset, offset_);
-        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
-      }
-      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
-      __ Sub(obj_reg, obj_reg, offset_);
-    }
-
     SaveLiveRegisters(codegen, locations);
 
     // We may have to change the index's value, but as `index_` is a
@@ -728,22 +729,6 @@
 
     RestoreLiveRegisters(codegen, locations);
 
-    // Restore the value of `obj_` when it corresponds to a
-    // HArm64IntermediateAddress instruction.
-    if (instruction_->IsArrayGet() &&
-        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* intermediate_address =
-            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
-        uint32_t intermediate_address_offset =
-            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
-        DCHECK_EQ(intermediate_address_offset, offset_);
-        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
-      }
-      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
-      __ Add(obj_reg, obj_reg, offset_);
-    }
-
     __ B(GetExitLabel());
   }
 
@@ -1032,13 +1017,6 @@
   Primitive::Type type = instruction->GetType();
   DCHECK_NE(type, Primitive::kPrimVoid);
 
-  if (instruction->IsFakeString()) {
-    // The fake string is an alias for null.
-    DCHECK(IsBaseline());
-    instruction = locations->Out().GetConstant();
-    DCHECK(instruction->IsNullConstant()) << instruction->DebugName();
-  }
-
   if (instruction->IsCurrentMethod()) {
     MoveLocation(location,
                  Location::DoubleStackSlot(kCurrentMethodStackOffset),
@@ -1134,7 +1112,7 @@
   }
 }
 
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM64::SetupBlockedRegisters() const {
   // Blocked core registers:
   //      lr        : Runtime reserved.
   //      tr        : Runtime reserved.
@@ -1155,40 +1133,17 @@
     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
   }
 
-  if (is_baseline) {
-    CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
-    while (!reserved_core_baseline_registers.IsEmpty()) {
-      blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
-    }
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
-    CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
-    while (!reserved_fp_baseline_registers.IsEmpty()) {
-      blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+    CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
+    while (!reserved_fp_registers_debuggable.IsEmpty()) {
+      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
     }
   }
 }
 
-Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    ssize_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfAllocatableFPRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfAllocatableRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   __ Str(reg, MemOperand(sp, stack_index));
@@ -1605,7 +1560,7 @@
 
 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
                                                              CodeGeneratorARM64* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1977,6 +1932,9 @@
 }
 
 void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the
+  // HArm64IntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -1986,6 +1944,9 @@
 
 void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
     HArm64IntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the
+  // HArm64IntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
          Operand(InputOperandAt(instruction, 1)));
@@ -2074,6 +2035,9 @@
   } else {
     Register temp = temps.AcquireSameSizeAs(obj);
     if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      // The read barrier instrumentation does not support the
+      // HArm64IntermediateAddress instruction yet.
+      DCHECK(!kEmitCompilerReadBarrier);
       // We do not need to compute the intermediate address from the array: the
       // input instruction has done it already. See the comment in
       // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2100,11 +2064,6 @@
     if (index.IsConstant()) {
       codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
     } else {
-      // Note: when `obj_loc` is a HArm64IntermediateAddress, it does
-      // not contain the base address of the array object, which is
-      // needed by the read barrier entry point. So the read barrier
-      // slow path will temporarily set back `obj_loc` to the right
-      // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode).
       codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
     }
   }
@@ -2168,6 +2127,9 @@
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
       if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+        // The read barrier instrumentation does not support the
+        // HArm64IntermediateAddress instruction yet.
+        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2414,12 +2376,8 @@
       } else {
         __ Fcmp(left, InputFPRegisterAt(compare, 1));
       }
-      if (compare->IsGtBias()) {
-        __ Cset(result, ne);
-      } else {
-        __ Csetm(result, ne);
-      }
-      __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
+      __ Cset(result, ne);
+      __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
       break;
     }
     default:
@@ -2455,7 +2413,6 @@
   LocationSummary* locations = instruction->GetLocations();
   Register res = RegisterFrom(locations->Out(), instruction->GetType());
   IfCondition if_cond = instruction->GetCondition();
-  Condition arm64_cond = ARM64Condition(if_cond);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
     FPRegister lhs = InputFPRegisterAt(instruction, 0);
@@ -2466,20 +2423,13 @@
     } else {
       __ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
     }
-    __ Cset(res, arm64_cond);
-    if (instruction->IsFPConditionTrueIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 1 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 1
-      __ Csel(res, res, Operand(1), vc);  // VC for "not unordered".
-    } else if (instruction->IsFPConditionFalseIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 0 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 0
-      __ Csel(res, res, Operand(0), vc);  // VC for "not unordered".
-    }
+    __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
   } else {
     // Integer cases.
     Register lhs = InputRegisterAt(instruction, 0);
     Operand rhs = InputOperandAt(instruction, 1);
     __ Cmp(lhs, rhs);
-    __ Cset(res, arm64_cond);
+    __ Cset(res, ARM64Condition(if_cond));
   }
 }
 
@@ -2534,8 +2484,7 @@
   Register out = OutputRegister(instruction);
   Register dividend = InputRegisterAt(instruction, 0);
   int64_t imm = Int64FromConstant(second.GetConstant());
-  uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -2627,7 +2576,7 @@
       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (IsPowerOfTwo(std::abs(imm))) {
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
       DivRemByPowerOfTwo(instruction);
     } else {
       DCHECK(imm <= -2 || imm >= 2);
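
The switch from std::abs() to AbsOrMin() in the two hunks above (and again in the MIPS code generator further down) matters for the most negative dividend: std::abs() overflows for it, yet its unsigned bit pattern is already a power of two, so the shift-based division path can handle it. A sketch of the assumed AbsOrMin() behaviour (illustrative only, not the actual utils definition):

#include <cstdint>
#include <cstdlib>
#include <limits>

// Assumed behaviour: pass the most negative value through unchanged instead of
// calling std::abs(), which has undefined behaviour for that input.
static int32_t AbsOrMinSketch(int32_t value) {
  return value == std::numeric_limits<int32_t>::min() ? value : std::abs(value);
}

// Example: static_cast<uint32_t>(AbsOrMinSketch(INT32_MIN)) == 0x80000000u, so
// IsPowerOfTwo() holds and CTZ() yields 31, the shift amount DivRemByPowerOfTwo()
// needs; the 64-bit variant used in the ARM64 hunks behaves analogously.
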
@@ -2850,15 +2799,11 @@
       } else {
         __ Fcmp(lhs, InputFPRegisterAt(condition, 1));
       }
-      if (condition->IsFPConditionTrueIfNaN()) {
-        __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
-      } else if (condition->IsFPConditionFalseIfNaN()) {
-        __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
-      }
       if (true_target == nullptr) {
-        __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+        IfCondition opposite_condition = condition->GetOppositeCondition();
+        __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
       } else {
-        __ B(ARM64Condition(condition->GetCondition()), true_target);
+        __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
       }
     } else {
       // Integer cases.
@@ -2940,9 +2885,8 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathARM64(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCodeARM64* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
@@ -2954,6 +2898,10 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ Nop();
+  }
   codegen_->RecordPcInfo(info, info->GetDexPc());
 }
 
@@ -3493,9 +3441,9 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
   if (intrinsic.TryDispatch(invoke)) {
@@ -3743,9 +3691,9 @@
 
 
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -4059,19 +4007,33 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(LocationFrom(kArtMethodRegister));
+  } else {
+    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Location temp = instruction->GetLocations()->GetTemp(0);
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+    __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
+    __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
+    __ Blr(lr);
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
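
For a String HNewInstance nothing is allocated inline: the pNewEmptyString entrypoint slot on the current thread holds the StringFactory method, whose quick-compiled code is loaded and called with the method in kArtMethodRegister, and RecordPcInfo() provides the stack map for the return PC. The Ldr/Ldr/Blr sequence above has roughly the following C++ shape (the types below are stand-ins, not the real ART layouts; the actual offsets come from QUICK_ENTRY_POINT(pNewEmptyString) and ArtMethod::EntryPointFromQuickCompiledCodeOffset()):

// Illustrative stand-ins only.
struct FakeArtMethod {
  void* (*entry_point_from_quick_compiled_code)(FakeArtMethod* method);
};
struct FakeThread {
  FakeArtMethod* new_empty_string_entrypoint;  // The pNewEmptyString slot, addressed via tr.
};

static void* AllocEmptyStringSketch(FakeThread* self) {
  FakeArtMethod* factory = self->new_empty_string_entrypoint;     // Ldr temp, [tr, #pNewEmptyString]
  return factory->entry_point_from_quick_compiled_code(factory);  // Ldr lr, [temp, #code_offset]; Blr lr
}
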
@@ -4557,18 +4519,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderARM64::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorARM64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0e90ac6..8eb9fcc 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -186,7 +186,7 @@
   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
 };
 
-class InstructionCodeGeneratorARM64 : public HGraphVisitor {
+class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
 
@@ -339,10 +339,7 @@
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 4648606..5bd136a 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -444,19 +444,16 @@
 
 class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit DeoptimizationSlowPathMIPS(HInstruction* instruction)
+  explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction)
     : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
                                 instruction_,
-                                dex_pc,
+                                instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickDeoptimize));
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
@@ -465,7 +462,7 @@
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
 };
 
@@ -608,9 +605,9 @@
     // then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR
     // unpredictable and the following mfch1 will fail.
     __ Mfc1(TMP, f1);
-    __ Mfhc1(AT, f1);
+    __ MoveFromFpuHigh(AT, f1);
     __ Mtc1(r2_l, f1);
-    __ Mthc1(r2_h, f1);
+    __ MoveToFpuHigh(r2_h, f1);
     __ Move(r2_l, TMP);
     __ Move(r2_h, AT);
   } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) {
@@ -862,7 +859,7 @@
       Register dst_low =  destination.AsRegisterPairLow<Register>();
       FRegister src = source.AsFpuRegister<FRegister>();
       __ Mfc1(dst_low, src);
-      __ Mfhc1(dst_high, src);
+      __ MoveFromFpuHigh(dst_high, src);
     } else {
       DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
       int32_t off = source.GetStackIndex();
@@ -875,7 +872,7 @@
       Register src_high = source.AsRegisterPairHigh<Register>();
       Register src_low = source.AsRegisterPairLow<Register>();
       __ Mtc1(src_low, dst);
-      __ Mthc1(src_high, dst);
+      __ MoveToFpuHigh(src_high, dst);
     } else if (source.IsFpuRegister()) {
       __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
     } else {
@@ -1045,7 +1042,7 @@
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorMIPS::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[A1_A2] = true;
 
@@ -1075,16 +1072,6 @@
     blocked_fpu_registers_[i] = true;
   }
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-    }
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -1099,52 +1086,6 @@
   }
 }
 
-Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      MipsManagedRegister pair =
-          MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        MipsManagedRegister current =
-            MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  UNREACHABLE();
-}
-
 size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index);
   return kMipsWordSize;
@@ -1241,7 +1182,7 @@
 
 InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph,
                                                            CodeGeneratorMIPS* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1511,7 +1452,7 @@
 }
 
 void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   Primitive::Type type = instr->GetResultType();
@@ -1534,7 +1475,7 @@
 static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte;
 
 void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
   LocationSummary* locations = instr->GetLocations();
   Primitive::Type type = instr->GetType();
 
@@ -1542,30 +1483,58 @@
   bool use_imm = rhs_location.IsConstant();
   Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>();
   int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0;
-  uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue;
-  uint32_t shift_value = rhs_imm & shift_mask;
-  // Is the INS (Insert Bit Field) instruction supported?
-  bool has_ins = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+  const uint32_t shift_mask = (type == Primitive::kPrimInt)
+      ? kMaxIntShiftValue
+      : kMaxLongShiftValue;
+  const uint32_t shift_value = rhs_imm & shift_mask;
+  // Are the INS (Insert Bit Field) and ROTR instructions supported?
+  bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
 
   switch (type) {
     case Primitive::kPrimInt: {
       Register dst = locations->Out().AsRegister<Register>();
       Register lhs = locations->InAt(0).AsRegister<Register>();
       if (use_imm) {
-        if (instr->IsShl()) {
+        if (shift_value == 0) {
+          if (dst != lhs) {
+            __ Move(dst, lhs);
+          }
+        } else if (instr->IsShl()) {
           __ Sll(dst, lhs, shift_value);
         } else if (instr->IsShr()) {
           __ Sra(dst, lhs, shift_value);
-        } else {
+        } else if (instr->IsUShr()) {
           __ Srl(dst, lhs, shift_value);
+        } else {
+          if (has_ins_rotr) {
+            __ Rotr(dst, lhs, shift_value);
+          } else {
+            __ Sll(TMP, lhs, (kMipsBitsPerWord - shift_value) & shift_mask);
+            __ Srl(dst, lhs, shift_value);
+            __ Or(dst, dst, TMP);
+          }
         }
       } else {
         if (instr->IsShl()) {
           __ Sllv(dst, lhs, rhs_reg);
         } else if (instr->IsShr()) {
           __ Srav(dst, lhs, rhs_reg);
-        } else {
+        } else if (instr->IsUShr()) {
           __ Srlv(dst, lhs, rhs_reg);
+        } else {
+          if (has_ins_rotr) {
+            __ Rotrv(dst, lhs, rhs_reg);
+          } else {
+            __ Subu(TMP, ZERO, rhs_reg);
+            // 32-bit shift instructions use the 5 least significant bits of the shift count, so
+            // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case
+            // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out
+            // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`,
+            // IOW, the OR'd values are equal.
+            __ Sllv(TMP, lhs, TMP);
+            __ Srlv(dst, lhs, rhs_reg);
+            __ Or(dst, dst, TMP);
+          }
         }
       }
       break;
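
The new HRor handling above falls back to Sll/Srl/Or when the ROTR/ROTRV encodings (MIPS32R2 and later) are unavailable. In plain C++ the emitted sequence computes the usual rotate-right identity; a reference model with an illustrative helper name:

#include <cstdint>

// Rotate right by `amount` using only shifts and an OR, mirroring the non-ROTR path.
// MIPS 32-bit shifts only look at the low five bits of the amount, which is why the
// generated code masks the left-shift amount with shift_mask and why an amount of
// zero needs no more than a plain move.
static uint32_t RotateRight32(uint32_t value, uint32_t amount) {
  amount &= 31;
  if (amount == 0) {
    return value;
  }
  return (value >> amount) | (value << (32 - amount));
}
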
@@ -1580,7 +1549,7 @@
           if (shift_value == 0) {
             codegen_->Move64(locations->Out(), locations->InAt(0));
           } else if (shift_value < kMipsBitsPerWord) {
-            if (has_ins) {
+            if (has_ins_rotr) {
               if (instr->IsShl()) {
                 __ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
                 __ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value);
@@ -1589,10 +1558,15 @@
                 __ Srl(dst_low, lhs_low, shift_value);
                 __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
                 __ Sra(dst_high, lhs_high, shift_value);
+              } else if (instr->IsUShr()) {
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+                __ Srl(dst_high, lhs_high, shift_value);
               } else {
                 __ Srl(dst_low, lhs_low, shift_value);
                 __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
                 __ Srl(dst_high, lhs_high, shift_value);
+                __ Ins(dst_high, lhs_low, kMipsBitsPerWord - shift_value, shift_value);
               }
             } else {
               if (instr->IsShl()) {
@@ -1605,24 +1579,51 @@
                 __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
                 __ Srl(dst_low, lhs_low, shift_value);
                 __ Or(dst_low, dst_low, TMP);
-              } else {
+              } else if (instr->IsUShr()) {
                 __ Srl(dst_high, lhs_high, shift_value);
                 __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
                 __ Srl(dst_low, lhs_low, shift_value);
                 __ Or(dst_low, dst_low, TMP);
+              } else {
+                __ Srl(TMP, lhs_low, shift_value);
+                __ Sll(dst_low, lhs_high, kMipsBitsPerWord - shift_value);
+                __ Or(dst_low, dst_low, TMP);
+                __ Srl(TMP, lhs_high, shift_value);
+                __ Sll(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
+                __ Or(dst_high, dst_high, TMP);
               }
             }
           } else {
-            shift_value -= kMipsBitsPerWord;
+            const uint32_t shift_value_high = shift_value - kMipsBitsPerWord;
             if (instr->IsShl()) {
-              __ Sll(dst_high, lhs_low, shift_value);
+              __ Sll(dst_high, lhs_low, shift_value_high);
               __ Move(dst_low, ZERO);
             } else if (instr->IsShr()) {
-              __ Sra(dst_low, lhs_high, shift_value);
+              __ Sra(dst_low, lhs_high, shift_value_high);
               __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1);
-            } else {
-              __ Srl(dst_low, lhs_high, shift_value);
+            } else if (instr->IsUShr()) {
+              __ Srl(dst_low, lhs_high, shift_value_high);
               __ Move(dst_high, ZERO);
+            } else {
+              if (shift_value == kMipsBitsPerWord) {
+                // 64-bit rotation by 32 is just a swap.
+                __ Move(dst_low, lhs_high);
+                __ Move(dst_high, lhs_low);
+              } else {
+                if (has_ins_rotr) {
+                  __ Srl(dst_low, lhs_high, shift_value_high);
+                  __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high);
+                  __ Srl(dst_high, lhs_low, shift_value_high);
+                  __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high);
+                } else {
+                  __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high);
+                  __ Srl(dst_low, lhs_high, shift_value_high);
+                  __ Or(dst_low, dst_low, TMP);
+                  __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high);
+                  __ Srl(dst_high, lhs_low, shift_value_high);
+                  __ Or(dst_high, dst_high, TMP);
+                }
+              }
             }
           }
       } else {
@@ -1649,7 +1650,7 @@
           __ Beqz(TMP, &done);
           __ Move(dst_low, dst_high);
           __ Sra(dst_high, dst_high, 31);
-        } else {
+        } else if (instr->IsUShr()) {
           __ Srlv(dst_high, lhs_high, rhs_reg);
           __ Nor(AT, ZERO, rhs_reg);
           __ Sll(TMP, lhs_high, 1);
@@ -1660,6 +1661,21 @@
           __ Beqz(TMP, &done);
           __ Move(dst_low, dst_high);
           __ Move(dst_high, ZERO);
+        } else {
+          __ Nor(AT, ZERO, rhs_reg);
+          __ Srlv(TMP, lhs_low, rhs_reg);
+          __ Sll(dst_low, lhs_high, 1);
+          __ Sllv(dst_low, dst_low, AT);
+          __ Or(dst_low, dst_low, TMP);
+          __ Srlv(TMP, lhs_high, rhs_reg);
+          __ Sll(dst_high, lhs_low, 1);
+          __ Sllv(dst_high, dst_high, AT);
+          __ Or(dst_high, dst_high, TMP);
+          __ Andi(TMP, rhs_reg, kMipsBitsPerWord);
+          __ Beqz(TMP, &done);
+          __ Move(TMP, dst_high);
+          __ Move(dst_high, dst_low);
+          __ Move(dst_low, TMP);
         }
         __ Bind(&done);
       }
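
For 64-bit HRor with a non-constant shift the value lives in a register pair, so the hunk above rotates each 32-bit half and swaps the halves when bit 5 of the shift amount is set. A C++ model of that pair-wise computation (helper name illustrative; splitting the left shift into `<< 1` followed by `<< (31 - s)` mirrors the Sll/Sllv pair that keeps every shift amount within 0..31):

#include <cstdint>
#include <utility>

// Rotate a 64-bit value held as {hi, lo} right by `shift`, using only 32-bit shifts.
static uint64_t RotateRight64ViaPairs(uint32_t lo, uint32_t hi, uint32_t shift) {
  uint32_t s = shift & 31;  // Srlv/Sllv use only the low five bits of the amount.
  // Bits shifted out of one half re-enter at the top of the other half; when s == 0
  // the incoming term collapses to zero, matching the generated code.
  uint32_t new_lo = (lo >> s) | ((hi << 1) << (31 - s));
  uint32_t new_hi = (hi >> s) | ((lo << 1) << (31 - s));
  if (shift & 32) {  // Andi(TMP, rhs_reg, 32): rotating by 32..63 also swaps the halves.
    std::swap(new_lo, new_hi);
  }
  return (static_cast<uint64_t>(new_hi) << 32) | new_lo;
}
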
@@ -2314,8 +2330,7 @@
   Register out = locations->Out().AsRegister<Register>();
   Register dividend = locations->InAt(0).AsRegister<Register>();
   int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
-  uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   if (instruction->IsDiv()) {
@@ -2418,7 +2433,7 @@
       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (IsPowerOfTwo(std::abs(imm))) {
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
       DivRemByPowerOfTwo(instruction);
     } else {
       DCHECK(imm <= -2 || imm >= 2);
@@ -3358,8 +3373,8 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCodeMIPS* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
@@ -3371,6 +3386,10 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ Nop();
+  }
   codegen_->RecordPcInfo(info, info->GetDexPc());
 }
 
@@ -3457,8 +3476,8 @@
       // Need to move to FP regs since FP results are returned in core registers.
       __ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
               locations->Out().AsFpuRegister<FRegister>());
-      __ Mthc1(locations->GetTemp(2).AsRegister<Register>(),
-               locations->Out().AsFpuRegister<FRegister>());
+      __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                       locations->Out().AsFpuRegister<FRegister>());
     }
   } else {
     if (!Primitive::IsFloatingPointType(type)) {
@@ -3578,8 +3597,8 @@
       // Pass FP parameters in core registers.
       __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
               locations->InAt(1).AsFpuRegister<FRegister>());
-      __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(),
-               locations->InAt(1).AsFpuRegister<FRegister>());
+      __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                         locations->InAt(1).AsFpuRegister<FRegister>());
     }
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
                             instruction,
@@ -3760,9 +3779,9 @@
 }
 
 void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3898,9 +3917,9 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -4282,19 +4301,34 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
-  codegen_->InvokeRuntime(
-      GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
-      instruction,
-      instruction->GetDexPc(),
-      nullptr,
-      IsDirectEntrypoint(kQuickAllocObjectWithAccessCheck));
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+    __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
+    __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
+    __ Jalr(T9);
+    __ Nop();
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(
+        GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
+        instruction,
+        instruction->GetDexPc(),
+        nullptr,
+        IsDirectEntrypoint(kQuickAllocObjectWithAccessCheck));
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderMIPS::VisitNot(HNot* instruction) {
@@ -4536,14 +4570,12 @@
   codegen_->GenerateFrameExit();
 }
 
-void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
+void LocationsBuilderMIPS::VisitRor(HRor* ror) {
+  HandleShift(ror);
 }
 
-void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror) {
+  HandleShift(ror);
 }
 
 void LocationsBuilderMIPS::VisitShl(HShl* shl) {
@@ -4731,6 +4763,7 @@
   Primitive::Type input_type = conversion->GetInputType();
   Primitive::Type result_type = conversion->GetResultType();
   DCHECK_NE(input_type, result_type);
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
       (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
@@ -4738,8 +4771,9 @@
   }
 
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
-      (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
+  if (!isR6 &&
+      ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
+       (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
     call_kind = LocationSummary::kCall;
   }
 
@@ -4777,6 +4811,8 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
 
   DCHECK_NE(input_type, result_type);
 
@@ -4822,7 +4858,37 @@
                    << " to " << result_type;
     }
   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
-    if (input_type != Primitive::kPrimLong) {
+    if (input_type == Primitive::kPrimLong) {
+      if (isR6) {
+        // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+        // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+        Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+        Register src_low = locations->InAt(0).AsRegisterPairLow<Register>();
+        FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+        __ Mtc1(src_low, FTMP);
+        __ Mthc1(src_high, FTMP);
+        if (result_type == Primitive::kPrimFloat) {
+          __ Cvtsl(dst, FTMP);
+        } else {
+          __ Cvtdl(dst, FTMP);
+        }
+      } else {
+        int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
+                                                                      : QUICK_ENTRY_POINT(pL2d);
+        bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
+                                                             : IsDirectEntrypoint(kQuickL2d);
+        codegen_->InvokeRuntime(entry_offset,
+                                conversion,
+                                conversion->GetDexPc(),
+                                nullptr,
+                                direct);
+        if (result_type == Primitive::kPrimFloat) {
+          CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+        } else {
+          CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+        }
+      }
+    } else {
       Register src = locations->InAt(0).AsRegister<Register>();
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
       __ Mtc1(src, FTMP);
@@ -4831,54 +4897,168 @@
       } else {
         __ Cvtdw(dst, FTMP);
       }
-    } else {
-      int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
-                                                                    : QUICK_ENTRY_POINT(pL2d);
-      bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
-                                                           : IsDirectEntrypoint(kQuickL2d);
-      codegen_->InvokeRuntime(entry_offset,
-                              conversion,
-                              conversion->GetDexPc(),
-                              nullptr,
-                              direct);
-      if (result_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
-      } else {
-        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
-      }
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
-    int32_t entry_offset;
-    bool direct;
-    if (result_type != Primitive::kPrimLong) {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
-                                                           : QUICK_ENTRY_POINT(pD2iz);
-      direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz)
-                                                      : IsDirectEntrypoint(kQuickD2iz);
-    } else {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
-                                                           : QUICK_ENTRY_POINT(pD2l);
-      direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
-                                                      : IsDirectEntrypoint(kQuickD2l);
-    }
-    codegen_->InvokeRuntime(entry_offset,
-                            conversion,
-                            conversion->GetDexPc(),
-                            nullptr,
-                            direct);
-    if (result_type != Primitive::kPrimLong) {
-      if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+    if (result_type == Primitive::kPrimLong) {
+      if (isR6) {
+        // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+        // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+        FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+        Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+        Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+        MipsLabel truncate;
+        MipsLabel done;
+
+        // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+        // value when the input is either a NaN or is outside of the range of the output type
+        // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+        // the same result.
+        //
+        // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+        // value of the output type if the input is outside of the range after the truncation or
+        // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+        // results. This matches the desired float/double-to-int/long conversion exactly.
+        //
+        // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+        //
+        // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+        // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+        // even though it must be NAN2008=1 on R6.
+        //
+        // The code takes care of the different behaviors by first comparing the input to the
+        // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+        // If the input is greater than or equal to the minimum, it proceeds to the truncate
+        // instruction, which will handle such an input the same way irrespective of NAN2008.
+        // Otherwise the input is compared to itself to determine whether it is a NaN or not
+        // in order to return either zero or the minimum value.
+        //
+        // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+        // truncate instruction for MIPS64R6.
+        if (input_type == Primitive::kPrimFloat) {
+          uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min());
+          __ LoadConst32(TMP, min_val);
+          __ Mtc1(TMP, FTMP);
+          __ CmpLeS(FTMP, FTMP, src);
+        } else {
+          uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min());
+          __ LoadConst32(TMP, High32Bits(min_val));
+          __ Mtc1(ZERO, FTMP);
+          __ Mthc1(TMP, FTMP);
+          __ CmpLeD(FTMP, FTMP, src);
+        }
+
+        __ Bc1nez(FTMP, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpEqS(FTMP, src, src);
+        } else {
+          __ CmpEqD(FTMP, src, src);
+        }
+        __ Move(dst_low, ZERO);
+        __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min());
+        __ Mfc1(TMP, FTMP);
+        __ And(dst_high, dst_high, TMP);
+
+        __ B(&done);
+
+        __ Bind(&truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ TruncLS(FTMP, src);
+        } else {
+          __ TruncLD(FTMP, src);
+        }
+        __ Mfc1(dst_low, FTMP);
+        __ Mfhc1(dst_high, FTMP);
+
+        __ Bind(&done);
       } else {
-        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+        int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
+                                                                     : QUICK_ENTRY_POINT(pD2l);
+        bool direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
+                                                            : IsDirectEntrypoint(kQuickD2l);
+        codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct);
+        if (input_type == Primitive::kPrimFloat) {
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        } else {
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        }
       }
     } else {
+      FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+      Register dst = locations->Out().AsRegister<Register>();
+      MipsLabel truncate;
+      MipsLabel done;
+
+      // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+      // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+      // even though it must be NAN2008=1 on R6.
+      //
+      // For details see the large comment above for the truncation of float/double to long on R6.
+      //
+      // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+      // truncate instruction for MIPS64R6.
       if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+        __ LoadConst32(TMP, min_val);
+        __ Mtc1(TMP, FTMP);
       } else {
-        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+        __ LoadConst32(TMP, High32Bits(min_val));
+        __ Mtc1(ZERO, FTMP);
+        if (fpu_32bit) {
+          __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1));
+        } else {
+          __ Mthc1(TMP, FTMP);
+        }
       }
+
+      if (isR6) {
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpLeS(FTMP, FTMP, src);
+        } else {
+          __ CmpLeD(FTMP, FTMP, src);
+        }
+        __ Bc1nez(FTMP, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpEqS(FTMP, src, src);
+        } else {
+          __ CmpEqD(FTMP, src, src);
+        }
+        __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+        __ Mfc1(TMP, FTMP);
+        __ And(dst, dst, TMP);
+      } else {
+        if (input_type == Primitive::kPrimFloat) {
+          __ ColeS(0, FTMP, src);
+        } else {
+          __ ColeD(0, FTMP, src);
+        }
+        __ Bc1t(0, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CeqS(0, src, src);
+        } else {
+          __ CeqD(0, src, src);
+        }
+        __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+        __ Movf(dst, ZERO, 0);
+      }
+
+      __ B(&done);
+
+      __ Bind(&truncate);
+
+      if (input_type == Primitive::kPrimFloat) {
+        __ TruncWS(FTMP, src);
+      } else {
+        __ TruncWD(FTMP, src);
+      }
+      __ Mfc1(dst, FTMP);
+
+      __ Bind(&done);
     }
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
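
The compare-then-truncate sequences above (and the analogous long variant using the R6 trunc.l instructions) exist to reproduce the Java narrowing rules regardless of whether the FPU follows the NAN2008=0 or NAN2008=1 convention: NaN converts to 0, and out-of-range values clamp to the output type's minimum or maximum. A reference model of the float-to-int case (the double and long variants only change the limits); the helper name is illustrative:

#include <cmath>
#include <cstdint>
#include <limits>

// Java f2i semantics that the NaN check, range check and TruncWS above implement.
static int32_t FloatToIntSketch(float value) {
  if (std::isnan(value)) {
    return 0;                                    // CmpEqS/CeqS path.
  }
  if (value <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();  // Below the minimum: clamp.
  }
  if (value >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();  // Above the maximum: clamp.
  }
  return static_cast<int32_t>(value);            // In range: truncate toward zero.
}
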
@@ -5001,18 +5181,6 @@
   HandleCondition(comp);
 }
 
-void LocationsBuilderMIPS::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorMIPS::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 38302ad..2cde0ed 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -197,7 +197,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS);
 };
 
-class InstructionCodeGeneratorMIPS : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen);
 
@@ -290,10 +290,7 @@
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 05834ff..0505486 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -391,24 +391,24 @@
 
 class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction)
+  explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction)
     : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
-    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
 };
 
@@ -979,7 +979,7 @@
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
   // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
   blocked_core_registers_[ZERO] = true;
   blocked_core_registers_[K0] = true;
@@ -1003,8 +1003,7 @@
 
   // TODO: review; anything else?
 
-  // TODO: make these two for's conditional on is_baseline once
-  // all the issues with register saving/restoring are sorted out.
+  // TODO: remove once all the issues with register saving/restoring are sorted out.
   for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
     blocked_core_registers_[kCoreCalleeSaves[i]] = true;
   }
@@ -1014,20 +1013,6 @@
   }
 }
 
-Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
   return kMips64WordSize;
@@ -1113,7 +1098,7 @@
 
 InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph,
                                                                CodeGeneratorMIPS64* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1247,7 +1232,7 @@
 }
 
 void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   Primitive::Type type = instr->GetResultType();
@@ -1265,7 +1250,7 @@
 }
 
 void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
   LocationSummary* locations = instr->GetLocations();
   Primitive::Type type = instr->GetType();
 
@@ -1290,13 +1275,19 @@
           ? static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue)
           : static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue);
 
-        if (type == Primitive::kPrimInt) {
+        if (shift_value == 0) {
+          if (dst != lhs) {
+            __ Move(dst, lhs);
+          }
+        } else if (type == Primitive::kPrimInt) {
           if (instr->IsShl()) {
             __ Sll(dst, lhs, shift_value);
           } else if (instr->IsShr()) {
             __ Sra(dst, lhs, shift_value);
-          } else {
+          } else if (instr->IsUShr()) {
             __ Srl(dst, lhs, shift_value);
+          } else {
+            __ Rotr(dst, lhs, shift_value);
           }
         } else {
           if (shift_value < 32) {
@@ -1304,8 +1295,10 @@
               __ Dsll(dst, lhs, shift_value);
             } else if (instr->IsShr()) {
               __ Dsra(dst, lhs, shift_value);
-            } else {
+            } else if (instr->IsUShr()) {
               __ Dsrl(dst, lhs, shift_value);
+            } else {
+              __ Drotr(dst, lhs, shift_value);
             }
           } else {
             shift_value -= 32;
@@ -1313,8 +1306,10 @@
               __ Dsll32(dst, lhs, shift_value);
             } else if (instr->IsShr()) {
               __ Dsra32(dst, lhs, shift_value);
-            } else {
+            } else if (instr->IsUShr()) {
               __ Dsrl32(dst, lhs, shift_value);
+            } else {
+              __ Drotr32(dst, lhs, shift_value);
             }
           }
         }
@@ -1324,16 +1319,20 @@
             __ Sllv(dst, lhs, rhs_reg);
           } else if (instr->IsShr()) {
             __ Srav(dst, lhs, rhs_reg);
-          } else {
+          } else if (instr->IsUShr()) {
             __ Srlv(dst, lhs, rhs_reg);
+          } else {
+            __ Rotrv(dst, lhs, rhs_reg);
           }
         } else {
           if (instr->IsShl()) {
             __ Dsllv(dst, lhs, rhs_reg);
           } else if (instr->IsShr()) {
             __ Dsrav(dst, lhs, rhs_reg);
-          } else {
+          } else if (instr->IsUShr()) {
             __ Dsrlv(dst, lhs, rhs_reg);
+          } else {
+            __ Drotrv(dst, lhs, rhs_reg);
           }
         }
       }
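The rotate-right cases added in this hunk implement HRor with the MIPS64 rotate instructions; the constant path subtracts 32 before using the Drotr32 form because that encoding rotates by the immediate plus 32. As a rough illustration of the intended semantics (plain C++, not ART code; the helper names are only illustrative):

#include <cstdint>

// Illustrative only: the value the Rotr/Drotr paths are expected to compute.
static uint32_t RotateRight32(uint32_t x, uint32_t s) {
  s &= 31;  // shift amounts are masked, matching kMaxIntShiftValue above
  return (s == 0) ? x : (x >> s) | (x << (32 - s));
}

static uint64_t RotateRight64(uint64_t x, uint32_t s) {
  s &= 63;  // matching kMaxLongShiftValue
  return (s == 0) ? x : (x >> s) | (x << (64 - s));
}

The s == 0 guard mirrors the new shift_value == 0 fast path above, which simply moves lhs into dst.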
@@ -1955,8 +1954,7 @@
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
   GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
   int64_t imm = Int64FromConstant(second.GetConstant());
-  uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   if (instruction->IsDiv()) {
@@ -2138,7 +2136,7 @@
       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (IsPowerOfTwo(std::abs(imm))) {
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
       DivRemByPowerOfTwo(instruction);
     } else {
       DCHECK(imm <= -2 || imm >= 2);
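Replacing std::abs with AbsOrMin matters for the most negative divisor: std::abs(INT64_MIN) is undefined behavior, yet its magnitude (2^63) is still a power of two and can take the same fast path. A rough sketch of the idea, not necessarily identical to ART's actual helper:

#include <cstdint>
#include <limits>

// Illustrative: |imm|, except the most negative value is returned unchanged;
// casting it to unsigned afterwards still yields a power of two (2^63).
static int64_t AbsOrMinSketch(int64_t imm) {
  if (imm == std::numeric_limits<int64_t>::min()) {
    return imm;
  }
  return imm < 0 ? -imm : imm;
}

// e.g. static_cast<uint64_t>(AbsOrMinSketch(INT64_MIN)) == 0x8000000000000000, so CTZ gives 63.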
@@ -2736,9 +2734,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathMIPS64(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCodeMIPS64* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
@@ -2750,6 +2747,10 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ Nop();
+  }
   codegen_->RecordPcInfo(info, info->GetDexPc());
 }
 
@@ -3015,9 +3016,9 @@
 }
 
 void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3166,9 +3167,9 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3499,17 +3500,32 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+    __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
+    __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
+    __ Jalr(T9);
+    __ Nop();
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitNot(HNot* instruction) {
@@ -3722,14 +3738,12 @@
   codegen_->GenerateFrameExit();
 }
 
-void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
+void LocationsBuilderMIPS64::VisitRor(HRor* ror) {
+  HandleShift(ror);
 }
 
-void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
+void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror) {
+  HandleShift(ror);
 }
 
 void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
@@ -3918,36 +3932,18 @@
     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
   }
 
-  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
-      (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
-    call_kind = LocationSummary::kCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion);
+
+  if (Primitive::IsFloatingPointType(input_type)) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
-
-  if (call_kind == LocationSummary::kNoCall) {
-    if (Primitive::IsFloatingPointType(input_type)) {
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(0, Location::RequiresRegister());
-    }
-
-    if (Primitive::IsFloatingPointType(result_type)) {
-      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-    } else {
-      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-    }
+  if (Primitive::IsFloatingPointType(result_type)) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    InvokeRuntimeCallingConvention calling_convention;
-
-    if (Primitive::IsFloatingPointType(input_type)) {
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-    } else {
-      locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    }
-
-    locations->SetOut(calling_convention.GetReturnLocation(result_type));
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
@@ -3992,55 +3988,107 @@
                    << " to " << result_type;
     }
   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
-    if (input_type != Primitive::kPrimLong) {
-      FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
-      GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+    FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+    GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+    if (input_type == Primitive::kPrimLong) {
+      __ Dmtc1(src, FTMP);
+      if (result_type == Primitive::kPrimFloat) {
+        __ Cvtsl(dst, FTMP);
+      } else {
+        __ Cvtdl(dst, FTMP);
+      }
+    } else {
       __ Mtc1(src, FTMP);
       if (result_type == Primitive::kPrimFloat) {
         __ Cvtsw(dst, FTMP);
       } else {
         __ Cvtdw(dst, FTMP);
       }
-    } else {
-      int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
-                                                                    : QUICK_ENTRY_POINT(pL2d);
-      codegen_->InvokeRuntime(entry_offset,
-                              conversion,
-                              conversion->GetDexPc(),
-                              nullptr);
-      if (result_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
-      } else {
-        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
-      }
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
-    int32_t entry_offset;
-    if (result_type != Primitive::kPrimLong) {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
-                                                           : QUICK_ENTRY_POINT(pD2iz);
+    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+    FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
+    Mips64Label truncate;
+    Mips64Label done;
+
+    // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+    // value when the input is either a NaN or outside of the range of the output type
+    // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+    // the same result.
+    //
+    // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+    // value of the output type if the input is outside of the range after the truncation or
+    // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+    // results. This matches the desired float/double-to-int/long conversion exactly.
+    //
+    // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+    //
+    // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+    // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+    // even though it must be NAN2008=1 on R6.
+    //
+    // The code takes care of the different behaviors by first comparing the input to the
+    // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+    // If the input is greater than or equal to the minimum, it proceeds to the truncate
+    // instruction, which will handle such an input the same way irrespective of NAN2008.
+    // Otherwise the input is compared to itself to determine whether it is a NaN or not
+    // in order to return either zero or the minimum value.
+    //
+    // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+    // truncate instruction for MIPS64R6.
+    if (input_type == Primitive::kPrimFloat) {
+      uint32_t min_val = (result_type == Primitive::kPrimLong)
+          ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
+          : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+      __ LoadConst32(TMP, min_val);
+      __ Mtc1(TMP, FTMP);
+      __ CmpLeS(FTMP, FTMP, src);
     } else {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
-                                                           : QUICK_ENTRY_POINT(pD2l);
+      uint64_t min_val = (result_type == Primitive::kPrimLong)
+          ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
+          : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+      __ LoadConst64(TMP, min_val);
+      __ Dmtc1(TMP, FTMP);
+      __ CmpLeD(FTMP, FTMP, src);
     }
-    codegen_->InvokeRuntime(entry_offset,
-                            conversion,
-                            conversion->GetDexPc(),
-                            nullptr);
-    if (result_type != Primitive::kPrimLong) {
+
+    __ Bc1nez(FTMP, &truncate);
+
+    if (input_type == Primitive::kPrimFloat) {
+      __ CmpEqS(FTMP, src, src);
+    } else {
+      __ CmpEqD(FTMP, src, src);
+    }
+    if (result_type == Primitive::kPrimLong) {
+      __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
+    } else {
+      __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+    }
+    __ Mfc1(TMP, FTMP);
+    __ And(dst, dst, TMP);
+
+    __ Bc(&done);
+
+    __ Bind(&truncate);
+
+    if (result_type == Primitive::kPrimLong) {
       if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+        __ TruncLS(FTMP, src);
       } else {
-        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+        __ TruncLD(FTMP, src);
       }
+      __ Dmfc1(dst, FTMP);
     } else {
       if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        __ TruncWS(FTMP, src);
       } else {
-        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        __ TruncWD(FTMP, src);
       }
+      __ Mfc1(dst, FTMP);
     }
+
+    __ Bind(&done);
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
     FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
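The in-line sequence above replaces the pF2l/pD2l/pF2iz/pD2iz runtime calls while keeping the required conversion semantics: NaN converts to 0, inputs below the target type's minimum clamp to the minimum, inputs at or above the maximum clamp to the maximum (the truncate instruction itself provides that), and everything else truncates toward zero. A reference version of the float-to-long case in plain C++, for illustration only:

#include <cmath>
#include <cstdint>
#include <limits>

// What the generated code is expected to compute for float -> int64_t.
static int64_t FloatToLongReference(float in) {
  if (std::isnan(in)) {
    return 0;
  }
  // -2^63 is exactly representable as a float, so the lower bound is an exact compare.
  if (in < static_cast<float>(std::numeric_limits<int64_t>::min())) {
    return std::numeric_limits<int64_t>::min();
  }
  if (in >= static_cast<float>(std::numeric_limits<int64_t>::max())) {
    return std::numeric_limits<int64_t>::max();
  }
  return static_cast<int64_t>(in);  // truncation toward zero
}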
@@ -4162,18 +4210,6 @@
   HandleCondition(comp);
 }
 
-void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorMIPS64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 60ff96d..140ff95 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -201,7 +201,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64);
 };
 
-class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen);
 
@@ -289,10 +289,7 @@
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a808c27..f7ccdd8 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -365,11 +365,10 @@
 
 class DeoptimizationSlowPathX86 : public SlowPathCode {
  public:
-  explicit DeoptimizationSlowPathX86(HInstruction* instruction)
+  explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
     : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    DCHECK(instruction_->IsDeoptimize());
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
@@ -383,7 +382,7 @@
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
 };
 
@@ -818,65 +817,13 @@
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
-Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      X86ManagedRegister pair =
-          X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      Register reg = static_cast<Register>(
-          FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters));
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        X86ManagedRegister current =
-            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      return Location::FpuRegisterLocation(
-          FindFreeEntry(blocked_fpu_registers_, kNumberOfXmmRegisters));
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[ECX_EDX] = true;
 
   // Stack register is always reserved.
   blocked_core_registers_[ESP] = true;
 
-  if (is_baseline) {
-    blocked_core_registers_[EBP] = true;
-    blocked_core_registers_[ESI] = true;
-    blocked_core_registers_[EDI] = true;
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -892,7 +839,7 @@
 }
 
 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1611,9 +1558,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathX86(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
@@ -1625,6 +1570,10 @@
 }
 
 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ nop();
+  }
   codegen_->RecordPcInfo(info, info->GetDexPc());
 }
 
@@ -1980,9 +1929,9 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -1998,17 +1947,6 @@
   if (invoke->HasPcRelativeDexCache()) {
     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
-
-  if (codegen_->IsBaseline()) {
-    // Baseline does not have enough registers if the current method also
-    // needs a register. We therefore do not require a register for it, and let
-    // the code generation of the invoke handle it.
-    LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetSpecialInputIndex());
-    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
-    }
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -2021,9 +1959,9 @@
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3223,11 +3161,12 @@
   Register out_register = locations->Out().AsRegister<Register>();
   Register input_register = locations->InAt(0).AsRegister<Register>();
   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
 
-  DCHECK(IsPowerOfTwo(std::abs(imm)));
   Register num = locations->GetTemp(0).AsRegister<Register>();
 
-  __ leal(num, Address(input_register, std::abs(imm) - 1));
+  __ leal(num, Address(input_register, abs_imm - 1));
   __ testl(input_register, input_register);
   __ cmovl(kGreaterEqual, num, input_register);
   int shift = CTZ(imm);
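The lea/test/cmov sequence is the usual branch-free bias for signed division by a power of two: when the dividend is negative, adding abs_imm - 1 before the arithmetic shift makes the shift round toward zero instead of toward negative infinity. A compact C++ sketch of the same idea (assumes an arithmetic right shift on signed values, as on x86; the function name is illustrative):

#include <cstdint>

// Divide by +/- 2^k, rounding toward zero, without a div instruction.
// abs_imm is the power-of-two magnitude of imm.
static int32_t DivByPowerOfTwoSketch(int32_t dividend, int32_t imm, uint32_t abs_imm) {
  int shift = __builtin_ctz(abs_imm);
  int32_t biased =
      (dividend >= 0) ? dividend : dividend + static_cast<int32_t>(abs_imm - 1);
  int32_t quotient = biased >> shift;  // arithmetic shift assumed
  return (imm < 0) ? -quotient : quotient;
}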
@@ -3340,7 +3279,7 @@
           // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
         } else if (imm == 1 || imm == -1) {
           DivRemOneOrMinusOne(instruction);
-        } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+        } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
           DivByPowerOfTwo(instruction->AsDiv());
         } else {
           DCHECK(imm <= -2 || imm >= 2);
@@ -3941,20 +3880,33 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   locations->SetOut(Location::RegisterLocation(EAX));
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
-  DCHECK(!codegen_->IsLeafMethod());
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize);
+    __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
+    __ call(Address(temp, code_offset.Int32Value()));
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+    DCHECK(!codegen_->IsLeafMethod());
+  }
 }
 
 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
@@ -4185,19 +4137,28 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
-  switch (desired_dispatch_info.code_ptr_location) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+
+  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  if (GetGraph()->HasIrreducibleLoops() &&
+      (dispatch_info.method_load_kind ==
+          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+  }
+  switch (dispatch_info.code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
       // (Though the direct CALL ptr16:32 is available for consideration).
       return HInvokeStaticOrDirect::DispatchInfo {
-        desired_dispatch_info.method_load_kind,
+        dispatch_info.method_load_kind,
         HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        desired_dispatch_info.method_load_data,
+        dispatch_info.method_load_data,
         0u
       };
     default:
-      return desired_dispatch_info;
+      return dispatch_info;
   }
 }
 
@@ -4262,7 +4223,7 @@
       if (current_method.IsRegister()) {
         method_reg = current_method.AsRegister<Register>();
       } else {
-        DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified());
+        DCHECK(invoke->GetLocations()->Intrinsified());
         DCHECK(!current_method.IsValid());
         method_reg = reg;
         __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
@@ -5052,11 +5013,6 @@
 }
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
-  // This location builder might end up asking to up to four registers, which is
-  // not currently possible for baseline. The situation in which we need four
-  // registers cannot be met by baseline though, because it has not run any
-  // optimization.
-
   Primitive::Type value_type = instruction->GetComponentType();
 
   bool needs_write_barrier =
@@ -6053,7 +6009,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -6284,8 +6240,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6564,6 +6520,8 @@
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
     __ movl(root_reg, Address(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
   }
 }
 
@@ -6626,7 +6584,9 @@
   // Note: the original implementation in ReadBarrier::Barrier is
   // slightly more complex as:
   // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
+  //   which is a no-op thanks to the x86 memory model);
   // - it performs additional checks that we do not do here for
   //   performance reasons.
 
@@ -6744,18 +6704,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderX86::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorX86::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index df73476..43e9543 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -178,7 +178,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
 };
 
-class InstructionCodeGeneratorX86 : public HGraphVisitor {
+class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);
 
@@ -359,9 +359,7 @@
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -453,7 +451,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -461,7 +459,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 76a4ce2..2ce2d91 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -387,18 +387,16 @@
 
 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
  public:
-  explicit DeoptimizationSlowPathX86_64(HInstruction* instruction)
+  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
       : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
-                                  deoptimize,
-                                  deoptimize->GetDexPc(),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
                                   this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
@@ -406,7 +404,7 @@
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
 };
 
@@ -1000,51 +998,16 @@
 
 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                                CodeGeneratorX86_64* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
-Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters);
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFloatRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86_64::SetupBlockedRegisters() const {
   // Stack register is always reserved.
   blocked_core_registers_[RSP] = true;
 
   // Block the register used as TMP.
   blocked_core_registers_[TMP] = true;
-
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-    }
-  }
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
@@ -1594,9 +1557,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathX86_64(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
@@ -1608,6 +1569,10 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ nop();
+  }
   codegen_->RecordPcInfo(info, info->GetDexPc());
 }
 
@@ -2161,9 +2126,9 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -2183,9 +2148,9 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3350,13 +3315,13 @@
   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
 
   int64_t imm = Int64FromConstant(second.GetConstant());
-
-  DCHECK(IsPowerOfTwo(std::abs(imm)));
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint64_t abs_imm = AbsOrMin(imm);
 
   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
 
   if (instruction->GetResultType() == Primitive::kPrimInt) {
-    __ leal(tmp, Address(numerator, std::abs(imm) - 1));
+    __ leal(tmp, Address(numerator, abs_imm - 1));
     __ testl(numerator, numerator);
     __ cmov(kGreaterEqual, tmp, numerator);
     int shift = CTZ(imm);
@@ -3371,7 +3336,7 @@
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
 
-    codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
+    codegen_->Load64BitValue(rdx, abs_imm - 1);
     __ addq(rdx, numerator);
     __ testq(numerator, numerator);
     __ cmov(kGreaterEqual, rdx, numerator);
@@ -3529,7 +3494,7 @@
       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) {
+    } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
       DivByPowerOfTwo(instruction->AsDiv());
     } else {
       DCHECK(imm <= -2 || imm >= 2);
@@ -3912,21 +3877,33 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(Location::RegisterLocation(RAX));
 }
 
 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
-
-  DCHECK(!codegen_->IsLeafMethod());
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
+    __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
+    __ call(Address(temp, code_offset.SizeValue()));
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+    DCHECK(!codegen_->IsLeafMethod());
+  }
 }
 
 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
@@ -4686,13 +4663,13 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool object_array_set_with_read_barrier =
       kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      (may_need_runtime_call || object_array_set_with_read_barrier) ?
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
           LocationSummary::kCallOnSlowPath :
           LocationSummary::kNoCall);
 
@@ -4721,7 +4698,7 @@
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4773,7 +4750,7 @@
         __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4782,7 +4759,7 @@
       NearLabel done, not_null, do_put;
       SlowPathCode* slow_path = nullptr;
       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4860,7 +4837,7 @@
       } else {
         __ movl(address, register_value);
       }
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -5649,7 +5626,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5880,8 +5857,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6143,6 +6120,8 @@
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
     __ movl(root_reg, Address(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
   }
 }
 
@@ -6205,7 +6184,9 @@
   // Note: the original implementation in ReadBarrier::Barrier is
   // slightly more complex as:
   // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
+  //   here, which is a no-op thanks to the x86-64 memory model);
   // - it performs additional checks that we do not do here for
   //   performance reasons.
 
@@ -6323,18 +6304,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderX86_64::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorX86_64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c5e8a04..82aabb0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -183,7 +183,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
 };
 
-class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
+class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
 
@@ -347,8 +347,7 @@
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -401,7 +400,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              CpuRegister obj,
                                              uint32_t offset,
                                              Location temp,
@@ -409,7 +408,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              CpuRegister obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index d970704..19d63de 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -40,6 +40,7 @@
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "driver/compiler_options.h"
+#include "graph_checker.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "prepare_for_register_allocation.h"
@@ -70,8 +71,8 @@
     AddAllocatedRegister(Location::RegisterLocation(arm::R7));
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
-    arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline);
+  void SetupBlockedRegisters() const OVERRIDE {
+    arm::CodeGeneratorARM::SetupBlockedRegisters();
     blocked_core_registers_[arm::R4] = true;
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
@@ -90,8 +91,8 @@
     AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
-    x86::CodeGeneratorX86::SetupBlockedRegisters(is_baseline);
+  void SetupBlockedRegisters() const OVERRIDE {
+    x86::CodeGeneratorX86::SetupBlockedRegisters();
     // ebx is a callee-save register in C, but caller-save for ART.
     blocked_core_registers_[x86::EBX] = true;
     blocked_register_pairs_[x86::EAX_EBX] = true;
@@ -200,259 +201,228 @@
 }
 
 template <typename Expected>
-static void RunCodeBaseline(InstructionSet target_isa,
-                            HGraph* graph,
-                            bool has_result,
-                            Expected expected) {
-  InternalCodeAllocator allocator;
+static void RunCode(CodeGenerator* codegen,
+                    HGraph* graph,
+                    std::function<void(HGraph*)> hook_before_codegen,
+                    bool has_result,
+                    Expected expected) {
+  ASSERT_TRUE(graph->IsInSsaForm());
 
-  CompilerOptions compiler_options;
-  std::unique_ptr<const X86InstructionSetFeatures> features_x86(
-      X86InstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
-  // We avoid doing a stack overflow check that requires the runtime being setup,
-  // by making sure the compiler knows the methods we are running are leaf methods.
-  codegenX86.CompileBaseline(&allocator, true);
-  if (target_isa == kX86) {
-    Run(allocator, codegenX86, has_result, expected);
-  }
+  SSAChecker graph_checker(graph);
+  graph_checker.Run();
+  ASSERT_TRUE(graph_checker.IsValid());
 
-  std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
-      ArmInstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
-  codegenARM.CompileBaseline(&allocator, true);
-  if (target_isa == kArm || target_isa == kThumb2) {
-    Run(allocator, codegenARM, has_result, expected);
-  }
-
-  std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
-      X86_64InstructionSetFeatures::FromCppDefines());
-  x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
-  codegenX86_64.CompileBaseline(&allocator, true);
-  if (target_isa == kX86_64) {
-    Run(allocator, codegenX86_64, has_result, expected);
-  }
-
-  std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
-      Arm64InstructionSetFeatures::FromCppDefines());
-  arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
-  codegenARM64.CompileBaseline(&allocator, true);
-  if (target_isa == kArm64) {
-    Run(allocator, codegenARM64, has_result, expected);
-  }
-
-  std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
-      MipsInstructionSetFeatures::FromCppDefines());
-  mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
-  codegenMIPS.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kMips) {
-    Run(allocator, codegenMIPS, has_result, expected);
-  }
-
-  std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
-      Mips64InstructionSetFeatures::FromCppDefines());
-  mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
-  codegenMIPS64.CompileBaseline(&allocator, true);
-  if (target_isa == kMips64) {
-    Run(allocator, codegenMIPS64, has_result, expected);
-  }
-}
-
-template <typename Expected>
-static void RunCodeOptimized(CodeGenerator* codegen,
-                             HGraph* graph,
-                             std::function<void(HGraph*)> hook_before_codegen,
-                             bool has_result,
-                             Expected expected) {
-  // Tests may have already computed it.
-  if (graph->GetReversePostOrder().empty()) {
-    graph->BuildDominatorTree();
-  }
   SsaLivenessAnalysis liveness(graph, codegen);
-  liveness.Analyze();
 
-  RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
-  register_allocator.AllocateRegisters();
+  PrepareForRegisterAllocation(graph).Run();
+  liveness.Analyze();
+  RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
   hook_before_codegen(graph);
 
   InternalCodeAllocator allocator;
-  codegen->CompileOptimized(&allocator);
+  codegen->Compile(&allocator);
   Run(allocator, *codegen, has_result, expected);
 }
 
 template <typename Expected>
-static void RunCodeOptimized(InstructionSet target_isa,
-                             HGraph* graph,
-                             std::function<void(HGraph*)> hook_before_codegen,
-                             bool has_result,
-                             Expected expected) {
+static void RunCode(InstructionSet target_isa,
+                    HGraph* graph,
+                    std::function<void(HGraph*)> hook_before_codegen,
+                    bool has_result,
+                    Expected expected) {
   CompilerOptions compiler_options;
   if (target_isa == kArm || target_isa == kThumb2) {
     std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
         ArmInstructionSetFeatures::FromCppDefines());
     TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
-    RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kArm64) {
     std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
         Arm64InstructionSetFeatures::FromCppDefines());
     arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
-    RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kX86) {
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
     x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
-    RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kX86_64) {
     std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
         X86_64InstructionSetFeatures::FromCppDefines());
     x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
-    RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kMips) {
     std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
         MipsInstructionSetFeatures::FromCppDefines());
     mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
-    RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kMips64) {
     std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
         Mips64InstructionSetFeatures::FromCppDefines());
     mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
-    RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
   }
 }
 
-static void TestCode(InstructionSet target_isa,
-                     const uint16_t* data,
+static ::std::vector<InstructionSet> GetTargetISAs() {
+  ::std::vector<InstructionSet> v;
+  // Add all ISAs that are executable on hardware or on a simulator.
+  const ::std::vector<InstructionSet> executable_isa_candidates = {
+    kArm,
+    kArm64,
+    kThumb2,
+    kX86,
+    kX86_64,
+    kMips,
+    kMips64
+  };
+
+  for (auto target_isa : executable_isa_candidates) {
+    if (CanExecute(target_isa)) {
+      v.push_back(target_isa);
+    }
+  }
+
+  return v;
+}
+
+static void TestCode(const uint16_t* data,
                      bool has_result = false,
                      int32_t expected = 0) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  HGraph* graph = CreateGraph(&arena);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  // Remove suspend checks, they cannot be executed in this context.
-  RemoveSuspendChecks(graph);
-  RunCodeBaseline(target_isa, graph, has_result, expected);
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    HGraph* graph = CreateGraph(&arena);
+    HGraphBuilder builder(graph);
+    const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+    bool graph_built = builder.BuildGraph(*item);
+    ASSERT_TRUE(graph_built);
+    // Remove suspend checks, as they cannot be executed in this context.
+    RemoveSuspendChecks(graph);
+    TransformToSsa(graph);
+    RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+  }
 }
 
-static void TestCodeLong(InstructionSet target_isa,
-                         const uint16_t* data,
+static void TestCodeLong(const uint16_t* data,
                          bool has_result,
                          int64_t expected) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  HGraph* graph = CreateGraph(&arena);
-  HGraphBuilder builder(graph, Primitive::kPrimLong);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  // Remove suspend checks, they cannot be executed in this context.
-  RemoveSuspendChecks(graph);
-  RunCodeBaseline(target_isa, graph, has_result, expected);
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    HGraph* graph = CreateGraph(&arena);
+    HGraphBuilder builder(graph, Primitive::kPrimLong);
+    const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+    bool graph_built = builder.BuildGraph(*item);
+    ASSERT_TRUE(graph_built);
+    // Remove suspend checks, as they cannot be executed in this context.
+    RemoveSuspendChecks(graph);
+    TransformToSsa(graph);
+    RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+  }
 }
 
-class CodegenTest: public ::testing::TestWithParam<InstructionSet> {};
+class CodegenTest : public CommonCompilerTest {};
 
-TEST_P(CodegenTest, ReturnVoid) {
+TEST_F(CodegenTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG1) {
+TEST_F(CodegenTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG2) {
+TEST_F(CodegenTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG3) {
+TEST_F(CodegenTest, CFG3) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0xFF00);
 
-  TestCode(GetParam(), data1);
+  TestCode(data1);
 
   const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_16, 3,
     Instruction::RETURN_VOID,
     Instruction::GOTO_16, 0xFFFF);
 
-  TestCode(GetParam(), data2);
+  TestCode(data2);
 
   const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_32, 4, 0,
     Instruction::RETURN_VOID,
     Instruction::GOTO_32, 0xFFFF, 0xFFFF);
 
-  TestCode(GetParam(), data3);
+  TestCode(data3);
 }
 
-TEST_P(CodegenTest, CFG4) {
+TEST_F(CodegenTest, CFG4) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0xFE00);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG5) {
+TEST_F(CodegenTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, IntConstant) {
+TEST_F(CodegenTest, IntConstant) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, Return1) {
+TEST_F(CodegenTest, Return1) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN | 0);
 
-  TestCode(GetParam(), data, true, 0);
+  TestCode(data, true, 0);
 }
 
-TEST_P(CodegenTest, Return2) {
+TEST_F(CodegenTest, Return2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 0 | 1 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 0);
+  TestCode(data, true, 0);
 }
 
-TEST_P(CodegenTest, Return3) {
+TEST_F(CodegenTest, Return3) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 1);
+  TestCode(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnIf1) {
+TEST_F(CodegenTest, ReturnIf1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -460,10 +430,10 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 1);
+  TestCode(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnIf2) {
+TEST_F(CodegenTest, ReturnIf2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -471,12 +441,12 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 0);
+  TestCode(data, true, 0);
 }
 
 // Exercise bit-wise (one's complement) not-int instruction.
 #define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST_P(CodegenTest, TEST_NAME) {                        \
+TEST_F(CodegenTest, TEST_NAME) {                        \
   const int32_t input = INPUT;                          \
   const uint16_t input_lo = Low16Bits(input);           \
   const uint16_t input_hi = High16Bits(input);          \
@@ -485,7 +455,7 @@
       Instruction::NOT_INT | 1 << 8 | 0 << 12 ,         \
       Instruction::RETURN | 1 << 8);                    \
                                                         \
-  TestCode(GetParam(), data, true, EXPECTED_OUTPUT);    \
+  TestCode(data, true, EXPECTED_OUTPUT);                \
 }
 
 NOT_INT_TEST(ReturnNotIntMinus2, -2, 1)
@@ -501,7 +471,7 @@
 
 // Exercise bit-wise (one's complement) not-long instruction.
 #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT)                 \
-TEST_P(CodegenTest, TEST_NAME) {                                         \
+TEST_F(CodegenTest, TEST_NAME) {                                         \
   const int64_t input = INPUT;                                           \
   const uint16_t word0 = Low16Bits(Low32Bits(input));   /* LSW. */       \
   const uint16_t word1 = High16Bits(Low32Bits(input));                   \
@@ -512,7 +482,7 @@
       Instruction::NOT_LONG | 2 << 8 | 0 << 12,                          \
       Instruction::RETURN_WIDE | 2 << 8);                                \
                                                                          \
-  TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT);                 \
+  TestCodeLong(data, true, EXPECTED_OUTPUT);                             \
 }
 
 NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1))
@@ -551,7 +521,7 @@
 
 #undef NOT_LONG_TEST
 
-TEST_P(CodegenTest, IntToLongOfLongToInt) {
+TEST_F(CodegenTest, IntToLongOfLongToInt) {
   const int64_t input = INT64_C(4294967296);             // 2^32
   const uint16_t word0 = Low16Bits(Low32Bits(input));    // LSW.
   const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -565,192 +535,146 @@
       Instruction::INT_TO_LONG | 2 << 8 | 4 << 12,
       Instruction::RETURN_WIDE | 2 << 8);
 
-  TestCodeLong(GetParam(), data, true, 1);
+  TestCodeLong(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnAdd1) {
+TEST_F(CodegenTest, ReturnAdd1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, ReturnAdd2) {
+TEST_F(CodegenTest, ReturnAdd2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, ReturnAdd3) {
+TEST_F(CodegenTest, ReturnAdd3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, ReturnAdd4) {
+TEST_F(CodegenTest, ReturnAdd4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, NonMaterializedCondition) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
-  HGraph* graph = CreateGraph(&allocator);
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->SetEntryBlock(entry);
-  entry->AddInstruction(new (&allocator) HGoto());
-
-  HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(first_block);
-  entry->AddSuccessor(first_block);
-  HIntConstant* constant0 = graph->GetIntConstant(0);
-  HIntConstant* constant1 = graph->GetIntConstant(1);
-  HEqual* equal = new (&allocator) HEqual(constant0, constant0);
-  first_block->AddInstruction(equal);
-  first_block->AddInstruction(new (&allocator) HIf(equal));
-
-  HBasicBlock* then = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* else_ = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-
-  graph->AddBlock(then);
-  graph->AddBlock(else_);
-  graph->AddBlock(exit);
-  first_block->AddSuccessor(then);
-  first_block->AddSuccessor(else_);
-  then->AddSuccessor(exit);
-  else_->AddSuccessor(exit);
-
-  exit->AddInstruction(new (&allocator) HExit());
-  then->AddInstruction(new (&allocator) HReturn(constant0));
-  else_->AddInstruction(new (&allocator) HReturn(constant1));
-
-  ASSERT_TRUE(equal->NeedsMaterialization());
-  graph->BuildDominatorTree();
-  PrepareForRegisterAllocation(graph).Run();
-  ASSERT_FALSE(equal->NeedsMaterialization());
-
-  auto hook_before_codegen = [](HGraph* graph_in) {
-    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
-    HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
-    block->InsertInstructionBefore(move, block->GetLastInstruction());
-  };
-
-  RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0);
-}
-
-TEST_P(CodegenTest, ReturnMulInt) {
+TEST_F(CodegenTest, ReturnMulInt) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulInt2addr) {
+TEST_F(CodegenTest, ReturnMulInt2addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulLong) {
+TEST_F(CodegenTest, ReturnMulLong) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
-    Instruction::CONST_4 | 3 << 12 | 0,
-    Instruction::CONST_4 | 0 << 12 | 1 << 8,
-    Instruction::CONST_4 | 4 << 12 | 2 << 8,
-    Instruction::CONST_4 | 0 << 12 | 3 << 8,
+    Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+    Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
     Instruction::MUL_LONG, 2 << 8 | 0,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(GetParam(), data, true, 12);
+  TestCodeLong(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulLong2addr) {
+TEST_F(CodegenTest, ReturnMulLong2addr) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
-    Instruction::CONST_4 | 3 << 12 | 0 << 8,
-    Instruction::CONST_4 | 0 << 12 | 1 << 8,
-    Instruction::CONST_4 | 4 << 12 | 2 << 8,
-    Instruction::CONST_4 | 0 << 12 | 3 << 8,
+    Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+    Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
     Instruction::MUL_LONG_2ADDR | 2 << 12,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(GetParam(), data, true, 12);
+  TestCodeLong(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulIntLit8) {
+TEST_F(CodegenTest, ReturnMulIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulIntLit16) {
+TEST_F(CodegenTest, ReturnMulIntLit16) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, MaterializedCondition1) {
-  // Check that condition are materialized correctly. A materialized condition
-  // should yield `1` if it evaluated to true, and `0` otherwise.
-  // We force the materialization of comparisons for different combinations of
-  // inputs and check the results.
-
-  int lhs[] = {1, 2, -1, 2, 0xabc};
-  int rhs[] = {2, 1, 2, -1, 0xabc};
-
-  for (size_t i = 0; i < arraysize(lhs); i++) {
+TEST_F(CodegenTest, NonMaterializedCondition) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
     ArenaPool pool;
     ArenaAllocator allocator(&pool);
+
     HGraph* graph = CreateGraph(&allocator);
+    HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(entry);
+    graph->SetEntryBlock(entry);
+    entry->AddInstruction(new (&allocator) HGoto());
 
-    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(entry_block);
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddInstruction(new (&allocator) HGoto());
-    HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(code_block);
+    HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(first_block);
+    entry->AddSuccessor(first_block);
+    HIntConstant* constant0 = graph->GetIntConstant(0);
+    HIntConstant* constant1 = graph->GetIntConstant(1);
+    HEqual* equal = new (&allocator) HEqual(constant0, constant0);
+    first_block->AddInstruction(equal);
+    first_block->AddInstruction(new (&allocator) HIf(equal));
+
+    HBasicBlock* then_block = new (&allocator) HBasicBlock(graph);
+    HBasicBlock* else_block = new (&allocator) HBasicBlock(graph);
     HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(exit_block);
-    exit_block->AddInstruction(new (&allocator) HExit());
-
-    entry_block->AddSuccessor(code_block);
-    code_block->AddSuccessor(exit_block);
     graph->SetExitBlock(exit_block);
 
-    HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
-    HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
-    HLessThan cmp_lt(cst_lhs, cst_rhs);
-    code_block->AddInstruction(&cmp_lt);
-    HReturn ret(&cmp_lt);
-    code_block->AddInstruction(&ret);
+    graph->AddBlock(then_block);
+    graph->AddBlock(else_block);
+    graph->AddBlock(exit_block);
+    first_block->AddSuccessor(then_block);
+    first_block->AddSuccessor(else_block);
+    then_block->AddSuccessor(exit_block);
+    else_block->AddSuccessor(exit_block);
+
+    exit_block->AddInstruction(new (&allocator) HExit());
+    then_block->AddInstruction(new (&allocator) HReturn(constant0));
+    else_block->AddInstruction(new (&allocator) HReturn(constant1));
+
+    ASSERT_TRUE(equal->NeedsMaterialization());
+    TransformToSsa(graph);
+    PrepareForRegisterAllocation(graph).Run();
+    ASSERT_FALSE(equal->NeedsMaterialization());
 
     auto hook_before_codegen = [](HGraph* graph_in) {
       HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
@@ -758,93 +682,143 @@
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
 
-    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    RunCode(target_isa, graph, hook_before_codegen, true, 0);
   }
 }
 
-TEST_P(CodegenTest, MaterializedCondition2) {
-  // Check that HIf correctly interprets a materialized condition.
-  // We force the materialization of comparisons for different combinations of
-  // inputs. An HIf takes the materialized combination as input and returns a
-  // value that we verify.
+TEST_F(CodegenTest, MaterializedCondition1) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that conditions are materialized correctly. A materialized condition
+    // should yield `1` if it evaluated to true, and `0` otherwise.
+    // We force the materialization of comparisons for different combinations of
 
-  int lhs[] = {1, 2, -1, 2, 0xabc};
-  int rhs[] = {2, 1, 2, -1, 0xabc};
+    // inputs and check the results.
 
+    int lhs[] = {1, 2, -1, 2, 0xabc};
+    int rhs[] = {2, 1, 2, -1, 0xabc};
 
-  for (size_t i = 0; i < arraysize(lhs); i++) {
-    ArenaPool pool;
-    ArenaAllocator allocator(&pool);
-    HGraph* graph = CreateGraph(&allocator);
+    for (size_t i = 0; i < arraysize(lhs); i++) {
+      ArenaPool pool;
+      ArenaAllocator allocator(&pool);
+      HGraph* graph = CreateGraph(&allocator);
 
-    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(entry_block);
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddInstruction(new (&allocator) HGoto());
+      HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(entry_block);
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddInstruction(new (&allocator) HGoto());
+      HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(code_block);
+      HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(exit_block);
+      exit_block->AddInstruction(new (&allocator) HExit());
 
-    HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_block);
-    HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_true_block);
-    HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_false_block);
-    HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(exit_block);
-    exit_block->AddInstruction(new (&allocator) HExit());
+      entry_block->AddSuccessor(code_block);
+      code_block->AddSuccessor(exit_block);
+      graph->SetExitBlock(exit_block);
 
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddSuccessor(if_block);
-    if_block->AddSuccessor(if_true_block);
-    if_block->AddSuccessor(if_false_block);
-    if_true_block->AddSuccessor(exit_block);
-    if_false_block->AddSuccessor(exit_block);
-    graph->SetExitBlock(exit_block);
+      HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+      HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+      HLessThan cmp_lt(cst_lhs, cst_rhs);
+      code_block->AddInstruction(&cmp_lt);
+      HReturn ret(&cmp_lt);
+      code_block->AddInstruction(&ret);
 
-    HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
-    HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
-    HLessThan cmp_lt(cst_lhs, cst_rhs);
-    if_block->AddInstruction(&cmp_lt);
-    // We insert a temporary to separate the HIf from the HLessThan and force
-    // the materialization of the condition.
-    HTemporary force_materialization(0);
-    if_block->AddInstruction(&force_materialization);
-    HIf if_lt(&cmp_lt);
-    if_block->AddInstruction(&if_lt);
-
-    HIntConstant* cst_lt = graph->GetIntConstant(1);
-    HReturn ret_lt(cst_lt);
-    if_true_block->AddInstruction(&ret_lt);
-    HIntConstant* cst_ge = graph->GetIntConstant(0);
-    HReturn ret_ge(cst_ge);
-    if_false_block->AddInstruction(&ret_ge);
-
-    auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
-      HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
-      block->InsertInstructionBefore(move, block->GetLastInstruction());
-    };
-
-    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+      TransformToSsa(graph);
+      auto hook_before_codegen = [](HGraph* graph_in) {
+        HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+        HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+        block->InsertInstructionBefore(move, block->GetLastInstruction());
+      };
+      RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    }
   }
 }
 
-TEST_P(CodegenTest, ReturnDivIntLit8) {
+TEST_F(CodegenTest, MaterializedCondition2) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that HIf correctly interprets a materialized condition.
+    // We force the materialization of comparisons for different combinations of
+    // inputs. An HIf takes the materialized combination as input and returns a
+    // value that we verify.
+
+    int lhs[] = {1, 2, -1, 2, 0xabc};
+    int rhs[] = {2, 1, 2, -1, 0xabc};
+
+
+    for (size_t i = 0; i < arraysize(lhs); i++) {
+      ArenaPool pool;
+      ArenaAllocator allocator(&pool);
+      HGraph* graph = CreateGraph(&allocator);
+
+      HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(entry_block);
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddInstruction(new (&allocator) HGoto());
+
+      HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_block);
+      HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_true_block);
+      HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_false_block);
+      HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(exit_block);
+      exit_block->AddInstruction(new (&allocator) HExit());
+
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddSuccessor(if_block);
+      if_block->AddSuccessor(if_true_block);
+      if_block->AddSuccessor(if_false_block);
+      if_true_block->AddSuccessor(exit_block);
+      if_false_block->AddSuccessor(exit_block);
+      graph->SetExitBlock(exit_block);
+
+      HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+      HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+      HLessThan cmp_lt(cst_lhs, cst_rhs);
+      if_block->AddInstruction(&cmp_lt);
+      // We insert a temporary to separate the HIf from the HLessThan and force
+      // the materialization of the condition.
+      HTemporary force_materialization(0);
+      if_block->AddInstruction(&force_materialization);
+      HIf if_lt(&cmp_lt);
+      if_block->AddInstruction(&if_lt);
+
+      HIntConstant* cst_lt = graph->GetIntConstant(1);
+      HReturn ret_lt(cst_lt);
+      if_true_block->AddInstruction(&ret_lt);
+      HIntConstant* cst_ge = graph->GetIntConstant(0);
+      HReturn ret_ge(cst_ge);
+      if_false_block->AddInstruction(&ret_ge);
+
+      TransformToSsa(graph);
+      auto hook_before_codegen = [](HGraph* graph_in) {
+        HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+        HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+        block->InsertInstructionBefore(move, block->GetLastInstruction());
+      };
+      RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    }
+  }
+}
+
+TEST_F(CodegenTest, ReturnDivIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::DIV_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 1);
+  TestCode(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnDivInt2Addr) {
+TEST_F(CodegenTest, ReturnDivInt2Addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0,
     Instruction::CONST_4 | 2 << 12 | 1 << 8,
     Instruction::DIV_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 2);
+  TestCode(data, true, 2);
 }
 
 // Helper method.
@@ -933,80 +907,55 @@
   block->AddInstruction(comparison);
   block->AddInstruction(new (&allocator) HReturn(comparison));
 
-  auto hook_before_codegen = [](HGraph*) {
-  };
-  RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result);
+  TransformToSsa(graph);
+  RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result);
 }
 
-TEST_P(CodegenTest, ComparisonsInt) {
-  const InstructionSet target_isa = GetParam();
-  for (int64_t i = -1; i <= 1; i++) {
-    for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondB,  i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondA,  i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+TEST_F(CodegenTest, ComparisonsInt) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    for (int64_t i = -1; i <= 1; i++) {
+      for (int64_t j = -1; j <= 1; j++) {
+        TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondB,  i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondA,  i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+      }
     }
   }
 }
 
-TEST_P(CodegenTest, ComparisonsLong) {
+TEST_F(CodegenTest, ComparisonsLong) {
   // TODO: make MIPS work for long
   if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
     return;
   }
 
-  const InstructionSet target_isa = GetParam();
-  if (target_isa == kMips || target_isa == kMips64) {
-    return;
-  }
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    if (target_isa == kMips || target_isa == kMips64) {
+      continue;
+    }
 
-  for (int64_t i = -1; i <= 1; i++) {
-    for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondB,  i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondA,  i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+    for (int64_t i = -1; i <= 1; i++) {
+      for (int64_t j = -1; j <= 1; j++) {
+        TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondB,  i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondA,  i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+      }
     }
   }
 }
 
-static ::std::vector<InstructionSet> GetTargetISAs() {
-  ::std::vector<InstructionSet> v;
-  // Add all ISAs that are executable on hardware or on simulator.
-  const ::std::vector<InstructionSet> executable_isa_candidates = {
-    kArm,
-    kArm64,
-    kThumb2,
-    kX86,
-    kX86_64,
-    kMips,
-    kMips64
-  };
-
-  for (auto target_isa : executable_isa_candidates) {
-    if (CanExecute(target_isa)) {
-      v.push_back(target_isa);
-    }
-  }
-
-  return v;
-}
-
-INSTANTIATE_TEST_CASE_P(MultipleTargets,
-                        CodegenTest,
-                        ::testing::ValuesIn(GetTargetISAs()));
-
 }  // namespace art
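
Aside: the codegen_test.cc hunks above swap gtest's value-parameterized tests (TEST_P plus INSTANTIATE_TEST_CASE_P) for plain TEST_F tests that loop over GetTargetISAs() internally. A minimal standalone sketch of that looping pattern follows; the enum, the CanExecuteIsa helper, and the test body are made up for illustration and are not code from this change.

  #include <vector>
  #include "gtest/gtest.h"

  namespace {

  enum class Isa { kArm, kX86_64 };

  // Hypothetical stand-in for CanExecute(): pretend only x86_64 is runnable here.
  bool CanExecuteIsa(Isa isa) { return isa == Isa::kX86_64; }

  std::vector<Isa> ExecutableIsas() {
    std::vector<Isa> isas;
    for (Isa candidate : {Isa::kArm, Isa::kX86_64}) {
      if (CanExecuteIsa(candidate)) {
        isas.push_back(candidate);
      }
    }
    return isas;
  }

  // One plain test covers every executable ISA by iterating explicitly,
  // instead of relying on INSTANTIATE_TEST_CASE_P to stamp out per-ISA tests.
  TEST(IsaLoopSketch, CoversEveryExecutableIsa) {
    for (Isa isa : ExecutableIsas()) {
      EXPECT_TRUE(CanExecuteIsa(isa));
    }
  }

  }  // namespace
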
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 67ff87a..e170e37 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -81,12 +81,6 @@
   }
 }
 
-static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) {
-  for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) {
-    set->SetBit(it.Current()->GetHeader()->GetBlockId());
-  }
-}
-
 void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
   if (stats_ != nullptr) {
     stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction,
@@ -95,13 +89,19 @@
 }
 
 void HDeadCodeElimination::RemoveDeadBlocks() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not eliminate dead blocks if the graph has irreducible loops. We could
+    // support it, but that would require changes in our loop representation to handle
+    // multiple entry points. We decided it was not worth the complexity.
+    return;
+  }
   // Classify blocks as reachable/unreachable.
   ArenaAllocator* allocator = graph_->GetArena();
   ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false);
-  ArenaBitVector affected_loops(allocator, graph_->GetBlocks().size(), false);
 
   MarkReachableBlocks(graph_, &live_blocks);
   bool removed_one_or_more_blocks = false;
+  bool rerun_dominance_and_loop_analysis = false;
 
   // Remove all dead blocks. Iterate in post order because removal needs the
   // block's chain of dominators and nested loops need to be updated from the
@@ -109,25 +109,28 @@
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block  = it.Current();
     int id = block->GetBlockId();
-    if (live_blocks.IsBitSet(id)) {
-      if (affected_loops.IsBitSet(id)) {
-        DCHECK(block->IsLoopHeader());
-        block->GetLoopInformation()->Update();
-      }
-    } else {
+    if (!live_blocks.IsBitSet(id)) {
       MaybeRecordDeadBlock(block);
-      MarkLoopHeadersContaining(*block, &affected_loops);
       block->DisconnectAndDelete();
       removed_one_or_more_blocks = true;
+      if (block->IsInLoop()) {
+        rerun_dominance_and_loop_analysis = true;
+      }
     }
   }
 
   // If we removed at least one block, we need to recompute the full
   // dominator tree and try block membership.
   if (removed_one_or_more_blocks) {
-    graph_->ClearDominanceInformation();
-    graph_->ComputeDominanceInformation();
-    graph_->ComputeTryBlockInformation();
+    if (rerun_dominance_and_loop_analysis) {
+      graph_->ClearLoopInformation();
+      graph_->ClearDominanceInformation();
+      graph_->BuildDominatorTree();
+    } else {
+      graph_->ClearDominanceInformation();
+      graph_->ComputeDominanceInformation();
+      graph_->ComputeTryBlockInformation();
+    }
   }
 
   // Connect successive blocks created by dead branches. Order does not matter.
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index 6582063..3db254a 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -83,6 +83,11 @@
 };
 
 void DexCacheArrayFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
   DexCacheArrayFixupsVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
   visitor.MoveBasesIfNeeded();
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 9b0eb70..4770fa2 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -33,7 +33,6 @@
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
   graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
   return graph;
 }
 
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 6d0bdbe..3113677 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -391,6 +391,15 @@
     }
   }
 
+  // Ensure dominated blocks have `block` as the dominator.
+  for (HBasicBlock* dominated : block->GetDominatedBlocks()) {
+    if (dominated->GetDominator() != block) {
+      AddError(StringPrintf("Block %d should be the dominator of %d.",
+                            block->GetBlockId(),
+                            dominated->GetBlockId()));
+    }
+  }
+
   // Ensure there is no critical edge (i.e., an edge connecting a
   // block with multiple successors to a block with multiple
   // predecessors). Exceptional edges are synthesized and hence
@@ -467,13 +476,7 @@
   int id = loop_header->GetBlockId();
   HLoopInformation* loop_information = loop_header->GetLoopInformation();
 
-  // Ensure the pre-header block is first in the list of predecessors of a loop
-  // header and that the header block is its only successor.
-  if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
-    AddError(StringPrintf(
-        "Loop pre-header is not the first predecessor of the loop header %d.",
-        id));
-  } else if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) {
+  if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) {
     AddError(StringPrintf(
         "Loop pre-header %d of loop defined by header %d has %zu successors.",
         loop_information->GetPreHeader()->GetBlockId(),
@@ -481,6 +484,18 @@
         loop_information->GetPreHeader()->GetSuccessors().size()));
   }
 
+  if (loop_information->GetSuspendCheck() == nullptr) {
+    AddError(StringPrintf(
+        "Loop with header %d does not have a suspend check.",
+        loop_header->GetBlockId()));
+  }
+
+  if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
+    AddError(StringPrintf(
+        "Loop header %d does not have the loop suspend check as the first instruction.",
+        loop_header->GetBlockId()));
+  }
+
   // Ensure the loop header has only one incoming branch and the remaining
   // predecessors are back edges.
   size_t num_preds = loop_header->GetPredecessors().size();
@@ -532,20 +547,6 @@
     }
   }
 
-  // Ensure all blocks in the loop are live and dominated by the loop header.
-  for (uint32_t i : loop_blocks.Indexes()) {
-    HBasicBlock* loop_block = GetGraph()->GetBlocks()[i];
-    if (loop_block == nullptr) {
-      AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
-                            id,
-                            i));
-    } else if (!loop_header->Dominates(loop_block)) {
-      AddError(StringPrintf("Loop block %d not dominated by loop header %d.",
-                            i,
-                            id));
-    }
-  }
-
   // If this is a nested loop, ensure the outer loops contain a superset of the blocks.
   for (HLoopInformationOutwardIterator it(*loop_header); !it.Done(); it.Advance()) {
     HLoopInformation* outer_info = it.Current();
@@ -556,6 +557,29 @@
                             outer_info->GetHeader()->GetBlockId()));
     }
   }
+
+  // Ensure the pre-header block is first in the list of predecessors of a loop
+  // header and that the header block is its only successor.
+  if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
+    AddError(StringPrintf(
+        "Loop pre-header is not the first predecessor of the loop header %d.",
+        id));
+  }
+
+  // Ensure all blocks in the loop are live and dominated by the loop header in
+  // the case of natural loops.
+  for (uint32_t i : loop_blocks.Indexes()) {
+    HBasicBlock* loop_block = GetGraph()->GetBlocks()[i];
+    if (loop_block == nullptr) {
+      AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
+                            id,
+                            i));
+    } else if (!loop_information->IsIrreducible() && !loop_header->Dominates(loop_block)) {
+      AddError(StringPrintf("Loop block %d not dominated by loop header %d.",
+                            i,
+                            id));
+    }
+  }
 }
 
 void SSAChecker::VisitInstruction(HInstruction* instruction) {
@@ -577,6 +601,14 @@
     }
   }
 
+  if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) {
+    AddError(StringPrintf("Instruction %s:%d in block %d requires an environment "
+                          "but does not have one.",
+                          instruction->DebugName(),
+                          instruction->GetId(),
+                          current_block_->GetBlockId()));
+  }
+
   // Ensure an instruction having an environment is dominated by the
   // instructions contained in the environment.
   for (HEnvironment* environment = instruction->GetEnvironment();
@@ -606,6 +638,34 @@
                             instruction->GetId()));
     }
   }
+
+  if (instruction->CanThrowIntoCatchBlock()) {
+    // Find the top-level environment. This corresponds to the environment of
+    // the catch block since we do not inline methods with try/catch.
+    HEnvironment* environment = instruction->GetEnvironment();
+    while (environment->GetParent() != nullptr) {
+      environment = environment->GetParent();
+    }
+
+    // Find all catch blocks and test that `instruction` has an environment
+    // value for each one.
+    const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
+    for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) {
+      for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        HPhi* catch_phi = phi_it.Current()->AsPhi();
+        if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) {
+          AddError(StringPrintf("Instruction %s:%d throws into catch block %d "
+                                "with catch phi %d for vreg %d but its "
+                                "corresponding environment slot is empty.",
+                                instruction->DebugName(),
+                                instruction->GetId(),
+                                catch_block->GetBlockId(),
+                                catch_phi->GetId(),
+                                catch_phi->GetRegNumber()));
+        }
+      }
+    }
+  }
 }
 
 static Primitive::Type PrimitiveKind(Primitive::Type type) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 5f1328f..32c3a92 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -486,7 +486,9 @@
         StartAttributeStream("is_low") << interval->IsLowInterval();
         StartAttributeStream("is_high") << interval->IsHighInterval();
       }
-    } else if (IsPass(RegisterAllocator::kRegisterAllocatorPassName) && is_after_pass_) {
+    }
+
+    if (IsPass(RegisterAllocator::kRegisterAllocatorPassName) && is_after_pass_) {
       StartAttributeStream("liveness") << instruction->GetLifetimePosition();
       LocationSummary* locations = instruction->GetLocations();
       if (locations != nullptr) {
@@ -498,15 +500,29 @@
         attr << inputs << "->";
         DumpLocation(attr, locations->Out());
       }
-    } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
-               || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) {
+    }
+
+    if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
+        || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)
+        || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName)
+        || IsPass(SsaBuilder::kSsaBuilderPassName)) {
       HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
       if (info == nullptr) {
         StartAttributeStream("loop") << "none";
       } else {
         StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
+        HLoopInformation* outer = info->GetPreHeader()->GetLoopInformation();
+        if (outer != nullptr) {
+          StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
+        } else {
+          StartAttributeStream("outer_loop") << "none";
+        }
+        StartAttributeStream("irreducible")
+            << std::boolalpha << info->IsIrreducible() << std::noboolalpha;
       }
-    } else if ((IsPass(SsaBuilder::kSsaBuilderPassName)
+    }
+
+    if ((IsPass(SsaBuilder::kSsaBuilderPassName)
         || IsPass(HInliner::kInlinerPassName))
         && (instruction->GetType() == Primitive::kPrimNot)) {
       ReferenceTypeInfo info = instruction->IsLoadClass()
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 4af111b..b492278 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -125,6 +125,14 @@
     });
   }
 
+  void Clear() {
+    num_entries_ = 0;
+    for (size_t i = 0; i < num_buckets_; ++i) {
+      buckets_[i] = nullptr;
+    }
+    buckets_owned_.SetInitialBits(num_buckets_);
+  }
+
   // Updates this set by intersecting with instructions in a predecessor's set.
   void IntersectWith(ValueSet* predecessor) {
     if (IsEmpty()) {
@@ -191,14 +199,6 @@
     return clone_iterator;
   }
 
-  void Clear() {
-    num_entries_ = 0;
-    for (size_t i = 0; i < num_buckets_; ++i) {
-      buckets_[i] = nullptr;
-    }
-    buckets_owned_.SetInitialBits(num_buckets_);
-  }
-
   // Iterates over buckets with impure instructions (even indices) and deletes
   // the ones on which 'cond' returns true.
   template<typename Functor>
@@ -261,11 +261,14 @@
     }
   }
 
-  // Generates a hash code for an instruction. Pure instructions are put into
-  // odd buckets to speed up deletion.
+  // Generates a hash code for an instruction.
   size_t HashCode(HInstruction* instruction) const {
     size_t hash_code = instruction->ComputeHashCode();
-    if (instruction->GetSideEffects().HasDependencies()) {
+    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
+    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
+    // need to delete them when entering the loop.
+    if (instruction->GetSideEffects().HasDependencies() ||
+        instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
       return (hash_code << 1) | 0;
     } else {
       return (hash_code << 1) | 1;
@@ -360,8 +363,14 @@
     }
     if (!set->IsEmpty()) {
       if (block->IsLoopHeader()) {
-        DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
-        set->Kill(side_effects_.GetLoopEffects(block));
+        if (block->GetLoopInformation()->IsIrreducible()) {
+          // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
+          // loop header.
+          set->Clear();
+        } else {
+          DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
+          set->Kill(side_effects_.GetLoopEffects(block));
+        }
       } else if (predecessors.size() > 1) {
         for (HBasicBlock* predecessor : predecessors) {
           set->IntersectWith(sets_[predecessor->GetBlockId()]);
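
Aside: the HashCode() hunk above leans on bucket parity: entries that depend on side effects go to even buckets and pure entries to odd buckets, so deleting impure entries only has to walk the even half; with irreducible loops everything is forced into even buckets because the whole set may be cleared at a loop header. A toy sketch of that parity scheme, using a made-up raw hash and bearing no relation to the real ValueSet implementation:

  #include <cstddef>
  #include <cstdio>

  // Toy bucket-index computation mirroring the parity idea: an even index means
  // "may depend on side effects, visit when killing", an odd index means "pure".
  size_t BucketIndex(size_t raw_hash,
                     bool depends_on_side_effects,
                     bool graph_has_irreducible_loops) {
    if (depends_on_side_effects || graph_has_irreducible_loops) {
      return (raw_hash << 1) | 0;  // Even: scanned when side effects occur.
    }
    return (raw_hash << 1) | 1;    // Odd: skipped by impure-entry deletion.
  }

  int main() {
    std::printf("%zu\n", BucketIndex(5, /*depends=*/true,  /*irreducible=*/false));  // 10
    std::printf("%zu\n", BucketIndex(5, /*depends=*/false, /*irreducible=*/false));  // 11
    std::printf("%zu\n", BucketIndex(5, /*depends=*/false, /*irreducible=*/true));   // 10
    return 0;
  }
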
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index eef6cef..37f2d79 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -76,7 +76,9 @@
   // range analysis on outer loop while visiting inner loops.
   for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
     HBasicBlock* graph_block = it_graph.Current();
-    if (graph_block->IsLoopHeader()) {
+    // Don't analyze irreducible loops.
+    // TODO(ajcbik): could/should we remove this restriction?
+    if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) {
       VisitLoop(graph_block->GetLoopInformation());
     }
   }
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 48d3299..20c4f1f 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -356,12 +356,12 @@
       compare, invoke_instruction->GetDexPc());
   // TODO: Extend reference type propagation to understand the guard.
   if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(load_class, cursor);
+    bb_cursor->InsertInstructionAfter(field_get, cursor);
   } else {
-    bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+    bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction());
   }
-  bb_cursor->InsertInstructionAfter(field_get, load_class);
-  bb_cursor->InsertInstructionAfter(compare, field_get);
+  bb_cursor->InsertInstructionAfter(load_class, field_get);
+  bb_cursor->InsertInstructionAfter(compare, load_class);
   bb_cursor->InsertInstructionAfter(deoptimize, compare);
   deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
 
@@ -539,7 +539,7 @@
     return false;
   }
 
-  if (callee_graph->TryBuildingSsa(handles_) != kBuildSsaSuccess) {
+  if (callee_graph->TryBuildingSsa(handles_) != kAnalysisSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be transformed to SSA";
     return false;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index c504ded..49fc8c7 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -77,7 +77,6 @@
   void VisitUShr(HUShr* instruction) OVERRIDE;
   void VisitXor(HXor* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
-  void VisitFakeString(HFakeString* fake_string) OVERRIDE;
   void VisitInvoke(HInvoke* invoke) OVERRIDE;
   void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
 
@@ -211,19 +210,6 @@
 
 // Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation.
 bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) {
-  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
-  // TODO: Implement it for MIPS/64.
-  const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
-  switch (instruction_set) {
-    case kArm:
-    case kArm64:
-    case kThumb2:
-    case kX86:
-    case kX86_64:
-      break;
-    default:
-      return false;
-  }
   DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
   HInstruction* left = op->GetLeft();
   HInstruction* right = op->GetRight();
@@ -1192,48 +1178,6 @@
   TryReplaceWithRotate(instruction);
 }
 
-void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) {
-  HInstruction* actual_string = nullptr;
-
-  // Find the string we need to replace this instruction with. The actual string is
-  // the return value of a StringFactory call.
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* use = it.Current()->GetUser();
-    if (use->IsInvokeStaticOrDirect()
-        && use->AsInvokeStaticOrDirect()->IsStringFactoryFor(instruction)) {
-      use->AsInvokeStaticOrDirect()->RemoveFakeStringArgumentAsLastInput();
-      actual_string = use;
-      break;
-    }
-  }
-
-  // Check that there is no other instruction that thinks it is the factory for that string.
-  if (kIsDebugBuild) {
-    CHECK(actual_string != nullptr);
-    for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-      HInstruction* use = it.Current()->GetUser();
-      if (use->IsInvokeStaticOrDirect()) {
-        CHECK(!use->AsInvokeStaticOrDirect()->IsStringFactoryFor(instruction));
-      }
-    }
-  }
-
-  // We need to remove any environment uses of the fake string that are not dominated by
-  // `actual_string` to null.
-  for (HUseIterator<HEnvironment*> it(instruction->GetEnvUses()); !it.Done(); it.Advance()) {
-    HEnvironment* environment = it.Current()->GetUser();
-    if (!actual_string->StrictlyDominates(environment->GetHolder())) {
-      environment->RemoveAsUserOfInput(it.Current()->GetIndex());
-      environment->SetRawEnvAt(it.Current()->GetIndex(), nullptr);
-    }
-  }
-
-  // Only uses dominated by `actual_string` must remain. We can safely replace and remove
-  // `instruction`.
-  instruction->ReplaceWith(actual_string);
-  instruction->GetBlock()->RemoveInstruction(instruction);
-}
-
 void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
   HInstruction* argument = instruction->InputAt(1);
   HInstruction* receiver = instruction->InputAt(0);
@@ -1261,19 +1205,6 @@
 void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) {
   DCHECK(invoke->IsInvokeStaticOrDirect());
   DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
-  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
-  // TODO: Implement it for MIPS/64.
-  const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
-  switch (instruction_set) {
-    case kArm:
-    case kArm64:
-    case kThumb2:
-    case kX86:
-    case kX86_64:
-      break;
-    default:
-      return;
-  }
   HInstruction* value = invoke->InputAt(0);
   HInstruction* distance = invoke->InputAt(1);
   // Replace the invoke with an HRor.
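Aside (illustrative, not part of the patch): TryReplaceWithRotate above recognizes a binary Or/Xor/Add flanked by a UShr and a Shl — the usual rotation idiom — and collapses it into a single HRor; the switches removed above mean neither it nor SimplifyRotate bails out on the instruction set any more. A minimal C++ sketch of the source-level pattern being matched, with made-up names rather than ART code:

#include <cstdint>

// Illustration only: the UShr/Shl/Or idiom that the simplifier rewrites into
// one rotate instruction.
uint32_t RotateRightIdiom(uint32_t value, uint32_t distance) {
  distance &= 31u;  // Keep both shift amounts well defined.
  return (value >> distance) | (value << ((32u - distance) & 31u));
}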
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6bbc751..4bcfc54 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -30,6 +30,15 @@
                                                                      HInstruction* array,
                                                                      HInstruction* index,
                                                                      int access_size) {
+  if (kEmitCompilerReadBarrier) {
+    // The read barrier instrumentation does not support the
+    // HArm64IntermediateAddress instruction yet.
+    //
+    // TODO: Handle this case properly in the ARM64 code generator and
+    // re-enable this optimization; otherwise, remove this TODO.
+    // b/26601270
+    return;
+  }
   if (index->IsConstant() ||
       (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
     // When the index is a constant all the addressing can be fitted in the
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 9f50d18..3bf3f7f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -85,9 +85,9 @@
                             InvokeDexCallingConventionVisitor* calling_convention_visitor) {
     if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
       HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
-      // When we do not run baseline, explicit clinit checks triggered by static
-      // invokes must have been pruned by art::PrepareForRegisterAllocation.
-      DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+      // Explicit clinit checks triggered by static invokes must have been
+      // pruned by art::PrepareForRegisterAllocation.
+      DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
     }
 
     if (invoke->GetNumberOfArguments() == 0) {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 1e6b3a1..b1fbf28 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -847,6 +847,9 @@
   }
 
   // Prevent reordering with prior memory operations.
+  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
+  // latter allows a preceding load to be delayed past the STXR
+  // instruction below.
   __ dmb(ISH);
 
   __ add(tmp_ptr, base, ShifterOperand(offset));
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index f723940..81cab86 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1035,7 +1035,11 @@
     __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
     __ Cbnz(tmp_32, &loop_head);
   } else {
-    __ Dmb(InnerShareable, BarrierWrites);
+    // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
+    // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
+    // one, as the latter allows a preceding load to be delayed past
+    // the STXR instruction below.
+    __ Dmb(InnerShareable, BarrierAll);
     __ Bind(&loop_head);
     // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
     // the reference stored in the object before attempting the CAS,
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 06fab61..bc126a2 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -43,14 +43,18 @@
   return codegen_->GetGraph()->GetArena();
 }
 
-inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() {
+inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const {
   return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
 }
 
-inline bool IntrinsicCodeGeneratorMIPS::IsR6() {
+inline bool IntrinsicCodeGeneratorMIPS::IsR6() const {
   return codegen_->GetInstructionSetFeatures().IsR6();
 }
 
+inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const {
+  return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
+}
+
 #define __ codegen->GetAssembler()->
 
 static void MoveFromReturnRegister(Location trg,
@@ -162,7 +166,7 @@
     Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
 
     __ Mfc1(out_lo, in);
-    __ Mfhc1(out_hi, in);
+    __ MoveFromFpuHigh(out_hi, in);
   } else {
     Register out = locations->Out().AsRegister<Register>();
 
@@ -204,7 +208,7 @@
     Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
 
     __ Mtc1(in_lo, out);
-    __ Mthc1(in_hi, out);
+    __ MoveToFpuHigh(in_hi, out);
   } else {
     Register in = locations->InAt(0).AsRegister<Register>();
 
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index f86b0ef..575a7d0 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -67,8 +67,9 @@
 #undef INTRINSICS_LIST
 #undef OPTIMIZING_INTRINSICS
 
-  bool IsR2OrNewer(void);
-  bool IsR6(void);
+  bool IsR2OrNewer() const;
+  bool IsR6() const;
+  bool Is32BitFPU() const;
 
  private:
   MipsAssembler* GetAssembler();
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 02befc0..a6b4078 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -94,6 +94,16 @@
     SideEffects loop_effects = side_effects_.GetLoopEffects(block);
     HBasicBlock* pre_header = loop_info->GetPreHeader();
 
+    bool contains_irreducible_loop = false;
+    if (graph_->HasIrreducibleLoops()) {
+      for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
+        if (it_loop.Current()->GetLoopInformation()->IsIrreducible()) {
+          contains_irreducible_loop = true;
+          break;
+        }
+      }
+    }
+
     for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
       HBasicBlock* inner = it_loop.Current();
       DCHECK(inner->IsInLoop());
@@ -104,6 +114,12 @@
       }
       visited.SetBit(inner->GetBlockId());
 
+      if (contains_irreducible_loop) {
+        // We cannot apply LICM in an irreducible loop, or in a natural loop containing an
+        // irreducible loop.
+        continue;
+      }
+
       // We can move an instruction that can throw only if it is the first
       // throwing instruction in the loop. Note that the first potentially
       // throwing instruction encountered that is not hoisted stops this
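Aside (illustrative, not part of the patch): an irreducible loop is one that can be entered at more than one block, so the block targeted by a back edge does not dominate the back edge's source; the guard added above makes LICM skip such loops and any natural loop containing one. A tiny standalone C++ example of control flow that produces an irreducible loop (names are made up):

// Illustration only: `goto middle` enters the loop body without passing
// through `head`, so `head` does not dominate the source of the back edge
// `middle` -> `head`, making the loop irreducible.
int SumWithTwoEntries(bool enter_in_middle, int n) {
  int sum = 0;
  int i = 0;
  if (enter_in_middle) goto middle;
head:
  if (i >= n) return sum;
  sum += i;
middle:
  ++i;
  goto head;
}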
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 2b313f6..c4492c8 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -582,9 +582,22 @@
     DCHECK(block->IsLoopHeader());
     int block_id = block->GetBlockId();
     ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+
+    // Don't eliminate loads in irreducible loops. This is safe for singletons, because
+    // they are always used by the non-eliminated loop-phi.
+    if (block->GetLoopInformation()->IsIrreducible()) {
+      if (kIsDebugBuild) {
+        for (size_t i = 0; i < heap_values.size(); i++) {
+          DCHECK_EQ(heap_values[i], kUnknownHeapValue);
+        }
+      }
+      return;
+    }
+
     HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
     ArenaVector<HInstruction*>& pre_header_heap_values =
         heap_values_for_[pre_header->GetBlockId()];
+
     // Inherit the values from pre-header.
     for (size_t i = 0; i < heap_values.size(); i++) {
       heap_values[i] = pre_header_heap_values[i];
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 6d4275d..2eabadf 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #include "nodes.h"
 
 #include "code_generator.h"
@@ -90,6 +89,7 @@
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_[i];
+      if (block == nullptr) continue;
       DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage";
       for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
         RemoveAsUser(it.Current());
@@ -102,6 +102,7 @@
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_[i];
+      if (block == nullptr) continue;
       // We only need to update the successor, which might be live.
       for (HBasicBlock* successor : block->GetSuccessors()) {
         successor->RemovePredecessor(block);
@@ -113,7 +114,7 @@
   }
 }
 
-void HGraph::BuildDominatorTree() {
+GraphAnalysisResult HGraph::BuildDominatorTree() {
   // (1) Simplify the CFG so that catch blocks have only exceptional incoming
   //     edges. This invariant simplifies building SSA form because Phis cannot
   //     collect both normal- and exceptional-flow values at the same time.
@@ -130,7 +131,7 @@
   RemoveInstructionsAsUsersFromDeadBlocks(visited);
 
   // (4) Remove blocks not visited during the initial DFS.
-  //     Step (4) requires dead blocks to be removed from the
+  //     Step (5) requires dead blocks to be removed from the
   //     predecessors list of live blocks.
   RemoveDeadBlocks(visited);
 
@@ -140,6 +141,20 @@
 
   // (6) Compute the dominance information and the reverse post order.
   ComputeDominanceInformation();
+
+  // (7) Analyze loops discovered through back edge analysis, and
+  //     set the loop information on each block.
+  GraphAnalysisResult result = AnalyzeLoops();
+  if (result != kAnalysisSuccess) {
+    return result;
+  }
+
+  // (8) Precompute per-block try membership before entering the SSA builder,
+  //     which needs the information to build catch block phis from values of
+  //     locals at throwing instructions inside try blocks.
+  ComputeTryBlockInformation();
+
+  return kAnalysisSuccess;
 }
 
 void HGraph::ClearDominanceInformation() {
@@ -149,11 +164,26 @@
   reverse_post_order_.clear();
 }
 
+void HGraph::ClearLoopInformation() {
+  SetHasIrreducibleLoops(false);
+  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+    it.Current()->SetLoopInformation(nullptr);
+  }
+}
+
 void HBasicBlock::ClearDominanceInformation() {
   dominated_blocks_.clear();
   dominator_ = nullptr;
 }
 
+HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const {
+  HInstruction* instruction = GetFirstInstruction();
+  while (instruction->IsParallelMove()) {
+    instruction = instruction->GetNext();
+  }
+  return instruction;
+}
+
 void HGraph::ComputeDominanceInformation() {
   DCHECK(reverse_post_order_.empty());
   reverse_post_order_.reserve(blocks_.size());
@@ -190,31 +220,28 @@
       // dominator of the block. We can then start visiting its successors.
       if (++visits[successor->GetBlockId()] ==
           successor->GetPredecessors().size() - successor->NumberOfBackEdges()) {
-        successor->GetDominator()->AddDominatedBlock(successor);
         reverse_post_order_.push_back(successor);
         worklist.push_back(successor);
       }
     }
   }
+
+  // Populate `dominated_blocks_` information after computing all dominators.
+  // The potential presence of irreducible loops requires doing it afterwards.
+  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (!block->IsEntryBlock()) {
+      block->GetDominator()->AddDominatedBlock(block);
+    }
+  }
 }
 
-BuildSsaResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
-  BuildDominatorTree();
-
-  // The SSA builder requires loops to all be natural. Specifically, the dead phi
-  // elimination phase checks the consistency of the graph when doing a post-order
-  // visit for eliminating dead phis: a dead phi can only have loop header phi
-  // users remaining when being visited.
-  BuildSsaResult result = AnalyzeNaturalLoops();
-  if (result != kBuildSsaSuccess) {
+GraphAnalysisResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
+  GraphAnalysisResult result = BuildDominatorTree();
+  if (result != kAnalysisSuccess) {
     return result;
   }
 
-  // Precompute per-block try membership before entering the SSA builder,
-  // which needs the information to build catch block phis from values of
-  // locals at throwing instructions inside try blocks.
-  ComputeTryBlockInformation();
-
   // Create the inexact Object reference type and store it in the HGraph.
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
@@ -224,12 +251,12 @@
 
   // Tranforms graph to SSA form.
   result = SsaBuilder(this, handles).BuildSsa();
-  if (result != kBuildSsaSuccess) {
+  if (result != kAnalysisSuccess) {
     return result;
   }
 
   in_ssa_form_ = true;
-  return kBuildSsaSuccess;
+  return kAnalysisSuccess;
 }
 
 HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) {
@@ -331,7 +358,7 @@
   // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
   for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) {
     HBasicBlock* catch_block = blocks_[block_id];
-    if (!catch_block->IsCatchBlock()) {
+    if (catch_block == nullptr || !catch_block->IsCatchBlock()) {
       continue;
     }
 
@@ -434,11 +461,15 @@
     }
     if (block->IsLoopHeader()) {
       SimplifyLoop(block);
+    } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) {
+      // We are being called by the dead code elimination pass, and what used to be
+      // a loop got dismantled. Just remove the suspend check.
+      block->RemoveInstruction(block->GetFirstInstruction());
     }
   }
 }
 
-BuildSsaResult HGraph::AnalyzeNaturalLoops() const {
+GraphAnalysisResult HGraph::AnalyzeLoops() const {
   // Order does not matter.
   for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
@@ -446,16 +477,26 @@
       if (block->IsCatchBlock()) {
         // TODO: Dealing with exceptional back edges could be tricky because
         //       they only approximate the real control flow. Bail out for now.
-        return kBuildSsaFailThrowCatchLoop;
+        return kAnalysisFailThrowCatchLoop;
       }
-      HLoopInformation* info = block->GetLoopInformation();
-      if (!info->Populate()) {
-        // Abort if the loop is non natural. We currently bailout in such cases.
-        return kBuildSsaFailNonNaturalLoop;
-      }
+      block->GetLoopInformation()->Populate();
     }
   }
-  return kBuildSsaSuccess;
+  return kAnalysisSuccess;
+}
+
+void HLoopInformation::Dump(std::ostream& os) {
+  os << "header: " << header_->GetBlockId() << std::endl;
+  os << "pre header: " << GetPreHeader()->GetBlockId() << std::endl;
+  for (HBasicBlock* block : back_edges_) {
+    os << "back edge: " << block->GetBlockId() << std::endl;
+  }
+  for (HBasicBlock* block : header_->GetPredecessors()) {
+    os << "predecessor: " << block->GetBlockId() << std::endl;
+  }
+  for (uint32_t idx : blocks_.Indexes()) {
+    os << "  in loop: " << idx << std::endl;
+  }
 }
 
 void HGraph::InsertConstant(HConstant* constant) {
@@ -555,61 +596,65 @@
   }
 }
 
-bool HLoopInformation::Populate() {
+void HLoopInformation::PopulateIrreducibleRecursive(HBasicBlock* block) {
+  if (blocks_.IsBitSet(block->GetBlockId())) {
+    return;
+  }
+
+  if (block->IsLoopHeader()) {
+    // If we hit a loop header in an irreducible loop, we first check if the
+    // pre header of that loop belongs to the currently analyzed loop. If it does,
+    // then we visit the back edges.
+    // Note that we cannot use GetPreHeader, as the loop may not have been populated
+    // yet.
+    HBasicBlock* pre_header = block->GetPredecessors()[0];
+    PopulateIrreducibleRecursive(pre_header);
+    if (blocks_.IsBitSet(pre_header->GetBlockId())) {
+      blocks_.SetBit(block->GetBlockId());
+      block->SetInLoop(this);
+      HLoopInformation* info = block->GetLoopInformation();
+      for (HBasicBlock* back_edge : info->GetBackEdges()) {
+        PopulateIrreducibleRecursive(back_edge);
+      }
+    }
+  } else {
+    // Visit all predecessors. If one predecessor is part of the loop, this
+    // block is also part of this loop.
+    for (HBasicBlock* predecessor : block->GetPredecessors()) {
+      PopulateIrreducibleRecursive(predecessor);
+      if (blocks_.IsBitSet(predecessor->GetBlockId())) {
+        blocks_.SetBit(block->GetBlockId());
+        block->SetInLoop(this);
+      }
+    }
+  }
+}
+
+void HLoopInformation::Populate() {
   DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated";
+  // Populate this loop: starting with the back edge, recursively add predecessors
+  // that are not already part of that loop. Set the header as part of the loop
+  // to end the recursion.
+  // This is a recursive implementation of the algorithm described in
+  // "Advanced Compiler Design & Implementation" (Muchnick) p192.
+  blocks_.SetBit(header_->GetBlockId());
+  header_->SetInLoop(this);
   for (HBasicBlock* back_edge : GetBackEdges()) {
     DCHECK(back_edge->GetDominator() != nullptr);
     if (!header_->Dominates(back_edge)) {
-      // This loop is not natural. Do not bother going further.
-      return false;
+      irreducible_ = true;
+      header_->GetGraph()->SetHasIrreducibleLoops(true);
+      PopulateIrreducibleRecursive(back_edge);
+    } else {
+      PopulateRecursive(back_edge);
     }
-
-    // Populate this loop: starting with the back edge, recursively add predecessors
-    // that are not already part of that loop. Set the header as part of the loop
-    // to end the recursion.
-    // This is a recursive implementation of the algorithm described in
-    // "Advanced Compiler Design & Implementation" (Muchnick) p192.
-    blocks_.SetBit(header_->GetBlockId());
-    PopulateRecursive(back_edge);
-  }
-  return true;
-}
-
-void HLoopInformation::Update() {
-  HGraph* graph = header_->GetGraph();
-  for (uint32_t id : blocks_.Indexes()) {
-    HBasicBlock* block = graph->GetBlocks()[id];
-    // Reset loop information of non-header blocks inside the loop, except
-    // members of inner nested loops because those should already have been
-    // updated by their own LoopInformation.
-    if (block->GetLoopInformation() == this && block != header_) {
-      block->SetLoopInformation(nullptr);
-    }
-  }
-  blocks_.ClearAllBits();
-
-  if (back_edges_.empty()) {
-    // The loop has been dismantled, delete its suspend check and remove info
-    // from the header.
-    DCHECK(HasSuspendCheck());
-    header_->RemoveInstruction(suspend_check_);
-    header_->SetLoopInformation(nullptr);
-    header_ = nullptr;
-    suspend_check_ = nullptr;
-  } else {
-    if (kIsDebugBuild) {
-      for (HBasicBlock* back_edge : back_edges_) {
-        DCHECK(header_->Dominates(back_edge));
-      }
-    }
-    // This loop still has reachable back edges. Repopulate the list of blocks.
-    bool populate_successful = Populate();
-    DCHECK(populate_successful);
   }
 }
 
 HBasicBlock* HLoopInformation::GetPreHeader() const {
-  return header_->GetDominator();
+  HBasicBlock* block = header_->GetPredecessors()[0];
+  DCHECK(irreducible_ || (block == header_->GetDominator()));
+  return block;
 }
 
 bool HLoopInformation::Contains(const HBasicBlock& block) const {
@@ -2146,10 +2191,7 @@
                            IntrinsicExceptions exceptions) {
   intrinsic_ = intrinsic;
   IntrinsicOptimizations opt(this);
-  if (needs_env_or_cache == kNoEnvironmentOrCache) {
-    opt.SetDoesNotNeedDexCache();
-    opt.SetDoesNotNeedEnvironment();
-  }
+
   // Adjust method's side effects from intrinsic table.
   switch (side_effects) {
     case kNoSideEffects: SetSideEffects(SideEffects::None()); break;
@@ -2157,6 +2199,14 @@
     case kWriteSideEffects: SetSideEffects(SideEffects::AllWrites()); break;
     case kAllSideEffects: SetSideEffects(SideEffects::AllExceptGCDependency()); break;
   }
+
+  if (needs_env_or_cache == kNoEnvironmentOrCache) {
+    opt.SetDoesNotNeedDexCache();
+    opt.SetDoesNotNeedEnvironment();
+  } else {
+    // If we need an environment, that means there will be a call, which can trigger GC.
+    SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC()));
+  }
   // Adjust method's exception status from intrinsic table.
   switch (exceptions) {
     case kNoThrow: SetCanThrow(false); break;
@@ -2164,6 +2214,11 @@
   }
 }
 
+bool HNewInstance::IsStringAlloc() const {
+  ScopedObjectAccess soa(Thread::Current());
+  return GetReferenceTypeInfo().IsStringClass();
+}
+
 bool HInvoke::NeedsEnvironment() const {
   if (!IsIntrinsic()) {
     return true;
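Aside (illustrative, not part of the patch): Populate() above cites Muchnick, "Advanced Compiler Design & Implementation", p. 192: seed the loop with its header, then walk predecessors backwards from each back edge, adding every block reached until the already-marked header stops the walk. A self-contained sketch with a simplified Block type (not ART's HBasicBlock), written iteratively where ART recurses:

#include <set>
#include <vector>

struct Block {
  int id = 0;
  std::vector<Block*> predecessors;
};

// Illustration only: blocks of the natural loop with header `header` and a
// back edge whose source is `back_edge`.
std::set<Block*> NaturalLoopBlocks(Block* header, Block* back_edge) {
  std::set<Block*> loop;
  loop.insert(header);  // Seeding the header ends the backward walk.
  std::vector<Block*> worklist = {back_edge};
  while (!worklist.empty()) {
    Block* block = worklist.back();
    worklist.pop_back();
    if (loop.insert(block).second) {  // Newly added: visit its predecessors too.
      for (Block* predecessor : block->predecessors) {
        worklist.push_back(predecessor);
      }
    }
  }
  return loop;
}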
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c06d164..e222ef7 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -44,7 +44,6 @@
 class HCurrentMethod;
 class HDoubleConstant;
 class HEnvironment;
-class HFakeString;
 class HFloatConstant;
 class HGraphBuilder;
 class HGraphVisitor;
@@ -98,11 +97,10 @@
   kCondAE,  // >=
 };
 
-enum BuildSsaResult {
-  kBuildSsaFailNonNaturalLoop,
-  kBuildSsaFailThrowCatchLoop,
-  kBuildSsaFailAmbiguousArrayOp,
-  kBuildSsaSuccess,
+enum GraphAnalysisResult {
+  kAnalysisFailThrowCatchLoop,
+  kAnalysisFailAmbiguousArrayOp,
+  kAnalysisSuccess,
 };
 
 class HInstructionList : public ValueObject {
@@ -289,6 +287,7 @@
         temporaries_vreg_slots_(0),
         has_bounds_checks_(false),
         has_try_catch_(false),
+        has_irreducible_loops_(false),
         debuggable_(debuggable),
         current_instruction_id_(start_instruction_id),
         dex_file_(dex_file),
@@ -324,20 +323,20 @@
   // Try building the SSA form of this graph, with dominance computation and
   // loop recognition. Returns a code specifying that it was successful or the
   // reason for failure.
-  BuildSsaResult TryBuildingSsa(StackHandleScopeCollection* handles);
+  GraphAnalysisResult TryBuildingSsa(StackHandleScopeCollection* handles);
 
   void ComputeDominanceInformation();
   void ClearDominanceInformation();
-
-  void BuildDominatorTree();
+  void ClearLoopInformation();
+  void FindBackEdges(ArenaBitVector* visited);
+  GraphAnalysisResult BuildDominatorTree();
   void SimplifyCFG();
   void SimplifyCatchBlocks();
 
   // Analyze all natural loops in this graph. Returns a code specifying that it
   // was successful or the reason for failure. The method will fail if a loop
-  // is not natural, that is the header does not dominate a back edge, or if it
   // is a throw-catch loop, i.e. the header is a catch block.
-  BuildSsaResult AnalyzeNaturalLoops() const;
+  GraphAnalysisResult AnalyzeLoops() const;
 
   // Iterate over blocks to compute try block membership. Needs reverse post
   // order and loop information.
@@ -482,6 +481,9 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  bool HasIrreducibleLoops() const { return has_irreducible_loops_; }
+  void SetHasIrreducibleLoops(bool value) { has_irreducible_loops_ = value; }
+
   ArtMethod* GetArtMethod() const { return art_method_; }
   void SetArtMethod(ArtMethod* method) { art_method_ = method; }
 
@@ -491,7 +493,6 @@
   HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor);
 
  private:
-  void FindBackEdges(ArenaBitVector* visited);
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
 
@@ -558,6 +559,9 @@
   // try/catch-related passes if false.
   bool has_try_catch_;
 
+  // Flag whether there are any irreducible loops in the graph.
+  bool has_irreducible_loops_;
+
   // Indicates whether the graph should be compiled in a way that
   // ensures full debuggability. If false, we can apply more
   // aggressive optimizations that may limit the level of debugging.
@@ -613,12 +617,17 @@
   HLoopInformation(HBasicBlock* header, HGraph* graph)
       : header_(header),
         suspend_check_(nullptr),
+        irreducible_(false),
         back_edges_(graph->GetArena()->Adapter(kArenaAllocLoopInfoBackEdges)),
         // Make bit vector growable, as the number of blocks may change.
         blocks_(graph->GetArena(), graph->GetBlocks().size(), true) {
     back_edges_.reserve(kDefaultNumberOfBackEdges);
   }
 
+  bool IsIrreducible() const { return irreducible_; }
+
+  void Dump(std::ostream& os);
+
   HBasicBlock* GetHeader() const {
     return header_;
   }
@@ -661,15 +670,8 @@
     ReplaceElement(back_edges_, existing, new_back_edge);
   }
 
-  // Finds blocks that are part of this loop. Returns whether the loop is a natural loop,
-  // that is the header dominates the back edge.
-  bool Populate();
-
-  // Reanalyzes the loop by removing loop info from its blocks and re-running
-  // Populate(). If there are no back edges left, the loop info is completely
-  // removed as well as its SuspendCheck instruction. It must be run on nested
-  // inner loops first.
-  void Update();
+  // Finds blocks that are part of this loop.
+  void Populate();
 
   // Returns whether this loop information contains `block`.
   // Note that this loop information *must* be populated before entering this function.
@@ -690,9 +692,11 @@
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
+  void PopulateIrreducibleRecursive(HBasicBlock* block);
 
   HBasicBlock* header_;
   HSuspendCheck* suspend_check_;
+  bool irreducible_;
   ArenaVector<HBasicBlock*> back_edges_;
   ArenaBitVector blocks_;
 
@@ -856,6 +860,8 @@
   HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
   const HInstructionList& GetPhis() const { return phis_; }
 
+  HInstruction* GetFirstInstructionDisregardMoves() const;
+
   void AddSuccessor(HBasicBlock* block) {
     successors_.push_back(block);
     block->predecessors_.push_back(this);
@@ -1019,6 +1025,11 @@
     return GetPredecessors()[0] == GetLoopInformation()->GetPreHeader();
   }
 
+  bool IsFirstPredecessorBackEdge() const {
+    DCHECK(IsLoopHeader());
+    return GetLoopInformation()->IsBackEdge(*GetPredecessors()[0]);
+  }
+
   HLoopInformation* GetLoopInformation() const {
     return loop_information_;
   }
@@ -1156,7 +1167,6 @@
   M(DoubleConstant, Constant)                                           \
   M(Equal, Condition)                                                   \
   M(Exit, Instruction)                                                  \
-  M(FakeString, Instruction)                                            \
   M(FloatConstant, Constant)                                            \
   M(Goto, Instruction)                                                  \
   M(GreaterThan, Condition)                                             \
@@ -1831,7 +1841,10 @@
   void SetBlock(HBasicBlock* block) { block_ = block; }
   bool IsInBlock() const { return block_ != nullptr; }
   bool IsInLoop() const { return block_->IsInLoop(); }
-  bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); }
+  bool IsLoopHeaderPhi() const { return IsPhi() && block_->IsLoopHeader(); }
+  bool IsIrreducibleLoopHeaderPhi() const {
+    return IsLoopHeaderPhi() && GetBlock()->GetLoopInformation()->IsIrreducible();
+  }
 
   virtual size_t InputCount() const = 0;
   HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
@@ -1868,6 +1881,10 @@
     return false;
   }
 
+  virtual bool IsActualObject() const {
+    return GetType() == Primitive::kPrimNot;
+  }
+
   void SetReferenceTypeInfo(ReferenceTypeInfo rti);
 
   ReferenceTypeInfo GetReferenceTypeInfo() const {
@@ -2487,8 +2504,10 @@
 // Deoptimize to interpreter, upon checking a condition.
 class HDeoptimize : public HTemplateInstruction<1> {
  public:
+  // We set CanTriggerGC to prevent any intermediate address from being live
+  // at the point of the `HDeoptimize`.
   HDeoptimize(HInstruction* cond, uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {
+      : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
     SetRawInputAt(0, cond);
   }
 
@@ -3246,6 +3265,61 @@
   DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
 };
 
+class HNewInstance : public HExpression<2> {
+ public:
+  HNewInstance(HInstruction* cls,
+               HCurrentMethod* current_method,
+               uint32_t dex_pc,
+               uint16_t type_index,
+               const DexFile& dex_file,
+               bool can_throw,
+               bool finalizable,
+               QuickEntrypointEnum entrypoint)
+      : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
+        type_index_(type_index),
+        dex_file_(dex_file),
+        can_throw_(can_throw),
+        finalizable_(finalizable),
+        entrypoint_(entrypoint) {
+    SetRawInputAt(0, cls);
+    SetRawInputAt(1, current_method);
+  }
+
+  uint16_t GetTypeIndex() const { return type_index_; }
+  const DexFile& GetDexFile() const { return dex_file_; }
+
+  // Calls runtime so needs an environment.
+  bool NeedsEnvironment() const OVERRIDE { return true; }
+
+  // It may throw when called on type that's not instantiable/accessible.
+  // It can throw OOME.
+  // TODO: distinguish between the two cases so we can for example allow allocation elimination.
+  bool CanThrow() const OVERRIDE { return can_throw_ || true; }
+
+  bool IsFinalizable() const { return finalizable_; }
+
+  bool CanBeNull() const OVERRIDE { return false; }
+
+  QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
+
+  void SetEntrypoint(QuickEntrypointEnum entrypoint) {
+    entrypoint_ = entrypoint;
+  }
+
+  bool IsStringAlloc() const;
+
+  DECLARE_INSTRUCTION(NewInstance);
+
+ private:
+  const uint16_t type_index_;
+  const DexFile& dex_file_;
+  const bool can_throw_;
+  const bool finalizable_;
+  QuickEntrypointEnum entrypoint_;
+
+  DISALLOW_COPY_AND_ASSIGN(HNewInstance);
+};
+
 enum class Intrinsics {
 #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   k ## Name,
@@ -3539,10 +3613,9 @@
 
   // Get the index of the special input, if any.
   //
-  // If the invoke IsStringInit(), it initially has a HFakeString special argument
-  // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(),
-  // the "special input" is the current method pointer; otherwise there may be one
-  // platform-specific special input, such as PC-relative addressing base.
+  // If the invoke HasCurrentMethodInput(), the "special input" is the current
+  // method pointer; otherwise there may be one platform-specific special input,
+  // such as PC-relative addressing base.
   uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
 
   InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
@@ -3616,20 +3689,18 @@
     DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
-  bool IsStringFactoryFor(HFakeString* str) const {
-    if (!IsStringInit()) return false;
-    DCHECK(!HasCurrentMethodInput());
-    if (InputCount() == (number_of_arguments_)) return false;
-    return InputAt(InputCount() - 1)->AsFakeString() == str;
+  HNewInstance* GetThisArgumentOfStringInit() const {
+    DCHECK(IsStringInit());
+    size_t index = InputCount() - 1;
+    DCHECK(InputAt(index)->IsNewInstance());
+    return InputAt(index)->AsNewInstance();
   }
 
-  void RemoveFakeStringArgumentAsLastInput() {
+  void RemoveThisArgumentOfStringInit() {
     DCHECK(IsStringInit());
-    size_t last_input_index = InputCount() - 1;
-    HInstruction* last_input = InputAt(last_input_index);
-    DCHECK(last_input != nullptr);
-    DCHECK(last_input->IsFakeString()) << last_input->DebugName();
-    RemoveAsUserOfInput(last_input_index);
+    size_t index = InputCount() - 1;
+    DCHECK(InputAt(index)->IsNewInstance());
+    RemoveAsUserOfInput(index);
     inputs_.pop_back();
   }
 
@@ -3737,59 +3808,6 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
 };
 
-class HNewInstance : public HExpression<2> {
- public:
-  HNewInstance(HInstruction* cls,
-               HCurrentMethod* current_method,
-               uint32_t dex_pc,
-               uint16_t type_index,
-               const DexFile& dex_file,
-               bool can_throw,
-               bool finalizable,
-               QuickEntrypointEnum entrypoint)
-      : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
-        type_index_(type_index),
-        dex_file_(dex_file),
-        can_throw_(can_throw),
-        finalizable_(finalizable),
-        entrypoint_(entrypoint) {
-    SetRawInputAt(0, cls);
-    SetRawInputAt(1, current_method);
-  }
-
-  uint16_t GetTypeIndex() const { return type_index_; }
-  const DexFile& GetDexFile() const { return dex_file_; }
-
-  // Calls runtime so needs an environment.
-  bool NeedsEnvironment() const OVERRIDE { return true; }
-
-  // It may throw when called on type that's not instantiable/accessible.
-  // It can throw OOME.
-  // TODO: distinguish between the two cases so we can for example allow allocation elimination.
-  bool CanThrow() const OVERRIDE { return can_throw_ || true; }
-
-  bool IsFinalizable() const { return finalizable_; }
-
-  bool CanBeNull() const OVERRIDE { return false; }
-
-  QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
-
-  void SetEntrypoint(QuickEntrypointEnum entrypoint) {
-    entrypoint_ = entrypoint;
-  }
-
-  DECLARE_INSTRUCTION(NewInstance);
-
- private:
-  const uint16_t type_index_;
-  const DexFile& dex_file_;
-  const bool can_throw_;
-  const bool finalizable_;
-  QuickEntrypointEnum entrypoint_;
-
-  DISALLOW_COPY_AND_ASSIGN(HNewInstance);
-};
-
 class HNeg : public HUnaryOperation {
  public:
   HNeg(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
@@ -4004,8 +4022,10 @@
 
 class HDivZeroCheck : public HExpression<1> {
  public:
+  // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
+  // constructor.
   HDivZeroCheck(HInstruction* value, uint32_t dex_pc)
-      : HExpression(value->GetType(), SideEffects::None(), dex_pc) {
+      : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
     SetRawInputAt(0, value);
   }
 
@@ -4526,8 +4546,10 @@
 
 class HNullCheck : public HExpression<1> {
  public:
+  // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
+  // constructor.
   HNullCheck(HInstruction* value, uint32_t dex_pc)
-      : HExpression(value->GetType(), SideEffects::None(), dex_pc) {
+      : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
     SetRawInputAt(0, value);
   }
 
@@ -4848,8 +4870,10 @@
 
 class HBoundsCheck : public HExpression<2> {
  public:
+  // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException`
+  // constructor.
   HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc)
-      : HExpression(index->GetType(), SideEffects::None(), dex_pc) {
+      : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
     DCHECK(index->GetType() == Primitive::kPrimInt);
     SetRawInputAt(0, index);
     SetRawInputAt(1, length);
@@ -5564,26 +5588,6 @@
   DISALLOW_COPY_AND_ASSIGN(HMonitorOperation);
 };
 
-/**
- * A HInstruction used as a marker for the replacement of new + <init>
- * of a String to a call to a StringFactory. Only baseline will see
- * the node at code generation, where it will be be treated as null.
- * When compiling non-baseline, `HFakeString` instructions are being removed
- * in the instruction simplifier.
- */
-class HFakeString : public HTemplateInstruction<0> {
- public:
-  explicit HFakeString(uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {}
-
-  Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimNot; }
-
-  DECLARE_INSTRUCTION(FakeString);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HFakeString);
-};
-
 class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
  public:
   MoveOperands(Location source,
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 18405f2..445cdab 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -107,6 +107,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
   bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+  bool IsActualObject() const OVERRIDE { return false; }
 
   HInstruction* GetBaseAddress() const { return InputAt(0); }
   HInstruction* GetOffset() const { return InputAt(1); }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3eb7274..fffd005 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -17,6 +17,7 @@
 #include "optimizing_compiler.h"
 
 #include <fstream>
+#include <memory>
 #include <stdint.h>
 
 #ifdef ART_ENABLE_CODEGEN_arm64
@@ -52,6 +53,8 @@
 #include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "driver/dex_compilation_unit.h"
+#include "dwarf/method_debug_info.h"
+#include "elf_writer_debug.h"
 #include "elf_writer_quick.h"
 #include "graph_checker.h"
 #include "graph_visualizer.h"
@@ -60,6 +63,7 @@
 #include "inliner.h"
 #include "instruction_simplifier.h"
 #include "intrinsics.h"
+#include "jit/debugger_interface.h"
 #include "jit/jit_code_cache.h"
 #include "licm.h"
 #include "jni/quick/jni_compiler.h"
@@ -68,6 +72,7 @@
 #include "prepare_for_register_allocation.h"
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
+#include "oat_quick_method_header.h"
 #include "sharpening.h"
 #include "side_effects_analysis.h"
 #include "ssa_builder.h"
@@ -122,7 +127,7 @@
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
         timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
         disasm_info_(graph->GetArena()),
-        visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
+        visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
         visualizer_(visualizer_output, graph, *codegen),
         graph_in_bad_state_(false) {
     if (timing_logger_enabled_ || visualizer_enabled_) {
@@ -300,30 +305,19 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  // Whether we should run any optimization or register allocation. If false, will
-  // just run the code generation after the graph was built.
-  const bool run_optimizations_;
-
   // Create a 'CompiledMethod' for an optimized graph.
-  CompiledMethod* EmitOptimized(ArenaAllocator* arena,
-                                CodeVectorAllocator* code_allocator,
-                                CodeGenerator* codegen,
-                                CompilerDriver* driver) const;
-
-  // Create a 'CompiledMethod' for a non-optimized graph.
-  CompiledMethod* EmitBaseline(ArenaAllocator* arena,
-                               CodeVectorAllocator* code_allocator,
-                               CodeGenerator* codegen,
-                               CompilerDriver* driver) const;
+  CompiledMethod* Emit(ArenaAllocator* arena,
+                       CodeVectorAllocator* code_allocator,
+                       CodeGenerator* codegen,
+                       CompilerDriver* driver) const;
 
   // Try compiling a method and return the code generator used for
   // compiling it.
   // This method:
   // 1) Builds the graph. Returns null if it failed to build it.
-  // 2) If `run_optimizations_` is set:
-  //    2.1) Transform the graph to SSA. Returns null if it failed.
-  //    2.2) Run optimizations on the graph, including register allocator.
-  // 3) Generate code with the `code_allocator` provided.
+  // 2) Transforms the graph to SSA. Returns null if it failed.
+  // 3) Runs optimizations on the graph, including register allocator.
+  // 4) Generates code with the `code_allocator` provided.
   CodeGenerator* TryCompile(ArenaAllocator* arena,
                             CodeVectorAllocator* code_allocator,
                             const DexFile::CodeItem* code_item,
@@ -345,21 +339,19 @@
 static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
 
 OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
-    : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
-      run_optimizations_(
-          driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {}
+    : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {}
 
 void OptimizingCompiler::Init() {
   // Enable C1visualizer output. Must be done in Init() because the compiler
   // driver is not fully initialized when passed to the compiler's constructor.
   CompilerDriver* driver = GetCompilerDriver();
-  const std::string cfg_file_name = driver->GetDumpCfgFileName();
+  const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName();
   if (!cfg_file_name.empty()) {
     CHECK_EQ(driver->GetThreadCount(), 1U)
       << "Graph visualizer requires the compiler to run single-threaded. "
       << "Invoke the compiler with '-j1'.";
     std::ios_base::openmode cfg_file_mode =
-        driver->GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
+        driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
     visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
   }
   if (driver->GetDumpStats()) {
@@ -572,17 +564,6 @@
   AllocateRegisters(graph, codegen, pass_observer);
 }
 
-// The stack map we generate must be 4-byte aligned on ARM. Since existing
-// maps are generated alongside these stack maps, we must also align them.
-static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) {
-  size_t size = vector.size();
-  size_t aligned_size = RoundUp(size, 4);
-  for (; size < aligned_size; ++size) {
-    vector.push_back(0);
-  }
-  return ArrayRef<const uint8_t>(vector);
-}
-
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
   ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
   codegen->EmitLinkerPatches(&linker_patches);
@@ -596,10 +577,10 @@
   return linker_patches;
 }
 
-CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
-                                                  CodeVectorAllocator* code_allocator,
-                                                  CodeGenerator* codegen,
-                                                  CompilerDriver* compiler_driver) const {
+CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
+                                         CodeVectorAllocator* code_allocator,
+                                         CodeGenerator* codegen,
+                                         CompilerDriver* compiler_driver) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
   stack_map.resize(codegen->ComputeStackMapsSize());
@@ -625,39 +606,6 @@
   return compiled_method;
 }
 
-CompiledMethod* OptimizingCompiler::EmitBaseline(
-    ArenaAllocator* arena,
-    CodeVectorAllocator* code_allocator,
-    CodeGenerator* codegen,
-    CompilerDriver* compiler_driver) const {
-  ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
-  ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildMappingTable(&mapping_table);
-  ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildVMapTable(&vmap_table);
-  ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
-
-  CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
-      compiler_driver,
-      codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(code_allocator->GetMemory()),
-      // Follow Quick's behavior and set the frame size to zero if it is
-      // considered "empty" (see the definition of
-      // art::CodeGenerator::HasEmptyFrame).
-      codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
-      codegen->GetCoreSpillMask(),
-      codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(),
-      AlignVectorSize(mapping_table),
-      AlignVectorSize(vmap_table),
-      AlignVectorSize(gc_map),
-      ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
-      ArrayRef<const LinkerPatch>(linker_patches));
-  return compiled_method;
-}
-
 CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
                                               CodeVectorAllocator* code_allocator,
                                               const DexFile::CodeItem* code_item,
@@ -770,44 +718,37 @@
 
   VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
 
-  if (run_optimizations_) {
-    ScopedObjectAccess soa(Thread::Current());
-    StackHandleScopeCollection handles(soa.Self());
-    ScopedThreadSuspension sts(soa.Self(), kNative);
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  ScopedThreadSuspension sts(soa.Self(), kNative);
 
-    {
-      PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
-      BuildSsaResult result = graph->TryBuildingSsa(&handles);
-      if (result != kBuildSsaSuccess) {
-        switch (result) {
-          case kBuildSsaFailNonNaturalLoop:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledNonNaturalLoop);
-            break;
-          case kBuildSsaFailThrowCatchLoop:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
-            break;
-          case kBuildSsaFailAmbiguousArrayOp:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
-            break;
-          case kBuildSsaSuccess:
-            UNREACHABLE();
-        }
-        pass_observer.SetGraphInBadState();
-        return nullptr;
+  {
+    PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
+    GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
+    if (result != kAnalysisSuccess) {
+      switch (result) {
+        case kAnalysisFailThrowCatchLoop:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+          break;
+        case kAnalysisFailAmbiguousArrayOp:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+          break;
+        case kAnalysisSuccess:
+          UNREACHABLE();
       }
+      pass_observer.SetGraphInBadState();
+      return nullptr;
     }
-
-    RunOptimizations(graph,
-                     codegen.get(),
-                     compiler_driver,
-                     compilation_stats_.get(),
-                     dex_compilation_unit,
-                     &pass_observer,
-                     &handles);
-    codegen->CompileOptimized(code_allocator);
-  } else {
-    codegen->CompileBaseline(code_allocator);
   }
+
+  RunOptimizations(graph,
+                   codegen.get(),
+                   compiler_driver,
+                   compilation_stats_.get(),
+                   dex_compilation_unit,
+                   &pass_observer,
+                   &handles);
+  codegen->Compile(code_allocator);
   pass_observer.DumpDisassembly();
 
   if (kArenaAllocatorCountAllocations) {
@@ -859,11 +800,7 @@
                    dex_cache));
     if (codegen.get() != nullptr) {
       MaybeRecordStat(MethodCompilationStat::kCompiled);
-      if (run_optimizations_) {
-        method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
-      } else {
-        method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
-      }
+      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
     }
   } else {
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -926,8 +863,6 @@
   {
     // Go to native so that we don't block GC during compilation.
     ScopedThreadSuspension sts(self, kNative);
-
-    DCHECK(run_optimizations_);
     codegen.reset(
         TryCompile(&arena,
                    &code_allocator,
@@ -968,6 +903,39 @@
     return false;
   }
 
+  if (GetCompilerDriver()->GetCompilerOptions().GetGenerateDebugInfo()) {
+    const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
+    const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
+    CompiledMethod compiled_method(
+        GetCompilerDriver(),
+        codegen->GetInstructionSet(),
+        ArrayRef<const uint8_t>(code_allocator.GetMemory()),
+        codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+        codegen->GetCoreSpillMask(),
+        codegen->GetFpuSpillMask(),
+        ArrayRef<const SrcMapElem>(),
+        ArrayRef<const uint8_t>(),  // mapping_table.
+        ArrayRef<const uint8_t>(stack_map_data, stack_map_size),
+        ArrayRef<const uint8_t>(),  // native_gc_map.
+        ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+        ArrayRef<const LinkerPatch>());
+    dwarf::MethodDebugInfo method_debug_info {
+        dex_file,
+        class_def_idx,
+        method_idx,
+        access_flags,
+        code_item,
+        false,  // deduped.
+        code_address,
+        code_address + code_allocator.GetSize(),
+        &compiled_method
+    };
+    ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForMethod(method_debug_info);
+    CreateJITCodeEntryForAddress(code_address,
+                                 std::unique_ptr<const uint8_t[]>(elf_file.data()),
+                                 elf_file.size());
+  }
+
   return true;
 }
 
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index bca1632..f8035aa 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,6 @@
   kRemovedDeadInstruction,
   kRemovedNullCheck,
   kNotCompiledBranchOutsideMethodCode,
-  kNotCompiledNonNaturalLoop,
   kNotCompiledThrowCatchLoop,
   kNotCompiledAmbiguousArrayOp,
   kNotCompiledHugeMethod,
@@ -106,7 +105,6 @@
       case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
       case kRemovedNullCheck: name = "RemovedNullCheck"; break;
       case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
-      case kNotCompiledNonNaturalLoop : name = "NotCompiledNonNaturalLoop"; break;
       case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break;
       case kNotCompiledAmbiguousArrayOp : name = "NotCompiledAmbiguousArrayOp"; break;
       case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index af3a005..5a91043 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -117,7 +117,7 @@
 inline void TransformToSsa(HGraph* graph) {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScopeCollection handles(soa.Self());
-  EXPECT_EQ(graph->TryBuildingSsa(&handles), kBuildSsaSuccess);
+  EXPECT_EQ(graph->TryBuildingSsa(&handles), kAnalysisSuccess);
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 9d136f3..be470cc 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -504,7 +504,7 @@
 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
   // This function is used to reduce the dependencies in the graph after
   // (from -> to) has been performed. Since we ensure there is no move with the same
-  // destination, (to -> X) can not be blocked while (from -> X) might still be
+  // destination, (to -> X) cannot be blocked while (from -> X) might still be
   // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
   // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
   // a dependency between the two. If we update the source location from 1 to 2, we
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a385448..1394dfa 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -145,6 +145,11 @@
 };
 
 void PcRelativeFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
   PCRelativeHandlerVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
   visitor.MoveBaseIfNeeded();
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 1c25e48..527c242 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -58,7 +58,6 @@
   void VisitCheckCast(HCheckCast* instr) OVERRIDE;
   void VisitBoundType(HBoundType* instr) OVERRIDE;
   void VisitNullCheck(HNullCheck* instr) OVERRIDE;
-  void VisitFakeString(HFakeString* instr) OVERRIDE;
   void UpdateReferenceTypeInfo(HInstruction* instr,
                                uint16_t type_idx,
                                const DexFile& dex_file,
@@ -568,10 +567,6 @@
   }
 }
 
-void RTPVisitor::VisitFakeString(HFakeString* instr) {
-  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
-}
-
 void RTPVisitor::VisitBoundType(HBoundType* instr) {
   ScopedObjectAccess soa(Thread::Current());
 
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index d399bc2..a966b62 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -72,8 +72,7 @@
   float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
   double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
 
-  static constexpr bool kIsBaseline = false;
-  codegen->SetupBlockedRegisters(kIsBaseline);
+  codegen->SetupBlockedRegisters();
   physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
   physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
   // Always reserve for the current method and the graph's max out registers.
@@ -179,9 +178,11 @@
       ProcessInstruction(inst_it.Current());
     }
 
-    if (block->IsCatchBlock()) {
-      // By blocking all registers at the top of each catch block, we force
-      // intervals used after catch to spill.
+    if (block->IsCatchBlock() ||
+        (block->GetLoopInformation() != nullptr && block->GetLoopInformation()->IsIrreducible())) {
+      // By blocking all registers at the top of each catch block or irreducible loop, we force
+      // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in register.
       size_t position = block->GetLifetimeStart();
       BlockRegisters(position, position + 1);
     }
@@ -1677,6 +1678,9 @@
 
       LocationSummary* locations = safepoint_position->GetLocations();
       if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+        DCHECK(interval->GetDefinedBy()->IsActualObject())
+            << interval->GetDefinedBy()->DebugName()
+            << "@" << safepoint_position->GetInstruction()->DebugName();
         locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
       }
 
@@ -1689,6 +1693,9 @@
                       maximum_number_of_live_fp_registers_);
           }
           if (current->GetType() == Primitive::kPrimNot) {
+            DCHECK(interval->GetDefinedBy()->IsActualObject())
+                << interval->GetDefinedBy()->DebugName()
+                << "@" << safepoint_position->GetInstruction()->DebugName();
             locations->SetRegisterBit(source.reg());
           }
           break;
@@ -1864,8 +1871,10 @@
   // Resolve non-linear control flow across branches. Order does not matter.
   for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    if (block->IsCatchBlock()) {
-      // Instructions live at the top of catch blocks were forced to spill.
+    if (block->IsCatchBlock() ||
+        (block->GetLoopInformation() != nullptr && block->GetLoopInformation()->IsIrreducible())) {
+      // Instructions live at the top of catch blocks or at irreducible loop
+      // headers were forced to spill.
       if (kIsDebugBuild) {
         BitVector* live = liveness_.GetLiveInSet(*block);
         for (uint32_t idx : live->Indexes()) {
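
The rule the two register-allocator hunks above implement is easier to see in isolation: once every register is blocked for the short range at the block's lifetime start, any interval that is live across that position cannot keep a register and has to get a spill slot, while values defined inside the block or dead before it are unaffected. Below is a minimal standalone sketch of that predicate, with a hypothetical ToyInterval type rather than ART's LiveInterval.

```cpp
#include <cstdio>
#include <vector>

// Mini model of the idea behind BlockRegisters(position, position + 1): an
// interval that is live across a fully blocked position cannot be kept in a
// register across it, so it must be given a spill slot.
struct ToyInterval {
  const char* name;
  size_t start;  // inclusive lifetime position
  size_t end;    // exclusive lifetime position
};

static bool MustSpillAcross(const ToyInterval& it, size_t blocked_pos) {
  return it.start < blocked_pos && blocked_pos < it.end;
}

int main() {
  // Pretend position 40 is the lifetime start of a catch block or of an
  // irreducible loop header, where all registers are blocked for [40, 41).
  const size_t blocked_pos = 40;
  std::vector<ToyInterval> intervals = {
      {"v0", 10, 80},  // live across the header: forced to spill
      {"v1", 42, 60},  // defined inside the block: may stay in a register
      {"v2", 12, 36},  // dead before the header: unaffected
  };
  for (const ToyInterval& it : intervals) {
    std::printf("%s: %s\n", it.name,
                MustSpillAcross(it, blocked_pos) ? "spill slot" : "register ok");
  }
  return 0;
}
```
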
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 306a457..572faa8 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -535,7 +535,7 @@
   (*phi)->AddInput(*input2);
 
   graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
+  graph->AnalyzeLoops();
   return graph;
 }
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index f6bab8e..c0011cd 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -68,7 +68,7 @@
       // should be replaced with a null constant.
       // Both type propagation and redundant phi elimination ensure `int_operand`
       // can only be the 0 constant.
-      DCHECK(int_operand->IsIntConstant());
+      DCHECK(int_operand->IsIntConstant()) << int_operand->DebugName();
       DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue());
       equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0);
     }
@@ -422,7 +422,35 @@
   return true;
 }
 
-BuildSsaResult SsaBuilder::BuildSsa() {
+void SsaBuilder::RemoveRedundantUninitializedStrings() {
+  if (GetGraph()->IsDebuggable()) {
+    // Do not perform the optimization for consistency with the interpreter
+    // which always allocates an object for new-instance of String.
+    return;
+  }
+
+  for (HNewInstance* new_instance : uninitialized_strings_) {
+    DCHECK(new_instance->IsStringAlloc());
+
+    // Replace NewInstance of String with NullConstant if not used prior to
+    // calling StringFactory. In case of deoptimization, the interpreter is
+    // expected to skip the null check on the `this` argument of the StringFactory call.
+    if (!new_instance->HasNonEnvironmentUses()) {
+      new_instance->ReplaceWith(GetGraph()->GetNullConstant());
+      new_instance->GetBlock()->RemoveInstruction(new_instance);
+
+      // Remove LoadClass if not needed any more.
+      HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass();
+      DCHECK(load_class != nullptr);
+      DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible";
+      if (!load_class->HasUses()) {
+        load_class->GetBlock()->RemoveInstruction(load_class);
+      }
+    }
+  }
+}
+
+GraphAnalysisResult SsaBuilder::BuildSsa() {
   // 1) Visit in reverse post order. We need to have all predecessors of a block
   // visited (with the exception of loops) in order to create the right environment
   // for that block. For loops, we create phis whose inputs will be set in 2).
@@ -462,7 +490,7 @@
   // computed the type of the array input, the ambiguity can be resolved and the
   // correct equivalents kept.
   if (!FixAmbiguousArrayOps()) {
-    return kBuildSsaFailAmbiguousArrayOp;
+    return kAnalysisFailAmbiguousArrayOp;
   }
 
   // 8) Mark dead phis. This will mark phis which are not used by instructions
@@ -487,7 +515,15 @@
   // input types.
   dead_phi_elimimation.EliminateDeadPhis();
 
-  // 11) Clear locals.
+  // 11) Step 1) replaced uses of NewInstances of String with the results of
+  // their corresponding StringFactory calls. Unless the String objects are used
+  // before they are initialized, they can be replaced with NullConstant.
+  // Note that this optimization is valid only if unsimplified code does not use
+  // the uninitialized value because we assume execution can be deoptimized at
+  // any safepoint. We must therefore perform it before any other optimizations.
+  RemoveRedundantUninitializedStrings();
+
+  // 12) Clear locals.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
        it.Advance()) {
@@ -497,7 +533,7 @@
     }
   }
 
-  return kBuildSsaSuccess;
+  return kAnalysisSuccess;
 }
 
 ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
@@ -885,4 +921,26 @@
   VisitInstruction(aset);
 }
 
+void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  VisitInstruction(invoke);
+
+  if (invoke->IsStringInit()) {
+    // This is a StringFactory call which acts as a String constructor. Its
+    // result replaces the empty String pre-allocated by NewInstance.
+    HNewInstance* new_instance = invoke->GetThisArgumentOfStringInit();
+    invoke->RemoveThisArgumentOfStringInit();
+
+    // Replacing the NewInstance might render it redundant. Keep a list of these
+    // to be visited once it is clear whether each still has remaining uses.
+    uninitialized_strings_.push_back(new_instance);
+
+    // Walk over all vregs and replace any occurrence of `new_instance` with `invoke`.
+    for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+      if ((*current_locals_)[vreg] == new_instance) {
+        (*current_locals_)[vreg] = invoke;
+      }
+    }
+  }
+}
+
 }  // namespace art
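
Taken together, VisitInvokeStaticOrDirect() and RemoveRedundantUninitializedStrings() form a record-then-sweep pattern: every `new-instance` of String whose value was superseded by the StringFactory result is remembered, and once the whole graph has been rewritten, the ones with no remaining non-environment uses are dropped. The sketch below uses a hypothetical ToyNode type (not ART's HInstruction) to show just that sweep.

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical mini IR node: it records how many non-environment uses remain
// after the StringFactory result replaced the uninitialized allocation.
struct ToyNode {
  std::string name;
  int non_env_uses = 0;
  bool removed = false;
};

// Sweep over the allocations recorded during SSA building and drop the ones
// nothing reads before initialization (the real pass also replaces the node
// with the null constant and removes its now-unused LoadClass input).
static void RemoveRedundantAllocations(std::vector<ToyNode*>& uninitialized) {
  for (ToyNode* node : uninitialized) {
    if (node->non_env_uses == 0) {
      node->removed = true;
    }
  }
}

int main() {
  ToyNode unused{"new-instance String, only reaches StringFactory", 0};
  ToyNode used{"new-instance String, read before init (e.g. stored to a field)", 1};
  std::vector<ToyNode*> worklist = {&unused, &used};
  RemoveRedundantAllocations(worklist);
  for (const ToyNode* n : worklist) {
    std::printf("%s -> %s\n", n->name.c_str(), n->removed ? "removed" : "kept");
  }
  return 0;
}
```
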
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 0fcc3a1..ccef8ea 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -49,7 +49,7 @@
  */
 class SsaBuilder : public HGraphVisitor {
  public:
-  explicit SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
+  SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
       : HGraphVisitor(graph),
         handles_(handles),
         agets_fixed_(false),
@@ -57,26 +57,28 @@
         loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         locals_for_(graph->GetBlocks().size(),
                     ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
                     graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
     loop_headers_.reserve(kDefaultNumberOfLoops);
   }
 
-  BuildSsaResult BuildSsa();
+  GraphAnalysisResult BuildSsa();
 
   // Returns locals vector for `block`. If it is a catch block, the vector will be
   // prepopulated with catch phis for vregs which are defined in `current_locals_`.
   ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
   HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
 
-  void VisitBasicBlock(HBasicBlock* block);
-  void VisitLoadLocal(HLoadLocal* load);
-  void VisitStoreLocal(HStoreLocal* store);
-  void VisitInstruction(HInstruction* instruction);
-  void VisitTemporary(HTemporary* instruction);
-  void VisitArrayGet(HArrayGet* aget);
-  void VisitArraySet(HArraySet* aset);
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+  void VisitLoadLocal(HLoadLocal* load) OVERRIDE;
+  void VisitStoreLocal(HStoreLocal* store) OVERRIDE;
+  void VisitInstruction(HInstruction* instruction) OVERRIDE;
+  void VisitTemporary(HTemporary* instruction) OVERRIDE;
+  void VisitArrayGet(HArrayGet* aget) OVERRIDE;
+  void VisitArraySet(HArraySet* aset) OVERRIDE;
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
 
   static constexpr const char* kSsaBuilderPassName = "ssa_builder";
 
@@ -104,6 +106,8 @@
   HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
   HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
 
+  void RemoveRedundantUninitializedStrings();
+
   StackHandleScopeCollection* const handles_;
 
   // True if types of ambiguous ArrayGets have been resolved.
@@ -118,6 +122,7 @@
 
   ArenaVector<HArrayGet*> ambiguous_agets_;
   ArenaVector<HArraySet*> ambiguous_asets_;
+  ArenaVector<HNewInstance*> uninitialized_strings_;
 
   // HEnvironment for each block.
   ArenaVector<ArenaVector<HInstruction*>> locals_for_;
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index b9d8731..a5609fc 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -273,6 +273,18 @@
     }
 
     if (block->IsLoopHeader()) {
+      if (kIsDebugBuild && block->GetLoopInformation()->IsIrreducible()) {
+        // To satisfy our liveness algorithm, we need to ensure loop headers of
+        // irreducible loops do not have any live-in instructions, except constants
+        // and the current method, which can be trivially re-materialized.
+        for (uint32_t idx : live_in->Indexes()) {
+          HInstruction* instruction = GetInstructionFromSsaIndex(idx);
+          DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
+          DCHECK(!instruction->IsParameterValue()) << instruction->DebugName();
+          DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
+              << instruction->DebugName();
+        }
+      }
       size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
       // For all live_in instructions at the loop header, we need to create a range
       // that covers the full loop.
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 2eef307..6816b6a 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -154,6 +154,7 @@
     cycle_worklist.push_back(phi);
     visited_phis_in_cycle.insert(phi->GetId());
     bool catch_phi_in_cycle = phi->IsCatchPhi();
+    bool irreducible_loop_phi_in_cycle = phi->IsIrreducibleLoopHeaderPhi();
 
     // First do a simple loop over inputs and check if they are all the same.
     for (size_t j = 0; j < phi->InputCount(); ++j) {
@@ -187,6 +188,7 @@
               cycle_worklist.push_back(input->AsPhi());
               visited_phis_in_cycle.insert(input->GetId());
               catch_phi_in_cycle |= input->AsPhi()->IsCatchPhi();
+              irreducible_loop_phi_in_cycle |= input->IsIrreducibleLoopHeaderPhi();
             } else {
               // Already visited, nothing to do.
             }
@@ -206,6 +208,15 @@
       continue;
     }
 
+    if (irreducible_loop_phi_in_cycle && !candidate->IsConstant()) {
+      // For irreducible loops, we need to keep the phis to satisfy our linear scan
+      // algorithm.
+      // There is one exception for constants, as the type propagation requires redundant
+      // cyclic phis of a constant to be removed. This is ok for the linear scan as it
+      // has to deal with constants anyway, and they can trivially be rematerialized.
+      continue;
+    }
+
     for (HPhi* current : cycle_worklist) {
       // The candidate may not dominate a phi in a catch block: there may be non-throwing
       // instructions at the beginning of a try range, that may be the first input of
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index c60a4ea..4784de1 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -270,7 +270,7 @@
       stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask);
     }
 
-    if (entry.num_dex_registers == 0) {
+    if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
       // No dex map available.
       stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap);
     } else {
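
The condition change above is small but subtle: before it, an entry that declared dex registers yet had an all-zero live mask still went down the map-emitting path. A minimal standalone check of the new rule, with std::bitset standing in for ART's BitVector:

```cpp
#include <bitset>
#include <cstdio>

// A dex register map is emitted only if registers were declared *and* at
// least one of them is live in the mask.
static bool NeedsDexRegisterMap(size_t num_dex_registers,
                                const std::bitset<32>& live_mask) {
  return num_dex_registers != 0 && live_mask.any();
}

int main() {
  std::bitset<32> none;  // registers declared but none live
  std::bitset<32> some;
  some.set(0);
  std::printf("declared=0, live=0 -> %d\n", NeedsDexRegisterMap(0, none) ? 1 : 0);
  std::printf("declared=1, live=0 -> %d\n", NeedsDexRegisterMap(1, none) ? 1 : 0);  // the new case
  std::printf("declared=1, live=1 -> %d\n", NeedsDexRegisterMap(1, some) ? 1 : 0);
  return 0;
}
```
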
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 560502f..604787f 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -614,6 +614,10 @@
   stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.EndStackMapEntry();
 
+  number_of_dex_registers = 1;
+  stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0);
+  stream.EndStackMapEntry();
+
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
@@ -622,7 +626,7 @@
   CodeInfo code_info(region);
   StackMapEncoding encoding = code_info.ExtractEncoding();
   ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+  ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
 
   uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
   ASSERT_EQ(0u, number_of_location_catalog_entries);
@@ -638,6 +642,16 @@
 
   ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
   ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+
+  stack_map = code_info.GetStackMapAt(1, encoding);
+  ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding)));
+  ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding)));
+  ASSERT_EQ(1u, stack_map.GetDexPc(encoding));
+  ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding));
+  ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding));
+
+  ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
 }
 
 TEST(StackMapTest, InlineTest) {
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index b79c2f0..f96376d 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -501,6 +501,8 @@
 
   virtual void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
+  // Note: CMN updates flags based on addition of its operands. Do not confuse
+  // the "N" suffix with bitwise inversion performed by MVN.
   virtual void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
   virtual void orr(Register rd, Register rn, const ShifterOperand& so,
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index f341030..52023a6 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -3428,10 +3428,10 @@
     CHECK(rn != IP);
     // If rd != rn, use rd as temp. This allows 16-bit ADD/SUB in more situations than using IP.
     Register temp = (rd != rn) ? rd : IP;
-    if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) {
+    if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) {
       mvn(temp, shifter_op, cond, kCcKeep);
       add(rd, rn, ShifterOperand(temp), cond, set_cc);
-    } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) {
+    } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) {
       mvn(temp, shifter_op, cond, kCcKeep);
       sub(rd, rn, ShifterOperand(temp), cond, set_cc);
     } else if (High16Bits(-value) == 0) {
@@ -3449,22 +3449,32 @@
 }
 
 void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
-  // We prefer to select the shorter code sequence rather than selecting add for
-  // positive values and sub for negatives ones, which would slightly improve
-  // the readability of generated code for some constants.
+  // We prefer to select the shorter code sequence rather than using plain cmp and cmn,
+  // which would slightly improve the readability of generated code for some constants.
   ShifterOperand shifter_op;
   if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) {
     cmp(rn, shifter_op, cond);
-  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) {
+  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, -value, kCcSet, &shifter_op)) {
     cmn(rn, shifter_op, cond);
   } else {
     CHECK(rn != IP);
-    movw(IP, Low16Bits(value), cond);
-    uint16_t value_high = High16Bits(value);
-    if (value_high != 0) {
-      movt(IP, value_high, cond);
+    if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) {
+      mvn(IP, shifter_op, cond, kCcKeep);
+      cmp(rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) {
+      mvn(IP, shifter_op, cond, kCcKeep);
+      cmn(rn, ShifterOperand(IP), cond);
+    } else if (High16Bits(-value) == 0) {
+      movw(IP, Low16Bits(-value), cond);
+      cmn(rn, ShifterOperand(IP), cond);
+    } else {
+      movw(IP, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(IP, value_high, cond);
+      }
+      cmp(rn, ShifterOperand(IP), cond);
     }
-    cmp(rn, ShifterOperand(IP), cond);
   }
 }
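
The rewritten CmpConstant() is essentially a preference ladder over encodings. CMP and CMN produce the same flags for the same comparison (CMN adds its operand, so comparing against a constant c is the same as CMN with -c), which is why every CMP-shaped option has a CMN-shaped twin, and MOVW+MOVT is kept as the last resort. The standalone C++ sketch below mirrors that ladder; IsModifiedImmediate() is a simplified, assumed stand-in for ShifterOperandCanHold() and ignores the 16-bit encodings and the set_cc plumbing.

```cpp
#include <cstdint>
#include <cstdio>

// Rough test for the Thumb-2 "modified immediate" encodings: a plain byte,
// one of the repeated-byte patterns, or an 8-bit value with its top bit set
// rotated into position.
static bool IsModifiedImmediate(uint32_t v) {
  uint32_t b = v & 0xffu;
  if (v <= 0xffu) return true;
  if (v == ((b << 16) | b)) return true;
  if (v == ((b << 24) | (b << 8))) return true;
  if (v == ((b << 24) | (b << 16) | (b << 8) | b)) return true;
  for (int rot = 8; rot <= 31; ++rot) {
    uint32_t imm8 = (v << rot) | (v >> (32 - rot));  // rotate left to undo the ROR
    if (imm8 >= 0x80u && imm8 <= 0xffu) return true;
  }
  return false;
}

// Mirrors the order of checks in the updated CmpConstant(): CMP, CMN of the
// negated constant, the MVN-based pairs, the MOVW-based pairs, and finally
// MOVW+MOVT as a last resort.
static const char* SelectCmpSequence(int32_t value) {
  uint32_t pos = static_cast<uint32_t>(value);
  uint32_t neg = static_cast<uint32_t>(-value);
  if (IsModifiedImmediate(pos)) return "CMP rn, #value";
  if (IsModifiedImmediate(neg)) return "CMN rn, #-value";
  if (IsModifiedImmediate(~pos)) return "MVN ip, #~value; CMP rn, ip";
  if (IsModifiedImmediate(~neg)) return "MVN ip, #~(-value); CMN rn, ip";
  if ((neg >> 16) == 0) return "MOVW ip, #-value; CMN rn, ip";
  if ((pos >> 16) == 0) return "MOVW ip, #value; CMP rn, ip";
  return "MOVW+MOVT ip, #value; CMP rn, ip";
}

int main() {
  const int32_t samples[] = {255, 257, 0xfff, -255, -257, -0xfff, 0x10003, -0x10003};
  for (int32_t c : samples) {
    std::printf("%11d -> %s\n", static_cast<int>(c), SelectCmpSequence(c));
  }
  return 0;
}
```

Running this on the constants exercised by the new CmpConstant test reproduces the sequences named in its comments (for example 257 maps to MVN+CMN and -257 to MVN+CMP).
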
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 0ef0dc1..2df9b17 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -1626,6 +1626,76 @@
   EmitAndCheck(&assembler, "AddConstant");
 }
 
+TEST(Thumb2AssemblerTest, CmpConstant) {
+  arm::Thumb2Assembler assembler;
+
+  __ CmpConstant(R0, 0);                              // 16-bit CMP.
+  __ CmpConstant(R1, 1);                              // 16-bit CMP.
+  __ CmpConstant(R0, 7);                              // 16-bit CMP.
+  __ CmpConstant(R1, 8);                              // 16-bit CMP.
+  __ CmpConstant(R0, 255);                            // 16-bit CMP.
+  __ CmpConstant(R1, 256);                            // 32-bit CMP.
+  __ CmpConstant(R0, 257);                            // MVN+CMN.
+  __ CmpConstant(R1, 0xfff);                          // MOVW+CMP.
+  __ CmpConstant(R0, 0x1000);                         // 32-bit CMP.
+  __ CmpConstant(R1, 0x1001);                         // MVN+CMN.
+  __ CmpConstant(R0, 0x1002);                         // MOVW+CMP.
+  __ CmpConstant(R1, 0xffff);                         // MOVW+CMP.
+  __ CmpConstant(R0, 0x10000);                        // 32-bit CMP.
+  __ CmpConstant(R1, 0x10001);                        // 32-bit CMP.
+  __ CmpConstant(R0, 0x10002);                        // MVN+CMN.
+  __ CmpConstant(R1, 0x10003);                        // MOVW+MOVT+CMP.
+  __ CmpConstant(R0, -1);                             // 32-bit CMP.
+  __ CmpConstant(R1, -7);                             // CMN.
+  __ CmpConstant(R0, -8);                             // CMN.
+  __ CmpConstant(R1, -255);                           // CMN.
+  __ CmpConstant(R0, -256);                           // CMN.
+  __ CmpConstant(R1, -257);                           // MVN+CMP.
+  __ CmpConstant(R0, -0xfff);                         // MOVW+CMN.
+  __ CmpConstant(R1, -0x1000);                        // CMN.
+  __ CmpConstant(R0, -0x1001);                        // MVN+CMP.
+  __ CmpConstant(R1, -0x1002);                        // MOVW+CMN.
+  __ CmpConstant(R0, -0xffff);                        // MOVW+CMN.
+  __ CmpConstant(R1, -0x10000);                       // CMN.
+  __ CmpConstant(R0, -0x10001);                       // CMN.
+  __ CmpConstant(R1, -0x10002);                       // MVN+CMP.
+  __ CmpConstant(R0, -0x10003);                       // MOVW+MOVT+CMP.
+
+  __ CmpConstant(R8, 0);                              // 32-bit CMP.
+  __ CmpConstant(R9, 1);                              // 32-bit CMP.
+  __ CmpConstant(R8, 7);                              // 32-bit CMP.
+  __ CmpConstant(R9, 8);                              // 32-bit CMP.
+  __ CmpConstant(R8, 255);                            // 32-bit CMP.
+  __ CmpConstant(R9, 256);                            // 32-bit CMP.
+  __ CmpConstant(R8, 257);                            // MVN+CMN.
+  __ CmpConstant(R9, 0xfff);                          // MOVW+CMP.
+  __ CmpConstant(R8, 0x1000);                         // 32-bit CMP.
+  __ CmpConstant(R9, 0x1001);                         // MVN+CMN.
+  __ CmpConstant(R8, 0x1002);                         // MOVW+CMP.
+  __ CmpConstant(R9, 0xffff);                         // MOVW+CMP.
+  __ CmpConstant(R8, 0x10000);                        // 32-bit CMP.
+  __ CmpConstant(R9, 0x10001);                        // 32-bit CMP.
+  __ CmpConstant(R8, 0x10002);                        // MVN+CMN.
+  __ CmpConstant(R9, 0x10003);                        // MOVW+MOVT+CMP.
+  __ CmpConstant(R8, -1);                             // 32-bit CMP.
+  __ CmpConstant(R9, -7);                             // CMN.
+  __ CmpConstant(R8, -8);                             // CMN.
+  __ CmpConstant(R9, -255);                           // CMN.
+  __ CmpConstant(R8, -256);                           // CMN.
+  __ CmpConstant(R9, -257);                           // MVN+CMP.
+  __ CmpConstant(R8, -0xfff);                         // MOVW+CMN.
+  __ CmpConstant(R9, -0x1000);                        // CMN.
+  __ CmpConstant(R8, -0x1001);                        // MVN+CMP.
+  __ CmpConstant(R9, -0x1002);                        // MOVW+CMN.
+  __ CmpConstant(R8, -0xffff);                        // MOVW+CMN.
+  __ CmpConstant(R9, -0x10000);                       // CMN.
+  __ CmpConstant(R8, -0x10001);                       // CMN.
+  __ CmpConstant(R9, -0x10002);                       // MVN+CMP.
+  __ CmpConstant(R8, -0x10003);                       // MOVW+MOVT+CMP.
+
+  EmitAndCheck(&assembler, "CmpConstant");
+}
+
 #undef __
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index f07f8c7..6736015 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -1,4 +1,4 @@
-const char* SimpleMovResults[] = {
+const char* const SimpleMovResults[] = {
   "   0:	0008      	movs	r0, r1\n",
   "   2:	4608      	mov	r0, r1\n",
   "   4:	46c8      	mov	r8, r9\n",
@@ -6,18 +6,18 @@
   "   8:	f04f 0809 	mov.w	r8, #9\n",
   nullptr
 };
-const char* SimpleMov32Results[] = {
+const char* const SimpleMov32Results[] = {
   "   0:	ea4f 0001 	mov.w	r0, r1\n",
   "   4:	ea4f 0809 	mov.w	r8, r9\n",
   nullptr
 };
-const char* SimpleMovAddResults[] = {
+const char* const SimpleMovAddResults[] = {
   "   0:	4608      	mov	r0, r1\n",
   "   2:	1888      	adds	r0, r1, r2\n",
   "   4:	1c08      	adds	r0, r1, #0\n",
   nullptr
 };
-const char* DataProcessingRegisterResults[] = {
+const char* const DataProcessingRegisterResults[] = {
   "   0:	ea6f 0001 	mvn.w	r0, r1\n",
   "   4:	eb01 0002 	add.w	r0, r1, r2\n",
   "   8:	eba1 0002 	sub.w	r0, r1, r2\n",
@@ -129,7 +129,7 @@
   " 120:	eb01 0c00 	add.w	ip, r1, r0\n",
   nullptr
 };
-const char* DataProcessingImmediateResults[] = {
+const char* const DataProcessingImmediateResults[] = {
   "   0:	2055      	movs	r0, #85	; 0x55\n",
   "   2:	f06f 0055 	mvn.w	r0, #85	; 0x55\n",
   "   6:	f101 0055 	add.w	r0, r1, #85	; 0x55\n",
@@ -154,7 +154,7 @@
   "  48:	1f48      	subs	r0, r1, #5\n",
   nullptr
 };
-const char* DataProcessingModifiedImmediateResults[] = {
+const char* const DataProcessingModifiedImmediateResults[] = {
   "   0:	f04f 1055 	mov.w	r0, #5570645	; 0x550055\n",
   "   4:	f06f 1055 	mvn.w	r0, #5570645	; 0x550055\n",
   "   8:	f101 1055 	add.w	r0, r1, #5570645	; 0x550055\n",
@@ -173,7 +173,7 @@
   "  3c:	f110 1f55 	cmn.w	r0, #5570645	; 0x550055\n",
   nullptr
 };
-const char* DataProcessingModifiedImmediatesResults[] = {
+const char* const DataProcessingModifiedImmediatesResults[] = {
   "   0:	f04f 1055 	mov.w	r0, #5570645	; 0x550055\n",
   "   4:	f04f 2055 	mov.w	r0, #1426085120	; 0x55005500\n",
   "   8:	f04f 3055 	mov.w	r0, #1431655765	; 0x55555555\n",
@@ -183,7 +183,7 @@
   "  18:	f44f 70d4 	mov.w	r0, #424	; 0x1a8\n",
   nullptr
 };
-const char* DataProcessingShiftedRegisterResults[] = {
+const char* const DataProcessingShiftedRegisterResults[] = {
   "   0:	0123      	lsls	r3, r4, #4\n",
   "   2:	0963      	lsrs	r3, r4, #5\n",
   "   4:	11a3      	asrs	r3, r4, #6\n",
@@ -201,7 +201,7 @@
   "  32:	ea5f 0834 	movs.w	r8, r4, rrx\n",
   nullptr
 };
-const char* ShiftImmediateResults[] = {
+const char* const ShiftImmediateResults[] = {
   "   0:  0123        lsls  r3, r4, #4\n",
   "   2:  0963        lsrs  r3, r4, #5\n",
   "   4:  11a3        asrs  r3, r4, #6\n",
@@ -219,7 +219,7 @@
   "  32:  ea5f 0834   movs.w  r8, r4, rrx\n",
   nullptr
 };
-const char* BasicLoadResults[] = {
+const char* const BasicLoadResults[] = {
   "   0:	69a3      	ldr	r3, [r4, #24]\n",
   "   2:	7e23      	ldrb	r3, [r4, #24]\n",
   "   4:	8b23      	ldrh	r3, [r4, #24]\n",
@@ -233,7 +233,7 @@
   "  20:	f9b4 8018 	ldrsh.w	r8, [r4, #24]\n",
   nullptr
 };
-const char* BasicStoreResults[] = {
+const char* const BasicStoreResults[] = {
   "   0:	61a3      	str	r3, [r4, #24]\n",
   "   2:	7623      	strb	r3, [r4, #24]\n",
   "   4:	8323      	strh	r3, [r4, #24]\n",
@@ -243,7 +243,7 @@
   "  10:	f8a4 8018 	strh.w	r8, [r4, #24]\n",
   nullptr
 };
-const char* ComplexLoadResults[] = {
+const char* const ComplexLoadResults[] = {
   "   0:	69a3      	ldr	r3, [r4, #24]\n",
   "   2:	f854 3f18 	ldr.w	r3, [r4, #24]!\n",
   "   6:	f854 3b18 	ldr.w	r3, [r4], #24\n",
@@ -276,7 +276,7 @@
   "  6e:	f934 3918 	ldrsh.w	r3, [r4], #-24\n",
   nullptr
 };
-const char* ComplexStoreResults[] = {
+const char* const ComplexStoreResults[] = {
   "   0:	61a3      	str	r3, [r4, #24]\n",
   "   2:	f844 3f18 	str.w	r3, [r4, #24]!\n",
   "   6:	f844 3b18 	str.w	r3, [r4], #24\n",
@@ -297,7 +297,7 @@
   "  3e:	f824 3918 	strh.w	r3, [r4], #-24\n",
   nullptr
 };
-const char* NegativeLoadStoreResults[] = {
+const char* const NegativeLoadStoreResults[] = {
   "   0:	f854 3c18 	ldr.w	r3, [r4, #-24]\n",
   "   4:	f854 3d18 	ldr.w	r3, [r4, #-24]!\n",
   "   8:	f854 3918 	ldr.w	r3, [r4], #-24\n",
@@ -348,12 +348,12 @@
   "  bc:	f824 3b18 	strh.w	r3, [r4], #24\n",
   nullptr
 };
-const char* SimpleLoadStoreDualResults[] = {
+const char* const SimpleLoadStoreDualResults[] = {
   "   0:	e9c0 2306 	strd	r2, r3, [r0, #24]\n",
   "   4:	e9d0 2306 	ldrd	r2, r3, [r0, #24]\n",
   nullptr
 };
-const char* ComplexLoadStoreDualResults[] = {
+const char* const ComplexLoadStoreDualResults[] = {
   "   0:	e9c0 2306 	strd	r2, r3, [r0, #24]\n",
   "   4:	e9e0 2306 	strd	r2, r3, [r0, #24]!\n",
   "   8:	e8e0 2306 	strd	r2, r3, [r0], #24\n",
@@ -368,7 +368,7 @@
   "  2c:	e870 2306 	ldrd	r2, r3, [r0], #-24\n",
   nullptr
 };
-const char* NegativeLoadStoreDualResults[] = {
+const char* const NegativeLoadStoreDualResults[] = {
   "   0:	e940 2306 	strd	r2, r3, [r0, #-24]\n",
   "   4:	e960 2306 	strd	r2, r3, [r0, #-24]!\n",
   "   8:	e860 2306 	strd	r2, r3, [r0], #-24\n",
@@ -383,7 +383,7 @@
   "  2c:	e8f0 2306 	ldrd	r2, r3, [r0], #24\n",
   nullptr
 };
-const char* SimpleBranchResults[] = {
+const char* const SimpleBranchResults[] = {
   "   0:	2002      	movs	r0, #2\n",
   "   2:	2101      	movs	r1, #1\n",
   "   4:	e7fd      	b.n	2 <SimpleBranch+0x2>\n",
@@ -403,7 +403,7 @@
   "  20:	2006      	movs	r0, #6\n",
   nullptr
 };
-const char* LongBranchResults[] = {
+const char* const LongBranchResults[] = {
   "   0:	f04f 0002 	mov.w	r0, #2\n",
   "   4:	f04f 0101 	mov.w	r1, #1\n",
   "   8:	f7ff bffc 	b.w	4 <LongBranch+0x4>\n",
@@ -423,14 +423,14 @@
   "  40:	f04f 0006 	mov.w	r0, #6\n",
   nullptr
 };
-const char* LoadMultipleResults[] = {
+const char* const LoadMultipleResults[] = {
   "   0:	cc09      	ldmia	r4!, {r0, r3}\n",
   "   2:	e934 4800 	ldmdb	r4!, {fp, lr}\n",
   "   6:	e914 4800 	ldmdb	r4, {fp, lr}\n",
   "   a:	f854 5b04 	ldr.w	r5, [r4], #4\n",
   nullptr
 };
-const char* StoreMultipleResults[] = {
+const char* const StoreMultipleResults[] = {
   "   0:	c409      	stmia	r4!, {r0, r3}\n",
   "   2:	e8a4 4800 	stmia.w	r4!, {fp, lr}\n",
   "   6:	e884 4800 	stmia.w	r4, {fp, lr}\n",
@@ -438,7 +438,7 @@
   "   e:	f844 5d04 	str.w	r5, [r4, #-4]!\n",
   nullptr
 };
-const char* MovWMovTResults[] = {
+const char* const MovWMovTResults[] = {
   "   0:	f240 0400 	movw  r4, #0\n",
   "   4:	f240 0434 	movw  r4, #52 ; 0x34\n",
   "   8:	f240 0934 	movw	r9, #52	; 0x34\n",
@@ -449,7 +449,7 @@
   "  1c:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
   nullptr
 };
-const char* SpecialAddSubResults[] = {
+const char* const SpecialAddSubResults[] = {
   "   0:	aa14      	add	r2, sp, #80	; 0x50\n",
   "   2:	b014      	add	sp, #80		; 0x50\n",
   "   4:	f10d 0850 	add.w	r8, sp, #80	; 0x50\n",
@@ -463,7 +463,7 @@
   "  22:	f6ad 7dfc 	subw	sp, sp, #4092	; 0xffc\n",
   nullptr
 };
-const char* LoadFromOffsetResults[] = {
+const char* const LoadFromOffsetResults[] = {
   "   0:	68e2      	ldr	r2, [r4, #12]\n",
   "   2:	f8d4 2fff 	ldr.w	r2, [r4, #4095]	; 0xfff\n",
   "   6:	f504 5280 	add.w	r2, r4, #4096	; 0x1000\n",
@@ -514,7 +514,7 @@
   "  9e:	f9b4 200c 	ldrsh.w	r2, [r4, #12]\n",
   nullptr
 };
-const char* StoreToOffsetResults[] = {
+const char* const StoreToOffsetResults[] = {
   "   0:	60e2      	str	r2, [r4, #12]\n",
   "   2:	f8c4 2fff 	str.w	r2, [r4, #4095]	; 0xfff\n",
   "   6:	f504 5c80 	add.w	ip, r4, #4096	; 0x1000\n",
@@ -563,7 +563,7 @@
   "  a4:	7322      	strb	r2, [r4, #12]\n",
   nullptr
 };
-const char* IfThenResults[] = {
+const char* const IfThenResults[] = {
   "   0:	bf08      	it	eq\n",
   "   2:	2101      	moveq	r1, #1\n",
   "   4:	bf04      	itt	eq\n",
@@ -587,7 +587,7 @@
   "  28:	2404      	movne	r4, #4\n",
   nullptr
 };
-const char* CbzCbnzResults[] = {
+const char* const CbzCbnzResults[] = {
   "   0:	b10a      	cbz	r2, 6 <CbzCbnz+0x6>\n",
   "   2:	2103      	movs	r1, #3\n",
   "   4:	2203      	movs	r2, #3\n",
@@ -598,7 +598,7 @@
   "  10:	2204      	movs	r2, #4\n",
   nullptr
 };
-const char* MultiplyResults[] = {
+const char* const MultiplyResults[] = {
   "   0:	4348      	muls	r0, r1\n",
   "   2:	fb01 f002 	mul.w	r0, r1, r2\n",
   "   6:	fb09 f808 	mul.w	r8, r9, r8\n",
@@ -611,21 +611,21 @@
   "  22:	fbaa 890b 	umull	r8, r9, sl, fp\n",
   nullptr
 };
-const char* DivideResults[] = {
+const char* const DivideResults[] = {
   "   0:	fb91 f0f2 	sdiv	r0, r1, r2\n",
   "   4:	fb99 f8fa 	sdiv	r8, r9, sl\n",
   "   8:	fbb1 f0f2 	udiv	r0, r1, r2\n",
   "   c:	fbb9 f8fa 	udiv	r8, r9, sl\n",
   nullptr
 };
-const char* VMovResults[] = {
+const char* const VMovResults[] = {
   "   0:	eef7 0a00 	vmov.f32	s1, #112	; 0x70\n",
   "   4:	eeb7 1b00 	vmov.f64	d1, #112	; 0x70\n",
   "   8:	eef0 0a41 	vmov.f32	s1, s2\n",
   "   c:	eeb0 1b42 	vmov.f64	d1, d2\n",
   nullptr
 };
-const char* BasicFloatingPointResults[] = {
+const char* const BasicFloatingPointResults[] = {
   "   0:	ee30 0a81 	vadd.f32	s0, s1, s2\n",
   "   4:	ee30 0ac1 	vsub.f32	s0, s1, s2\n",
   "   8:	ee20 0a81 	vmul.f32	s0, s1, s2\n",
@@ -646,7 +646,7 @@
   "  44:	eeb1 0bc1 	vsqrt.f64	d0, d1\n",
   nullptr
 };
-const char* FloatingPointConversionsResults[] = {
+const char* const FloatingPointConversionsResults[] = {
   "   0:	eeb7 1bc2 	vcvt.f32.f64	s2, d2\n",
   "   4:	eeb7 2ac1 	vcvt.f64.f32	d2, s2\n",
   "   8:	eefd 0ac1 	vcvt.s32.f32	s1, s2\n",
@@ -659,35 +659,35 @@
   "  24:	eeb8 1b41 	vcvt.f64.u32	d1, s2\n",
   nullptr
 };
-const char* FloatingPointComparisonsResults[] = {
+const char* const FloatingPointComparisonsResults[] = {
   "   0:	eeb4 0a60 	vcmp.f32	s0, s1\n",
   "   4:	eeb4 0b41 	vcmp.f64	d0, d1\n",
   "   8:	eeb5 1a40 	vcmp.f32	s2, #0.0\n",
   "   c:	eeb5 2b40 	vcmp.f64	d2, #0.0\n",
   nullptr
 };
-const char* CallsResults[] = {
+const char* const CallsResults[] = {
   "   0:	47f0      	blx	lr\n",
   "   2:	4770      	bx	lr\n",
   nullptr
 };
-const char* BreakpointResults[] = {
+const char* const BreakpointResults[] = {
   "   0:	be00      	bkpt	0x0000\n",
   nullptr
 };
-const char* StrR1Results[] = {
+const char* const StrR1Results[] = {
   "   0:	9111      	str	r1, [sp, #68]	; 0x44\n",
   "   2:	f8cd 142c 	str.w	r1, [sp, #1068]	; 0x42c\n",
   nullptr
 };
-const char* VPushPopResults[] = {
+const char* const VPushPopResults[] = {
   "   0:	ed2d 1a04 	vpush	{s2-s5}\n",
   "   4:	ed2d 2b08 	vpush	{d2-d5}\n",
   "   8:	ecbd 1a04 	vpop	{s2-s5}\n",
   "   c:	ecbd 2b08 	vpop	{d2-d5}\n",
   nullptr
 };
-const char* Max16BitBranchResults[] = {
+const char* const Max16BitBranchResults[] = {
   "   0:	e3ff      	b.n	802 <Max16BitBranch+0x802>\n",
   "   2:	2300      	movs	r3, #0\n",
   "   4:	2302      	movs	r3, #2\n",
@@ -1716,7 +1716,7 @@
   " 802:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* Branch32Results[] = {
+const char* const Branch32Results[] = {
   "   0:	f000 bc01 	b.w	806 <Branch32+0x806>\n",
   "   4:	2300      	movs	r3, #0\n",
   "   6:	2302      	movs	r3, #2\n",
@@ -2746,7 +2746,7 @@
   " 806:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* CompareAndBranchMaxResults[] = {
+const char* const CompareAndBranchMaxResults[] = {
   "   0:	b3fc      	cbz	r4, 82 <CompareAndBranchMax+0x82>\n",
   "   2:	2300      	movs	r3, #0\n",
   "   4:	2302      	movs	r3, #2\n",
@@ -2815,7 +2815,7 @@
   "  82:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* CompareAndBranchRelocation16Results[] = {
+const char* const CompareAndBranchRelocation16Results[] = {
   "   0:	2c00      	cmp	r4, #0\n",
   "   2:	d040      	beq.n	86 <CompareAndBranchRelocation16+0x86>\n",
   "   4:	2300      	movs	r3, #0\n",
@@ -2886,7 +2886,7 @@
   "  86:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* CompareAndBranchRelocation32Results[] = {
+const char* const CompareAndBranchRelocation32Results[] = {
   "   0:	2c00      	cmp	r4, #0\n",
   "   2:	f000 8401 	beq.w	808 <CompareAndBranchRelocation32+0x808>\n",
   "   6:	2300      	movs	r3, #0\n",
@@ -3917,7 +3917,7 @@
   " 808:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* MixedBranch32Results[] = {
+const char* const MixedBranch32Results[] = {
   "   0:	f000 bc03 	b.w	80a <MixedBranch32+0x80a>\n",
   "   4:	2300      	movs	r3, #0\n",
   "   6:	2302      	movs	r3, #2\n",
@@ -4948,7 +4948,7 @@
   " 80a:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* ShiftsResults[] = {
+const char* const ShiftsResults[] = {
   "   0:	0148      	lsls	r0, r1, #5\n",
   "   2:	0948      	lsrs	r0, r1, #5\n",
   "   4:	1148      	asrs	r0, r1, #5\n",
@@ -4997,7 +4997,7 @@
   "  98:	fa51 f008 	asrs.w	r0, r1, r8\n",
   nullptr
 };
-const char* LoadStoreRegOffsetResults[] = {
+const char* const LoadStoreRegOffsetResults[] = {
   "   0:	5888      	ldr	r0, [r1, r2]\n",
   "   2:	5088      	str	r0, [r1, r2]\n",
   "   4:	f851 0012 	ldr.w	r0, [r1, r2, lsl #1]\n",
@@ -5012,7 +5012,7 @@
   "  28:	f841 0008 	str.w	r0, [r1, r8]\n",
   nullptr
 };
-const char* LoadStoreLiteralResults[] = {
+const char* const LoadStoreLiteralResults[] = {
   "   0:   4801            ldr     r0, [pc, #4]    ; (8 <LoadStoreLiteral+0x8>)\n",
   "   2:   f8cf 0004       str.w   r0, [pc, #4]    ; 8 <LoadStoreLiteral+0x8>\n",
   "   6:   f85f 0008       ldr.w   r0, [pc, #-8]   ; 0 <LoadStoreLiteral>\n",
@@ -5023,7 +5023,7 @@
   "  18:   f8cf 07ff       str.w   r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n",
   nullptr
 };
-const char* LoadStoreLimitsResults[] = {
+const char* const LoadStoreLimitsResults[] = {
   "   0:   6fe0            ldr     r0, [r4, #124]  ; 0x7c\n",
   "   2:   f8d4 0080       ldr.w   r0, [r4, #128]  ; 0x80\n",
   "   6:   7fe0            ldrb    r0, [r4, #31]\n",
@@ -5042,7 +5042,7 @@
   "  30:   f8a4 0040       strh.w  r0, [r4, #64]   ; 0x40\n",
   nullptr
 };
-const char* CompareAndBranchResults[] = {
+const char* const CompareAndBranchResults[] = {
   "  0: b130        cbz r0, 10 <CompareAndBranch+0x10>\n",
   "  2: f1bb 0f00   cmp.w fp, #0\n",
   "  6: d003        beq.n 10 <CompareAndBranch+0x10>\n",
@@ -5052,7 +5052,7 @@
   nullptr
 };
 
-const char* AddConstantResults[] = {
+const char* const AddConstantResults[] = {
   "   0:	4608      	mov	r0, r1\n",
   "   2:	1c48      	adds	r0, r1, #1\n",
   "   4:	1dc8      	adds	r0, r1, #7\n",
@@ -5370,6 +5370,104 @@
   nullptr
 };
 
+const char* const CmpConstantResults[] = {
+  "   0:	2800      	cmp	r0, #0\n",
+  "   2:	2901      	cmp	r1, #1\n",
+  "   4:	2807      	cmp	r0, #7\n",
+  "   6:	2908      	cmp	r1, #8\n",
+  "   8:	28ff      	cmp	r0, #255	; 0xff\n",
+  "   a:	f5b1 7f80 	cmp.w	r1, #256	; 0x100\n",
+  "   e:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  "  12:	eb10 0f0c 	cmn.w	r0, ip\n",
+  "  16:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  "  1a:	4561      	cmp	r1, ip\n",
+  "  1c:	f5b0 5f80 	cmp.w	r0, #4096	; 0x1000\n",
+  "  20:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  24:	eb11 0f0c 	cmn.w	r1, ip\n",
+  "  28:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  2c:	4560      	cmp	r0, ip\n",
+  "  2e:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  32:	4561      	cmp	r1, ip\n",
+  "  34:	f5b0 3f80 	cmp.w	r0, #65536	; 0x10000\n",
+  "  38:	f1b1 1f01 	cmp.w	r1, #65537	; 0x10001\n",
+  "  3c:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  "  40:	eb10 0f0c 	cmn.w	r0, ip\n",
+  "  44:	f240 0c03 	movw	ip, #3\n",
+  "  48:	f2c0 0c01 	movt	ip, #1\n",
+  "  4c:	4561      	cmp	r1, ip\n",
+  "  4e:	f1b0 3fff 	cmp.w	r0, #4294967295	; 0xffffffff\n",
+  "  52:	f111 0f07 	cmn.w	r1, #7\n",
+  "  56:	f110 0f08 	cmn.w	r0, #8\n",
+  "  5a:	f111 0fff 	cmn.w	r1, #255	; 0xff\n",
+  "  5e:	f510 7f80 	cmn.w	r0, #256	; 0x100\n",
+  "  62:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  "  66:	4561      	cmp	r1, ip\n",
+  "  68:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  "  6c:	eb10 0f0c 	cmn.w	r0, ip\n",
+  "  70:	f511 5f80 	cmn.w	r1, #4096	; 0x1000\n",
+  "  74:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  78:	4560      	cmp	r0, ip\n",
+  "  7a:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  7e:	eb11 0f0c 	cmn.w	r1, ip\n",
+  "  82:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  86:	eb10 0f0c 	cmn.w	r0, ip\n",
+  "  8a:	f511 3f80 	cmn.w	r1, #65536	; 0x10000\n",
+  "  8e:	f110 1f01 	cmn.w	r0, #65537	; 0x10001\n",
+  "  92:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  "  96:	4561      	cmp	r1, ip\n",
+  "  98:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  "  9c:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  "  a0:	4560      	cmp	r0, ip\n",
+  "  a2:	f1b8 0f00 	cmp.w	r8, #0\n",
+  "  a6:	f1b9 0f01 	cmp.w	r9, #1\n",
+  "  aa:	f1b8 0f07 	cmp.w	r8, #7\n",
+  "  ae:	f1b9 0f08 	cmp.w	r9, #8\n",
+  "  b2:	f1b8 0fff 	cmp.w	r8, #255	; 0xff\n",
+  "  b6:	f5b9 7f80 	cmp.w	r9, #256	; 0x100\n",
+  "  ba:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  "  be:	eb18 0f0c 	cmn.w	r8, ip\n",
+  "  c2:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  "  c6:	45e1      	cmp	r9, ip\n",
+  "  c8:	f5b8 5f80 	cmp.w	r8, #4096	; 0x1000\n",
+  "  cc:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  d0:	eb19 0f0c 	cmn.w	r9, ip\n",
+  "  d4:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  d8:	45e0      	cmp	r8, ip\n",
+  "  da:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  de:	45e1      	cmp	r9, ip\n",
+  "  e0:	f5b8 3f80 	cmp.w	r8, #65536	; 0x10000\n",
+  "  e4:	f1b9 1f01 	cmp.w	r9, #65537	; 0x10001\n",
+  "  e8:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  "  ec:	eb18 0f0c 	cmn.w	r8, ip\n",
+  "  f0:	f240 0c03 	movw	ip, #3\n",
+  "  f4:	f2c0 0c01 	movt	ip, #1\n",
+  "  f8:	45e1      	cmp	r9, ip\n",
+  "  fa:	f1b8 3fff 	cmp.w	r8, #4294967295	; 0xffffffff\n",
+  "  fe:	f119 0f07 	cmn.w	r9, #7\n",
+  " 102:	f118 0f08 	cmn.w	r8, #8\n",
+  " 106:	f119 0fff 	cmn.w	r9, #255	; 0xff\n",
+  " 10a:	f518 7f80 	cmn.w	r8, #256	; 0x100\n",
+  " 10e:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  " 112:	45e1      	cmp	r9, ip\n",
+  " 114:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  " 118:	eb18 0f0c 	cmn.w	r8, ip\n",
+  " 11c:	f519 5f80 	cmn.w	r9, #4096	; 0x1000\n",
+  " 120:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 124:	45e0      	cmp	r8, ip\n",
+  " 126:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 12a:	eb19 0f0c 	cmn.w	r9, ip\n",
+  " 12e:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 132:	eb18 0f0c 	cmn.w	r8, ip\n",
+  " 136:	f519 3f80 	cmn.w	r9, #65536	; 0x10000\n",
+  " 13a:	f118 1f01 	cmn.w	r8, #65537	; 0x10001\n",
+  " 13e:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 142:	45e1      	cmp	r9, ip\n",
+  " 144:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 148:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 14c:	45e0      	cmp	r8, ip\n",
+  nullptr
+};
+
 std::map<std::string, const char* const*> test_results;
 void setup_results() {
     test_results["SimpleMov"] = SimpleMovResults;
@@ -5421,4 +5519,5 @@
     test_results["LoadStoreLimits"] = LoadStoreLimitsResults;
     test_results["CompareAndBranch"] = CompareAndBranchResults;
     test_results["AddConstant"] = AddConstantResults;
+    test_results["CmpConstant"] = CmpConstantResults;
 }
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 0dc307c..ac9c097 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -1035,6 +1035,22 @@
   EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01);
 }
 
+void MipsAssembler::TruncLS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09);
+}
+
+void MipsAssembler::TruncLD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09);
+}
+
+void MipsAssembler::TruncWS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D);
+}
+
+void MipsAssembler::TruncWD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D);
+}
+
 void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
   EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20);
 }
@@ -1051,6 +1067,14 @@
   EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21);
 }
 
+void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20);
+}
+
+void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21);
+}
+
 void MipsAssembler::Mfc1(Register rt, FRegister fs) {
   EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
@@ -1067,6 +1091,24 @@
   EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
 
+void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) {
+  if (Is32BitFPU()) {
+    CHECK_EQ(fs % 2, 0) << fs;
+    Mfc1(rt, static_cast<FRegister>(fs + 1));
+  } else {
+    Mfhc1(rt, fs);
+  }
+}
+
+void MipsAssembler::MoveToFpuHigh(Register rt, FRegister fs) {
+  if (Is32BitFPU()) {
+    CHECK_EQ(fs % 2, 0) << fs;
+    Mtc1(rt, static_cast<FRegister>(fs + 1));
+  } else {
+    Mthc1(rt, fs);
+  }
+}
+
 void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) {
   EmitI(0x31, rs, static_cast<Register>(ft), imm16);
 }
@@ -1213,10 +1255,10 @@
     Mtc1(temp, rd);
   }
   if (high == 0) {
-    Mthc1(ZERO, rd);
+    MoveToFpuHigh(ZERO, rd);
   } else {
     LoadConst32(temp, high);
-    Mthc1(temp, rd);
+    MoveToFpuHigh(temp, rd);
   }
 }
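
The new MoveToFpuHigh()/MoveFromFpuHigh() helpers hide the FR=0 versus FR=1 difference: with a 32-bit FPU a double occupies an even/odd register pair and its upper word lives in the odd register, while a 64-bit FPU keeps both halves in one register and reaches the top half with MTHC1/MFHC1. A hypothetical sketch of the write side, where the Emit* functions only print mnemonics:

```cpp
#include <cstdio>

static void EmitMtc1(int gpr, int fpr)  { std::printf("mtc1  $%d, $f%d\n", gpr, fpr); }
static void EmitMthc1(int gpr, int fpr) { std::printf("mthc1 $%d, $f%d\n", gpr, fpr); }

// With a 32-bit FPU (FR=0) the high 32 bits of a double go to the odd
// register of the pair via MTC1; with a 64-bit FPU (FR=1) MTHC1 writes the
// upper half of the same register.
static void MoveToFpuHigh(bool is_32bit_fpu, int gpr, int fpr) {
  if (is_32bit_fpu) {
    // fpr must be even: the pair (fpr, fpr + 1) holds one double.
    EmitMtc1(gpr, fpr + 1);
  } else {
    EmitMthc1(gpr, fpr);
  }
}

int main() {
  MoveToFpuHigh(/*is_32bit_fpu=*/true, /*gpr=*/8, /*fpr=*/0);   // mtc1  $8, $f1
  MoveToFpuHigh(/*is_32bit_fpu=*/false, /*gpr=*/8, /*fpr=*/0);  // mthc1 $8, $f0
  return 0;
}
```
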
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 066e7b0..01c6490 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -265,15 +265,23 @@
   void Movf(Register rd, Register rs, int cc);  // R2
   void Movt(Register rd, Register rs, int cc);  // R2
 
+  void TruncLS(FRegister fd, FRegister fs);  // R2+, FR=1
+  void TruncLD(FRegister fd, FRegister fs);  // R2+, FR=1
+  void TruncWS(FRegister fd, FRegister fs);
+  void TruncWD(FRegister fd, FRegister fs);
   void Cvtsw(FRegister fd, FRegister fs);
   void Cvtdw(FRegister fd, FRegister fs);
   void Cvtsd(FRegister fd, FRegister fs);
   void Cvtds(FRegister fd, FRegister fs);
+  void Cvtsl(FRegister fd, FRegister fs);  // R2+, FR=1
+  void Cvtdl(FRegister fd, FRegister fs);  // R2+, FR=1
 
   void Mfc1(Register rt, FRegister fs);
   void Mtc1(Register rt, FRegister fs);
   void Mfhc1(Register rt, FRegister fs);
   void Mthc1(Register rt, FRegister fs);
+  void MoveFromFpuHigh(Register rt, FRegister fs);
+  void MoveToFpuHigh(Register rt, FRegister fs);
   void Lwc1(FRegister ft, Register rs, uint16_t imm16);
   void Ldc1(FRegister ft, Register rs, uint16_t imm16);
   void Swc1(FRegister ft, Register rs, uint16_t imm16);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 4361843..5fc3dee 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -599,6 +599,14 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW");
 }
 
+TEST_F(AssemblerMIPSTest, CvtSL) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "CvtSL");
+}
+
+TEST_F(AssemblerMIPSTest, CvtDL) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "CvtDL");
+}
+
 TEST_F(AssemblerMIPSTest, CvtSD) {
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD");
 }
@@ -607,6 +615,22 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS");
 }
 
+TEST_F(AssemblerMIPSTest, TruncWS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "TruncWS");
+}
+
+TEST_F(AssemblerMIPSTest, TruncWD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "TruncWD");
+}
+
+TEST_F(AssemblerMIPSTest, TruncLS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "TruncLS");
+}
+
+TEST_F(AssemblerMIPSTest, TruncLD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "TruncLD");
+}
+
 TEST_F(AssemblerMIPSTest, Mfc1) {
   DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
 }
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index cfd8421..f9ff2df 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -771,6 +771,22 @@
   EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xc);
 }
 
+void Mips64Assembler::TruncLS(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x9);
+}
+
+void Mips64Assembler::TruncLD(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x9);
+}
+
+void Mips64Assembler::TruncWS(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xd);
+}
+
+void Mips64Assembler::TruncWD(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xd);
+}
+
 void Mips64Assembler::CeilLS(FpuRegister fd, FpuRegister fs) {
   EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xa);
 }
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 883f013..3262640 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -250,6 +250,10 @@
   void RoundLD(FpuRegister fd, FpuRegister fs);
   void RoundWS(FpuRegister fd, FpuRegister fs);
   void RoundWD(FpuRegister fd, FpuRegister fs);
+  void TruncLS(FpuRegister fd, FpuRegister fs);
+  void TruncLD(FpuRegister fd, FpuRegister fs);
+  void TruncWS(FpuRegister fd, FpuRegister fs);
+  void TruncWD(FpuRegister fd, FpuRegister fs);
   void CeilLS(FpuRegister fd, FpuRegister fs);
   void CeilLD(FpuRegister fd, FpuRegister fs);
   void CeilWS(FpuRegister fd, FpuRegister fs);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index bac4375..7d79be2 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -527,6 +527,22 @@
   DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w");
 }
 
+TEST_F(AssemblerMIPS64Test, TruncWS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "trunc.w.s");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncWD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "trunc.w.d");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncLS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "trunc.l.s");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncLD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d");
+}
+
 ////////////////
 // CALL / JMP //
 ////////////////
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index 42ed881..244a5fe 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -18,6 +18,7 @@
 
 #include <algorithm>
 #include <numeric>
+#include <sys/mman.h>
 
 #include "base/logging.h"
 #include "base/macros.h"
@@ -44,23 +45,17 @@
   }
 }
 
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void RemoveChunk(FreeByStartSet* free_by_start,
-                        FreeBySizeSet* free_by_size,
-                        typename FreeBySizeSet::const_iterator free_by_size_pos) {
+void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) {
   auto free_by_start_pos = free_by_size_pos->second;
-  free_by_size->erase(free_by_size_pos);
-  free_by_start->erase(free_by_start_pos);
+  free_by_size_.erase(free_by_size_pos);
+  free_by_start_.erase(free_by_start_pos);
 }
 
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void InsertChunk(FreeByStartSet* free_by_start,
-                        FreeBySizeSet* free_by_size,
-                        const SpaceChunk& chunk) {
+inline void SwapSpace::InsertChunk(const SpaceChunk& chunk) {
   DCHECK_NE(chunk.size, 0u);
-  auto insert_result = free_by_start->insert(chunk);
+  auto insert_result = free_by_start_.insert(chunk);
   DCHECK(insert_result.second);
-  free_by_size->emplace(chunk.size, insert_result.first);
+  free_by_size_.emplace(chunk.size, insert_result.first);
 }
 
 SwapSpace::SwapSpace(int fd, size_t initial_size)
@@ -69,10 +64,18 @@
       lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) {
   // Assume that the file is unlinked.
 
-  InsertChunk(&free_by_start_, &free_by_size_, NewFileChunk(initial_size));
+  InsertChunk(NewFileChunk(initial_size));
 }
 
 SwapSpace::~SwapSpace() {
+  // Unmap all mmapped chunks. Nothing should be allocated anymore at
+  // this point, so there should be only full size chunks in free_by_start_.
+  for (const SpaceChunk& chunk : free_by_start_) {
+    if (munmap(chunk.ptr, chunk.size) != 0) {
+      PLOG(ERROR) << "Failed to unmap swap space chunk at "
+          << static_cast<const void*>(chunk.ptr) << " size=" << chunk.size;
+    }
+  }
   // All arenas are backed by the same file. Just close the descriptor.
   close(fd_);
 }
@@ -113,7 +116,7 @@
       : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() });
   if (it != free_by_size_.end()) {
     old_chunk = *it->second;
-    RemoveChunk(&free_by_start_, &free_by_size_, it);
+    RemoveChunk(it);
   } else {
     // Not a big enough free chunk, need to increase file size.
     old_chunk = NewFileChunk(size);
@@ -124,13 +127,13 @@
   if (old_chunk.size != size) {
     // Insert the remainder.
     SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size };
-    InsertChunk(&free_by_start_, &free_by_size_, new_chunk);
+    InsertChunk(new_chunk);
   }
 
   return ret;
 }
 
-SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
+SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
 #if !defined(__APPLE__)
   size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize));
   int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part));
@@ -159,7 +162,7 @@
 }
 
 // TODO: Full coalescing.
-void SwapSpace::Free(void* ptrV, size_t size) {
+void SwapSpace::Free(void* ptr, size_t size) {
   MutexLock lock(Thread::Current(), lock_);
   size = RoundUp(size, 8U);
 
@@ -168,7 +171,7 @@
     free_before = CollectFree(free_by_start_, free_by_size_);
   }
 
-  SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptrV), size };
+  SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptr), size };
   auto it = free_by_start_.lower_bound(chunk);
   if (it != free_by_start_.begin()) {
     auto prev = it;
@@ -180,7 +183,7 @@
       chunk.ptr -= prev->size;
       auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev });
       DCHECK(erase_pos != free_by_size_.end());
-      RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+      RemoveChunk(erase_pos);
       // "prev" is invalidated but "it" remains valid.
     }
   }
@@ -191,11 +194,11 @@
       chunk.size += it->size;
       auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it });
       DCHECK(erase_pos != free_by_size_.end());
-      RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+      RemoveChunk(erase_pos);
       // "it" is invalidated but we don't need it anymore.
     }
   }
-  InsertChunk(&free_by_start_, &free_by_size_, chunk);
+  InsertChunk(chunk);
 
   if (kCheckFreeMaps) {
     size_t free_after = CollectFree(free_by_start_, free_by_size_);
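
The refactoring turns the two free-chunk indexes into members, but the bookkeeping itself is unchanged: free chunks are tracked once ordered by start address (so Free() can coalesce with neighbours) and once by size, where each by-size entry carries an iterator into the by-start set (so Alloc() can take the smallest chunk that fits and erase it from both sides). The standalone sketch below shows that shape with illustrative names; it orders the by-size index by size only and leaves out coalescing and the file-growing path.

```cpp
#include <cstdint>
#include <cstdio>
#include <set>
#include <utility>

struct Chunk {
  uintptr_t start;
  size_t size;
  bool operator<(const Chunk& other) const { return start < other.start; }
};

class FreeChunks {
 public:
  void Insert(Chunk c) {
    auto it = by_start_.insert(c).first;
    by_size_.emplace(c.size, it);  // by-size entry points back into by_start_
  }

  // Takes the smallest chunk that can hold `size`, returning any remainder
  // to the free lists (mirrors the tail of SwapSpace::Alloc()).
  bool Alloc(size_t size, uintptr_t* out) {
    auto it = by_size_.lower_bound({size, by_start_.begin()});
    if (it == by_size_.end()) return false;
    Chunk old = *it->second;
    by_start_.erase(it->second);
    by_size_.erase(it);
    *out = old.start;
    if (old.size != size) Insert({old.start + size, old.size - size});
    return true;
  }

  void Dump() const {
    for (const Chunk& c : by_start_) {
      std::printf("  free [%#lx, %#lx)\n",
                  static_cast<unsigned long>(c.start),
                  static_cast<unsigned long>(c.start + c.size));
    }
  }

 private:
  using ByStart = std::set<Chunk>;
  using BySizeEntry = std::pair<size_t, ByStart::const_iterator>;
  struct BySizeLess {
    bool operator()(const BySizeEntry& a, const BySizeEntry& b) const {
      return a.first < b.first;  // order by size only (simplified comparator)
    }
  };
  ByStart by_start_;
  std::multiset<BySizeEntry, BySizeLess> by_size_;
};

int main() {
  FreeChunks free_chunks;
  free_chunks.Insert({0x1000, 0x4000});
  uintptr_t p = 0;
  if (free_chunks.Alloc(0x800, &p)) {
    std::printf("allocated %#lx\n", static_cast<unsigned long>(p));
  }
  free_chunks.Dump();
  return 0;
}
```
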
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index 9127b6b..b659f1d 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -19,42 +19,17 @@
 
 #include <cstdlib>
 #include <list>
+#include <vector>
 #include <set>
 #include <stdint.h>
 #include <stddef.h>
 
-#include "base/debug_stack.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "mem_map.h"
 
 namespace art {
 
-// Chunk of space.
-struct SpaceChunk {
-  uint8_t* ptr;
-  size_t size;
-
-  uintptr_t Start() const {
-    return reinterpret_cast<uintptr_t>(ptr);
-  }
-  uintptr_t End() const {
-    return reinterpret_cast<uintptr_t>(ptr) + size;
-  }
-};
-
-inline bool operator==(const SpaceChunk& lhs, const SpaceChunk& rhs) {
-  return (lhs.size == rhs.size) && (lhs.ptr == rhs.ptr);
-}
-
-class SortChunkByPtr {
- public:
-  bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
-    return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
-  }
-};
-
 // An arena pool that creates arenas backed by an mmaped file.
 class SwapSpace {
  public:
@@ -68,17 +43,27 @@
   }
 
  private:
-  SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+  // Chunk of space.
+  struct SpaceChunk {
+    uint8_t* ptr;
+    size_t size;
 
-  int fd_;
-  size_t size_;
-  std::list<SpaceChunk> maps_;
+    uintptr_t Start() const {
+      return reinterpret_cast<uintptr_t>(ptr);
+    }
+    uintptr_t End() const {
+      return reinterpret_cast<uintptr_t>(ptr) + size;
+    }
+  };
 
-  // NOTE: Boost.Bimap would be useful for the two following members.
+  class SortChunkByPtr {
+   public:
+    bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
+      return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
+    }
+  };
 
-  // Map start of a free chunk to its size.
   typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet;
-  FreeByStartSet free_by_start_ GUARDED_BY(lock_);
 
   // Map size to an iterator to free_by_start_'s entry.
   typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry;
@@ -92,6 +77,21 @@
     }
   };
   typedef std::set<FreeBySizeEntry, FreeBySizeComparator> FreeBySizeSet;
+
+  SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+
+  void RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) REQUIRES(lock_);
+  void InsertChunk(const SpaceChunk& chunk) REQUIRES(lock_);
+
+  int fd_;
+  size_t size_;
+  std::list<SpaceChunk> maps_;
+
+  // NOTE: Boost.Bimap would be useful for the two following members.
+
+  // Map start of a free chunk to its size.
+  FreeByStartSet free_by_start_ GUARDED_BY(lock_);
+  // Free chunks ordered by size.
   FreeBySizeSet free_by_size_ GUARDED_BY(lock_);
 
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -126,6 +126,9 @@
 
   template <typename U>
   friend class SwapAllocator;
+
+  template <typename U>
+  friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
 };
 
 template <typename T>
@@ -201,9 +204,22 @@
 
   template <typename U>
   friend class SwapAllocator;
+
+  template <typename U>
+  friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
 };
 
 template <typename T>
+inline bool operator==(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+  return lhs.swap_space_ == rhs.swap_space_;
+}
+
+template <typename T>
+inline bool operator!=(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename T>
 using SwapVector = std::vector<T, SwapAllocator<T>>;
 template <typename T, typename Comparator>
 using SwapSet = std::set<T, Comparator, SwapAllocator<T>>;
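The operator==/operator!= pair added above is the standard allocator-equality hook: containers use it to decide whether memory obtained from one allocator instance may be released through another. A minimal sketch of what the new operators express, assuming SwapAllocator can be constructed from a SwapSpace* as the compared swap_space_ member suggests (illustrative only, not part of the patch):

    // Two allocators over the same SwapSpace compare equal; distinct spaces do not.
    bool SameBackingSpace(art::SwapSpace* space) {
      art::SwapAllocator<int> a(space);   // assumed constructor shape
      art::SwapAllocator<int> b(space);
      return a == b && !(a != b);
    }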
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 178d606..52c2283 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -313,13 +313,12 @@
   UsageError("  -g");
   UsageError("  --generate-debug-info: Generate debug information for native debugging,");
   UsageError("      such as stack unwinding information, ELF symbols and DWARF sections.");
-  UsageError("      This generates all the available information. Unneeded parts can be");
-  UsageError("      stripped using standard command line tools such as strip or objcopy.");
-  UsageError("      (enabled by default in debug builds, disabled by default otherwise)");
+  UsageError("      If used without --native-debuggable, it will be best-effort only.");
+  UsageError("      This option does not affect the generated code. (disabled by default)");
   UsageError("");
   UsageError("  --no-generate-debug-info: Do not generate debug information for native debugging.");
   UsageError("");
-  UsageError("  --debuggable: Produce code debuggable with Java debugger. Implies -g.");
+  UsageError("  --debuggable: Produce code debuggable with Java debugger.");
   UsageError("");
   UsageError("  --native-debuggable: Produce code debuggable with native debugger (like LLDB).");
   UsageError("      Implies --debuggable.");
@@ -535,6 +534,7 @@
       compiled_methods_filename_(nullptr),
       app_image_(false),
       boot_image_(false),
+      multi_image_(false),
       is_host_(false),
       image_writer_(nullptr),
       driver_(nullptr),
@@ -542,7 +542,6 @@
       dump_passes_(false),
       dump_timing_(false),
       dump_slow_timing_(kIsDebugBuild),
-      dump_cfg_append_(false),
       swap_fd_(-1),
       app_image_fd_(kInvalidImageFd),
       timings_(timings) {}
@@ -683,7 +682,7 @@
     }
   }
 
-  void ProcessOptions(ParserOptions* parser_options, bool multi_image) {
+  void ProcessOptions(ParserOptions* parser_options) {
     boot_image_ = !image_filenames_.empty();
     app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty();
 
@@ -888,86 +887,17 @@
 
     compiler_options_->verbose_methods_ = verbose_methods_.empty() ? nullptr : &verbose_methods_;
 
-    if (!IsBootImage() && multi_image) {
+    if (!IsBootImage() && multi_image_) {
       Usage("--multi-image can only be used when creating boot images");
     }
-    if (IsBootImage() && multi_image && image_filenames_.size() > 1) {
+    if (IsBootImage() && multi_image_ && image_filenames_.size() > 1) {
       Usage("--multi-image cannot be used with multiple image names");
     }
 
     // For now, if we're on the host and compile the boot image, *always* use multiple image files.
     if (!kIsTargetBuild && IsBootImage()) {
       if (image_filenames_.size() == 1) {
-        multi_image = true;
-      }
-    }
-
-    if (IsBootImage() && multi_image) {
-      // Expand the oat and image filenames.
-      std::string base_oat = oat_filenames_[0];
-      size_t last_oat_slash = base_oat.rfind('/');
-      if (last_oat_slash == std::string::npos) {
-        Usage("--multi-image used with unusable oat filename %s", base_oat.c_str());
-      }
-      // We also need to honor path components that were encoded through '@'. Otherwise the loading
-      // code won't be able to find the images.
-      if (base_oat.find('@', last_oat_slash) != std::string::npos) {
-        last_oat_slash = base_oat.rfind('@');
-      }
-      base_oat = base_oat.substr(0, last_oat_slash + 1);
-
-      std::string base_img = image_filenames_[0];
-      size_t last_img_slash = base_img.rfind('/');
-      if (last_img_slash == std::string::npos) {
-        Usage("--multi-image used with unusable image filename %s", base_img.c_str());
-      }
-      // We also need to honor path components that were encoded through '@'. Otherwise the loading
-      // code won't be able to find the images.
-      if (base_img.find('@', last_img_slash) != std::string::npos) {
-        last_img_slash = base_img.rfind('@');
-      }
-
-      // Get the prefix, which is the primary image name (without path components). Strip the
-      // extension.
-      std::string prefix = base_img.substr(last_img_slash + 1);
-      if (prefix.rfind('.') != std::string::npos) {
-        prefix = prefix.substr(0, prefix.rfind('.'));
-      }
-      if (!prefix.empty()) {
-        prefix = prefix + "-";
-      }
-
-      base_img = base_img.substr(0, last_img_slash + 1);
-
-      // Note: we have some special case here for our testing. We have to inject the differentiating
-      //       parts for the different core images.
-      std::string infix;  // Empty infix by default.
-      {
-        // Check the first name.
-        std::string dex_file = oat_filenames_[0];
-        size_t last_dex_slash = dex_file.rfind('/');
-        if (last_dex_slash != std::string::npos) {
-          dex_file = dex_file.substr(last_dex_slash + 1);
-        }
-        size_t last_dex_dot = dex_file.rfind('.');
-        if (last_dex_dot != std::string::npos) {
-          dex_file = dex_file.substr(0, last_dex_dot);
-        }
-        if (StartsWith(dex_file, "core-")) {
-          infix = dex_file.substr(strlen("core"));
-        }
-      }
-
-      // Now create the other names. Use a counted loop to skip the first one.
-      for (size_t i = 1; i < dex_locations_.size(); ++i) {
-        // TODO: Make everything properly std::string.
-        std::string image_name = CreateMultiImageName(dex_locations_[i], prefix, infix, ".art");
-        char_backing_storage_.push_back(base_img + image_name);
-        image_filenames_.push_back((char_backing_storage_.end() - 1)->c_str());
-
-        std::string oat_name = CreateMultiImageName(dex_locations_[i], prefix, infix, ".oat");
-        char_backing_storage_.push_back(base_oat + oat_name);
-        oat_filenames_.push_back((char_backing_storage_.end() - 1)->c_str());
+        multi_image_ = true;
       }
     }
 
@@ -980,6 +910,74 @@
     key_value_store_.reset(new SafeMap<std::string, std::string>());
   }
 
+  void ExpandOatAndImageFilenames() {
+    std::string base_oat = oat_filenames_[0];
+    size_t last_oat_slash = base_oat.rfind('/');
+    if (last_oat_slash == std::string::npos) {
+      Usage("--multi-image used with unusable oat filename %s", base_oat.c_str());
+    }
+    // We also need to honor path components that were encoded through '@'. Otherwise the loading
+    // code won't be able to find the images.
+    if (base_oat.find('@', last_oat_slash) != std::string::npos) {
+      last_oat_slash = base_oat.rfind('@');
+    }
+    base_oat = base_oat.substr(0, last_oat_slash + 1);
+
+    std::string base_img = image_filenames_[0];
+    size_t last_img_slash = base_img.rfind('/');
+    if (last_img_slash == std::string::npos) {
+      Usage("--multi-image used with unusable image filename %s", base_img.c_str());
+    }
+    // We also need to honor path components that were encoded through '@'. Otherwise the loading
+    // code won't be able to find the images.
+    if (base_img.find('@', last_img_slash) != std::string::npos) {
+      last_img_slash = base_img.rfind('@');
+    }
+
+    // Get the prefix, which is the primary image name (without path components). Strip the
+    // extension.
+    std::string prefix = base_img.substr(last_img_slash + 1);
+    if (prefix.rfind('.') != std::string::npos) {
+      prefix = prefix.substr(0, prefix.rfind('.'));
+    }
+    if (!prefix.empty()) {
+      prefix = prefix + "-";
+    }
+
+    base_img = base_img.substr(0, last_img_slash + 1);
+
+    // Note: we have a special case here for our testing. We have to inject the differentiating
+    //       parts for the different core images.
+    std::string infix;  // Empty infix by default.
+    {
+      // Check the first name.
+      std::string dex_file = oat_filenames_[0];
+      size_t last_dex_slash = dex_file.rfind('/');
+      if (last_dex_slash != std::string::npos) {
+        dex_file = dex_file.substr(last_dex_slash + 1);
+      }
+      size_t last_dex_dot = dex_file.rfind('.');
+      if (last_dex_dot != std::string::npos) {
+        dex_file = dex_file.substr(0, last_dex_dot);
+      }
+      if (StartsWith(dex_file, "core-")) {
+        infix = dex_file.substr(strlen("core"));
+      }
+    }
+
+    // Now create the other names. Use a counted loop to skip the first one.
+    for (size_t i = 1; i < dex_locations_.size(); ++i) {
+      // TODO: Make everything properly std::string.
+      std::string image_name = CreateMultiImageName(dex_locations_[i], prefix, infix, ".art");
+      char_backing_storage_.push_back(base_img + image_name);
+      image_filenames_.push_back((char_backing_storage_.end() - 1)->c_str());
+
+      std::string oat_name = CreateMultiImageName(dex_locations_[i], prefix, infix, ".oat");
+      char_backing_storage_.push_back(base_oat + oat_name);
+      oat_filenames_.push_back((char_backing_storage_.end() - 1)->c_str());
+    }
+  }
+
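To make the '@' handling in ExpandOatAndImageFilenames concrete (hypothetical path, using the usual dalvik-cache encoding of '/' as '@'): given an oat filename such as /data/dalvik-cache/arm/system@framework@boot.oat, rfind('/') stops at the cache directory, but because an '@' appears after that slash the code re-bases on the last '@', so base_oat becomes /data/dalvik-cache/arm/system@framework@ and the expanded per-dex oat and image names are appended to that base.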
   // Modify the input string in the following way:
   //   0) Assume input is /a/b/c.d
   //   1) Strip the path  -> c.d
@@ -1051,8 +1049,6 @@
     std::unique_ptr<ParserOptions> parser_options(new ParserOptions());
     compiler_options_.reset(new CompilerOptions());
 
-    bool multi_image = false;
-
     for (int i = 0; i < argc; i++) {
       const StringPiece option(argv[i]);
       const bool log_options = false;
@@ -1136,10 +1132,6 @@
         dump_timing_ = true;
       } else if (option == "--dump-passes") {
         dump_passes_ = true;
-      } else if (option.starts_with("--dump-cfg=")) {
-        dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
-      } else if (option.starts_with("--dump-cfg-append")) {
-        dump_cfg_append_ = true;
       } else if (option == "--dump-stats") {
         dump_stats_ = true;
       } else if (option.starts_with("--swap-file=")) {
@@ -1156,7 +1148,7 @@
         gLogVerbosity.compiler = false;
         Split(option.substr(strlen("--verbose-methods=")).ToString(), ',', &verbose_methods_);
       } else if (option == "--multi-image") {
-        multi_image = true;
+        multi_image_ = true;
       } else if (option.starts_with("--no-inline-from=")) {
         no_inline_from_string_ = option.substr(strlen("--no-inline-from=")).data();
       } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
@@ -1164,7 +1156,7 @@
       }
     }
 
-    ProcessOptions(parser_options.get(), multi_image);
+    ProcessOptions(parser_options.get());
 
     // Insert some compiler things.
     InsertCompileOptions(argc, argv);
@@ -1173,6 +1165,11 @@
   // Check whether the oat output files are writable, and open them for later. Also open a swap
   // file, if a name is given.
   bool OpenFile() {
+    // Expand oat and image filenames for multi image.
+    if (IsBootImage() && multi_image_) {
+      ExpandOatAndImageFilenames();
+    }
+
     bool create_file = oat_fd_ == -1;  // as opposed to using open file descriptor
     if (create_file) {
       for (const char* oat_filename : oat_filenames_) {
@@ -1226,6 +1223,22 @@
                                       // released immediately.
       unlink(swap_file_name_.c_str());
     }
+
+    // If we use a swap file, ensure we are above the threshold to make it necessary.
+    if (swap_fd_ != -1) {
+      if (!UseSwap(IsBootImage(), dex_files_)) {
+        close(swap_fd_);
+        swap_fd_ = -1;
+        VLOG(compiler) << "Decided to run without swap.";
+      } else {
+        LOG(INFO) << "Large app, accepted running with swap.";
+      }
+    }
+    // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
+
+    // Organize inputs, handling multi-dex and multiple oat file outputs.
+    CreateDexOatMappings();
+
     return true;
   }
 
@@ -1288,6 +1301,21 @@
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
     if (boot_image_filename_.empty()) {
       dex_files_ = class_linker->GetBootClassPath();
+      // Prune invalid dex locations.
+      for (size_t i = 0; i < dex_locations_.size(); i++) {
+        const char* dex_location = dex_locations_[i];
+        bool contains = false;
+        for (const DexFile* dex_file : dex_files_) {
+          if (strcmp(dex_location, dex_file->GetLocation().c_str()) == 0) {
+            contains = true;
+            break;
+          }
+        }
+        if (!contains) {
+          dex_locations_.erase(dex_locations_.begin() + i);
+          i--;
+        }
+      }
     } else {
       TimingLogger::ScopedTiming t_dex("Opening dex files", timings_);
       if (dex_filenames_.empty()) {
@@ -1338,18 +1366,6 @@
       dex_file->CreateTypeLookupTable();
     }
 
-    // If we use a swap file, ensure we are above the threshold to make it necessary.
-    if (swap_fd_ != -1) {
-      if (!UseSwap(IsBootImage(), dex_files_)) {
-        close(swap_fd_);
-        swap_fd_ = -1;
-        VLOG(compiler) << "Decided to run without swap.";
-      } else {
-        LOG(INFO) << "Large app, accepted running with swap.";
-      }
-    }
-    // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
-
     /*
      * If we're not in interpret-only or verify-none mode, go ahead and compile small applications.
      * Don't bother to check if we're doing the image.
@@ -1369,16 +1385,11 @@
       }
     }
 
-    // Organize inputs, handling multi-dex and multiple oat file outputs.
-    CreateDexOatMappings();
-
     return true;
   }
 
   void CreateDexOatMappings() {
     if (oat_files_.size() > 1) {
-      // TODO: This needs to change, as it is not a stable mapping. If a dex file is missing,
-      //       the images will be out of whack. b/26317072
       size_t index = 0;
       for (size_t i = 0; i < oat_files_.size(); ++i) {
         std::vector<const DexFile*> dex_files;
@@ -1410,7 +1421,6 @@
 
     // Handle and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = nullptr;
-    jobject class_path_class_loader = nullptr;
     Thread* self = Thread::Current();
 
     if (!boot_image_filename_.empty()) {
@@ -1425,12 +1435,10 @@
       key_value_store_->Put(OatHeader::kClassPathKey,
                             OatFile::EncodeDexFileDependencies(class_path_files));
 
-      class_path_class_loader = class_linker->CreatePathClassLoader(self,
-                                                                    class_path_files,
-                                                                    nullptr);
+      // Then append the dex files we'll compile, so the class-path entries are resolved first.
+      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
 
-      // Class path loader as parent so that we'll resolve there first.
-      class_loader = class_linker->CreatePathClassLoader(self, dex_files_, class_path_class_loader);
+      class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
     }
 
     // Find the dex file we should not inline from.
@@ -1536,8 +1544,6 @@
                                      thread_count_,
                                      dump_stats_,
                                      dump_passes_,
-                                     dump_cfg_file_name_,
-                                     dump_cfg_append_,
                                      compiler_phases_timings_.get(),
                                      swap_fd_,
                                      &dex_file_oat_filename_map_,
@@ -1546,57 +1552,56 @@
     driver_->CompileAll(class_loader, dex_files_, timings_);
   }
 
-  // TODO: Update comments about how this works for multi image. b/26317072
-  // Notes on the interleaving of creating the image and oat file to
+  // Notes on the interleaving of creating the images and oat files to
   // ensure the references between the two are correct.
   //
   // Currently we have a memory layout that looks something like this:
   //
   // +--------------+
-  // | image        |
+  // | images       |
   // +--------------+
-  // | boot oat     |
+  // | oat files    |
   // +--------------+
   // | alloc spaces |
   // +--------------+
   //
-  // There are several constraints on the loading of the image and boot.oat.
+  // There are several constraints on the loading of the images and oat files.
   //
-  // 1. The image is expected to be loaded at an absolute address and
-  // contains Objects with absolute pointers within the image.
+  // 1. The images are expected to be loaded at an absolute address and
+  // contain Objects with absolute pointers within the images.
   //
-  // 2. There are absolute pointers from Methods in the image to their
-  // code in the oat.
+  // 2. There are absolute pointers from Methods in the images to their
+  // code in the oat files.
   //
-  // 3. There are absolute pointers from the code in the oat to Methods
-  // in the image.
+  // 3. There are absolute pointers from the code in the oat files to Methods
+  // in the images.
   //
-  // 4. There are absolute pointers from code in the oat to other code
-  // in the oat.
+  // 4. There are absolute pointers from code in the oat files to other code
+  // in the oat files.
   //
   // To get this all correct, we go through several steps.
   //
-  // 1. We prepare offsets for all data in the oat file and calculate
+  // 1. We prepare offsets for all data in the oat files and calculate
   // the oat data size and code size. During this stage, we also set
   // oat code offsets in methods for use by the image writer.
   //
-  // 2. We prepare offsets for the objects in the image and calculate
-  // the image size.
+  // 2. We prepare offsets for the objects in the images and calculate
+  // the image sizes.
   //
-  // 3. We create the oat file. Originally this was just our own proprietary
+  // 3. We create the oat files. Originally this was just our own proprietary
   // file but now it is contained within an ELF dynamic object (aka an .so
-  // file). Since we know the image size and oat data size and code size we
+  // file). Since we know the image sizes and oat data sizes and code sizes we
   // can prepare the ELF headers and we then know the ELF memory segment
   // layout and we can now resolve all references. The compiler provides
   // LinkerPatch information in each CompiledMethod and we resolve these,
   // using the layout information and image object locations provided by
   // image writer, as we're writing the method code.
   //
-  // 4. We create the image file. It needs to know where the oat file
-  // will be loaded after itself. Originally when oat file was simply
-  // memory mapped so we could predict where its contents were based
-  // on the file size. Now that it is an ELF file, we need to inspect
-  // the ELF file to understand the in memory segment layout including
+  // 4. We create the image files. They need to know where the oat files
+  // will be loaded after themselves. Originally oat files were simply
+  // memory mapped so we could predict where their contents were based
+  // on the file size. Now that they are ELF files, we need to inspect
+  // the ELF files to understand the in memory segment layout including
   // where the oat header is located within.
   // TODO: We could just remember this information from step 3.
   //
@@ -1890,8 +1895,8 @@
     return result;
   }
 
-  static size_t OpenDexFiles(const std::vector<const char*>& dex_filenames,
-                             const std::vector<const char*>& dex_locations,
+  static size_t OpenDexFiles(std::vector<const char*>& dex_filenames,
+                             std::vector<const char*>& dex_locations,
                              std::vector<std::unique_ptr<const DexFile>>* dex_files) {
     DCHECK(dex_files != nullptr) << "OpenDexFiles out-param is nullptr";
     size_t failure_count = 0;
@@ -1902,6 +1907,9 @@
       std::string error_msg;
       if (!OS::FileExists(dex_filename)) {
         LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
+        dex_filenames.erase(dex_filenames.begin() + i);
+        dex_locations.erase(dex_locations.begin() + i);
+        i--;
         continue;
       }
       if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
@@ -2331,6 +2339,7 @@
   std::unique_ptr<std::unordered_set<std::string>> compiled_methods_;
   bool app_image_;
   bool boot_image_;
+  bool multi_image_;
   bool is_host_;
   std::string android_root_;
   std::vector<const DexFile*> dex_files_;
@@ -2346,8 +2355,6 @@
   bool dump_passes_;
   bool dump_timing_;
   bool dump_slow_timing_;
-  std::string dump_cfg_file_name_;
-  bool dump_cfg_append_;
   std::string swap_file_name_;
   int swap_fd_;
   std::string app_image_file_name_;
@@ -2487,11 +2494,6 @@
     }
   }
 
-  // Check early that the result of compilation can be written
-  if (!dex2oat.OpenFile()) {
-    return EXIT_FAILURE;
-  }
-
   // Print the complete line when any of the following is true:
   //   1) Debug build
   //   2) Compiling an image
@@ -2505,6 +2507,11 @@
   }
 
   if (!dex2oat.Setup()) {
+    return EXIT_FAILURE;
+  }
+
+  // Check early that the result of compilation can be written
+  if (!dex2oat.OpenFile()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index ee7b21c..f922687 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -56,7 +56,7 @@
   // R-type instructions.
   { kRTypeMask, 0, "sll", "DTA", },
   // 0, 1, movci
-  { kRTypeMask, 2, "srl", "DTA", },
+  { kRTypeMask | (0x1f << 21), 2, "srl", "DTA", },
   { kRTypeMask, 3, "sra", "DTA", },
   { kRTypeMask | (0x1f << 6), 4, "sllv", "DTS", },
   { kRTypeMask | (0x1f << 6), 6, "srlv", "DTS", },
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 52c6524..7b9ce5b 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -395,6 +395,9 @@
     os << "MAGIC:\n";
     os << oat_header.GetMagic() << "\n\n";
 
+    os << "LOCATION:\n";
+    os << oat_file_.GetLocation() << "\n\n";
+
     os << "CHECKSUM:\n";
     os << StringPrintf("0x%08x\n\n", oat_header.GetChecksum());
 
@@ -2424,7 +2427,7 @@
 
   // Need a class loader.
   // Fake that we're a compiler.
-  jobject class_loader = class_linker->CreatePathClassLoader(self, class_path, /*parent*/nullptr);
+  jobject class_loader = class_linker->CreatePathClassLoader(self, class_path);
 
   // Use the class loader while dumping.
   StackHandleScope<1> scope(self);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 14e5ec9..04645d1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -250,6 +250,8 @@
   thread_android.cc
 
 LIBART_TARGET_SRC_FILES_arm := \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_arm.S \
   arch/arm/context_arm.cc.arm \
   arch/arm/entrypoints_init_arm.cc \
   arch/arm/instruction_set_features_assembly_tests.S \
@@ -261,6 +263,7 @@
   arch/arm/fault_handler_arm.cc
 
 LIBART_TARGET_SRC_FILES_arm64 := \
+  interpreter/mterp/mterp_stub.cc \
   arch/arm64/context_arm64.cc \
   arch/arm64/entrypoints_init_arm64.cc \
   arch/arm64/jni_entrypoints_arm64.S \
@@ -271,6 +274,7 @@
   arch/arm64/fault_handler_arm64.cc
 
 LIBART_SRC_FILES_x86 := \
+  interpreter/mterp/mterp_stub.cc \
   arch/x86/context_x86.cc \
   arch/x86/entrypoints_init_x86.cc \
   arch/x86/jni_entrypoints_x86.S \
@@ -285,6 +289,7 @@
 # Note that the fault_handler_x86.cc is not a mistake.  This file is
 # shared between the x86 and x86_64 architectures.
 LIBART_SRC_FILES_x86_64 := \
+  interpreter/mterp/mterp_stub.cc \
   arch/x86_64/context_x86_64.cc \
   arch/x86_64/entrypoints_init_x86_64.cc \
   arch/x86_64/jni_entrypoints_x86_64.S \
@@ -298,6 +303,7 @@
   $(LIBART_SRC_FILES_x86_64) \
 
 LIBART_TARGET_SRC_FILES_mips := \
+  interpreter/mterp/mterp_stub.cc \
   arch/mips/context_mips.cc \
   arch/mips/entrypoints_init_mips.cc \
   arch/mips/jni_entrypoints_mips.S \
@@ -307,6 +313,7 @@
   arch/mips/fault_handler_mips.cc
 
 LIBART_TARGET_SRC_FILES_mips64 := \
+  interpreter/mterp/mterp_stub.cc \
   arch/mips64/context_mips64.cc \
   arch/mips64/entrypoints_init_mips64.cc \
   arch/mips64/jni_entrypoints_mips64.S \
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 0691f2a..699ab3e 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1312,7 +1312,114 @@
 .endm
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+
+    # Fast path rosalloc allocation
+    # a0: type_idx
+    # a1: ArtMethod*
+    # s1: Thread::Current
+    # -----------------------------
+    # t0: class
+    # t1: object size
+    # t2: rosalloc run
+    # t3: thread stack top offset
+    # t4: thread stack bottom offset
+    # v0: free list head
+    #
+    # t5, t6 : temps
+
+    lw    $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_32($a1)       # Load dex cache resolved types
+                                                               # array.
+
+    sll   $t5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT            # Shift the value.
+    addu  $t5, $t0, $t5                                        # Compute the index.
+    lw    $t0, 0($t5)                                          # Load class (t0).
+    beqz  $t0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    li    $t6, MIRROR_CLASS_STATUS_INITIALIZED
+    lw    $t5, MIRROR_CLASS_STATUS_OFFSET($t0)                 # Check class status.
+    bne   $t5, $t6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Add a fake dependence from the following access flag and size loads to the status load. This
+    # is to prevent those loads from being reordered above the status load and reading wrong values.
+    xor   $t5, $t5, $t5
+    addu  $t0, $t0, $t5
+
+    lw    $t5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0)           # Check if access flags has
+    li    $t6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE               # kAccClassIsFinalizable.
+    and   $t6, $t5, $t6
+    bnez  $t6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)        # Check if thread local allocation
+    lw    $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1)        # stack has any room left.
+    bgeu  $t3, $t4, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lw    $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0)            # Load object size (t1).
+    li    $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE          # Check if size is for a thread local
+                                                               # allocation.
+    bgtu  $t1, $t5, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Compute the rosalloc bracket index from the size. Align the size up to the rosalloc bracket
+    # quantum size, divide by the quantum size and subtract 1.
+
+    addiu $t1, $t1, -1                                         # Decrease obj size and shift right
+    srl   $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT        # by quantum.
+
+    sll   $t2, $t1, POINTER_SIZE_SHIFT
+    addu  $t2, $t2, $s1
+    lw    $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2)                # Load rosalloc run (t2).
+
+    # Load the free list head (v0).
+    # NOTE: this will be the return val.
+
+    lw    $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+    beqz  $v0, .Lart_quick_alloc_object_rosalloc_slow_path
+    nop
+
+    # Load the next pointer of the head and update the list head with the next pointer.
+
+    lw    $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
+    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+
+    # Store the class pointer in the header. This also overwrites the first pointer. The offsets are
+    # asserted to match.
+
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+
+    POISON_HEAP_REF $t0
+    sw    $t0, MIRROR_OBJECT_CLASS_OFFSET($v0)
+
+    # Push the new object onto the thread local allocation stack and increment the thread local
+    # allocation stack top.
+
+    sw    $v0, 0($t3)
+    addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
+    sw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)
+
+    # Decrement the size of the free list.
+
+    lw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+    addiu $t5, $t5, -1
+    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+
+    sync                                                          # Fence.
+
+    jalr  $zero, $ra
+    nop
+
+  .Lart_quick_alloc_object_rosalloc_slow_path:
+
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    jal   artAllocObjectFromCodeRosAlloc
+    move  $a2, $s1                                                # Pass self as argument.
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END art_quick_alloc_object_rosalloc
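A rough C++ restatement of the bracket selection done by the srl/sll/addu/lw sequence above (illustrative only; object_size and self stand in for $t1 and $s1, and the macros are the same asm_support.h constants the assembly uses):

    // Not part of the patch: size -> bracket index -> thread-local rosalloc run.
    size_t bracket_index = (object_size - 1) >> ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT;
    void* rosalloc_run = *reinterpret_cast<void**>(
        reinterpret_cast<char*>(self) + THREAD_ROSALLOC_RUNS_OFFSET +
        (bracket_index << POINTER_SIZE_SHIFT));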
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 2cb2212..d4b873e 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -429,8 +429,10 @@
         : [result] "=r" (result)
         : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer), [hidden] "r"(hidden)
-        : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
-          "t8", "t9", "k0", "k1", "fp", "ra",
+        // Instead of the aliases t0-t3, the register names $12-$15 are used in the clobber list
+        // because t0-t3 are ambiguous.
+        : "at", "v0", "v1", "$12", "$13", "$14", "$15", "s0", "s1", "s2", "s3", "s4", "s5", "s6",
+          "s7", "t8", "t9", "k0", "k1", "fp", "ra",
           "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
           "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
           "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index cf548ad..a5f5c49 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -225,8 +225,7 @@
     }
     case kSuper:
       // Constructors and static methods are called with invoke-direct.
-      // Interface methods cannot be invoked with invoke-super.
-      return IsConstructor() || IsStatic() || GetDeclaringClass()->IsInterface();
+      return IsConstructor() || IsStatic();
     case kInterface: {
       mirror::Class* methods_class = GetDeclaringClass();
       return IsDirect() || !(methods_class->IsInterface() || methods_class->IsObjectClass());
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 8efad88..0be2fa2 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -267,7 +267,9 @@
   bool HasSameNameAndSignature(ArtMethod* other) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Find the method that this method overrides.
-  ArtMethod* FindOverriddenMethod(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* FindOverriddenMethod(size_t pointer_size)
+      REQUIRES(Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Find the method index for this method within other_dexfile. If this method isn't present then
   // return DexFile::kDexNoIndex. The name_and_signature_idx MUST refer to a MethodId with the same
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 2b4826e..31610a3 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -133,8 +133,20 @@
 #define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.mterp_current_ibase.
+#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
+            art::Thread::MterpCurrentIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.mterp_default_ibase.
+#define THREAD_DEFAULT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 4 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_DEFAULT_IBASE_OFFSET,
+            art::Thread::MterpDefaultIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.mterp_alt_ibase.
+#define THREAD_ALT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 5 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_ALT_IBASE_OFFSET,
+            art::Thread::MterpAltIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.rosalloc_runs.
-#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 6 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
             art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.
@@ -146,6 +158,40 @@
 ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
             art::Thread::ThreadLocalAllocStackEndOffset<__SIZEOF_POINTER__>().Int32Value())
 
+// Offsets within ShadowFrame.
+#define SHADOWFRAME_LINK_OFFSET 0
+ADD_TEST_EQ(SHADOWFRAME_LINK_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::LinkOffset()))
+#define SHADOWFRAME_METHOD_OFFSET (SHADOWFRAME_LINK_OFFSET + 1 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_METHOD_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::MethodOffset()))
+#define SHADOWFRAME_RESULT_REGISTER_OFFSET (SHADOWFRAME_LINK_OFFSET + 2 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_RESULT_REGISTER_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::ResultRegisterOffset()))
+#define SHADOWFRAME_DEX_PC_PTR_OFFSET (SHADOWFRAME_LINK_OFFSET + 3 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_DEX_PC_PTR_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::DexPCPtrOffset()))
+#define SHADOWFRAME_CODE_ITEM_OFFSET (SHADOWFRAME_LINK_OFFSET + 4 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_CODE_ITEM_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::CodeItemOffset()))
+#define SHADOWFRAME_LOCK_COUNT_DATA_OFFSET (SHADOWFRAME_LINK_OFFSET + 5 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_LOCK_COUNT_DATA_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::LockCountDataOffset()))
+#define SHADOWFRAME_NUMBER_OF_VREGS_OFFSET (SHADOWFRAME_LINK_OFFSET + 6 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::NumberOfVRegsOffset()))
+#define SHADOWFRAME_DEX_PC_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 4)
+ADD_TEST_EQ(SHADOWFRAME_DEX_PC_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::DexPCOffset()))
+#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8)
+ADD_TEST_EQ(SHADOWFRAME_VREGS_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::VRegsOffset()))
+
+// Offsets within CodeItem
+#define CODEITEM_INSNS_OFFSET 16
+ADD_TEST_EQ(CODEITEM_INSNS_OFFSET,
+            static_cast<int32_t>(OFFSETOF_MEMBER(art::DexFile::CodeItem, insns_)))
+
 // Offsets within java.lang.Object.
 #define MIRROR_OBJECT_CLASS_OFFSET 0
 ADD_TEST_EQ(MIRROR_OBJECT_CLASS_OFFSET, art::mirror::Object::ClassOffset().Int32Value())
@@ -188,6 +234,26 @@
 ADD_TEST_EQ(MIRROR_CHAR_ARRAY_DATA_OFFSET,
             art::mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value())
 
+#define MIRROR_BOOLEAN_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_BOOLEAN_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(uint8_t)).Int32Value())
+
+#define MIRROR_BYTE_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_BYTE_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(int8_t)).Int32Value())
+
+#define MIRROR_SHORT_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_SHORT_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(int16_t)).Int32Value())
+
+#define MIRROR_INT_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_INT_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(int32_t)).Int32Value())
+
+#define MIRROR_WIDE_ARRAY_DATA_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_WIDE_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(uint64_t)).Int32Value())
+
 #define MIRROR_OBJECT_ARRAY_DATA_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_OBJECT_ARRAY_DATA_OFFSET,
     art::mirror::Array::DataOffset(
@@ -299,6 +365,12 @@
 // Assert this so that we can avoid zeroing the next field by installing the class pointer.
 ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET)
 
+#define THREAD_SUSPEND_REQUEST 1
+ADD_TEST_EQ(THREAD_SUSPEND_REQUEST, static_cast<int32_t>(art::kSuspendRequest))
+
+#define THREAD_CHECKPOINT_REQUEST 2
+ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest))
+
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
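For reference, the ShadowFrame offsets added above are plain pointer arithmetic on __SIZEOF_POINTER__: with 4-byte pointers SHADOWFRAME_NUMBER_OF_VREGS_OFFSET is 6 * 4 = 24, SHADOWFRAME_DEX_PC_OFFSET is 24 + 4 = 28 and SHADOWFRAME_VREGS_OFFSET is 24 + 8 = 32; with 8-byte pointers the same fields land at 48, 52 and 56. The accompanying ADD_TEST_EQ lines verify each value against the real ShadowFrame layout at compile time.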
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index 969f5b9..cea7046 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -158,7 +158,7 @@
 
 template<class Key, class T, AllocatorTag kTag, class Compare = std::less<Key>>
 using AllocationTrackingMultiMap = std::multimap<
-    Key, T, Compare, TrackingAllocator<std::pair<Key, T>, kTag>>;
+    Key, T, Compare, TrackingAllocator<std::pair<const Key, T>, kTag>>;
 
 template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
 using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;
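The allocator.h change above follows from the container's element type: std::multimap's value_type is std::pair<const Key, T>, so the tracking allocator should be declared for that pair type. A minimal illustration (not from the patch):

    #include <map>
    #include <type_traits>
    #include <utility>

    static_assert(std::is_same<std::multimap<int, long>::value_type,
                               std::pair<const int, long>>::value,
                  "multimap elements have a const key");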
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 70bd398..82a5f96 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -855,6 +855,18 @@
       PLOG(FATAL) << "futex wait failed for " << name_;
     }
   }
+  if (self != nullptr) {
+    JNIEnvExt* const env = self->GetJniEnv();
+    if (UNLIKELY(env != nullptr && env->runtime_deleted)) {
+      CHECK(self->IsDaemon());
+      // If the runtime has been deleted, then we cannot proceed. Just sleep forever. This may
+      // occur for user daemon threads that get a spurious wakeup. This occurs for test 132 with
+      // --host and --gdb.
+      // After we wake up, the runtime may have been shutdown, which means that this condition may
+      // have been deleted. It is not safe to retry the wait.
+      SleepForever();
+    }
+  }
   guard_.ExclusiveLock(self);
   CHECK_GE(num_waiters_, 0);
   num_waiters_--;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 263f50d..f674a6f 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -75,6 +75,7 @@
   kReferenceQueueWeakReferencesLock,
   kReferenceQueueClearedReferencesLock,
   kReferenceProcessorLock,
+  kJitDebugInterfaceLock,
   kJitCodeCacheLock,
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 41842e8..ed833c4 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -46,6 +46,7 @@
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
+#include "experimental_flags.h"
 #include "gc_root-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
@@ -1879,6 +1880,9 @@
    */
   Dbg::PostClassPrepare(h_new_class.Get());
 
+  // Notify native debugger of the new class and its layout.
+  jit::Jit::NewTypeLoadedIfUsingJit(h_new_class.Get());
+
   return h_new_class.Get();
 }
 
@@ -2765,6 +2769,7 @@
 
   mirror::Class* existing = InsertClass(descriptor, new_class.Get(), hash);
   if (existing == nullptr) {
+    jit::Jit::NewTypeLoadedIfUsingJit(new_class.Get());
     return new_class.Get();
   }
   // Another thread must have loaded the class after we
@@ -4825,7 +4830,6 @@
       return false;
     }
     bool has_defaults = false;
-    // TODO May need to replace this with real VTable for invoke_super
     // Assign each method an IMT index and set the default flag.
     for (size_t i = 0; i < num_virtual_methods; ++i) {
       ArtMethod* m = klass->GetVirtualMethodDuringLinking(i, image_pointer_size_);
@@ -5463,6 +5467,53 @@
   }
 }
 
+static void FillImtFromSuperClass(Handle<mirror::Class> klass,
+                                  Handle<mirror::IfTable> iftable,
+                                  ArtMethod* unimplemented_method,
+                                  ArtMethod* imt_conflict_method,
+                                  ArtMethod** out_imt,
+                                  size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(klass->HasSuperClass());
+  mirror::Class* super_class = klass->GetSuperClass();
+  if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
+    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+      out_imt[i] = super_class->GetEmbeddedImTableEntry(i, pointer_size);
+    }
+  } else {
+    // No imt in the super class, need to reconstruct from the iftable.
+    mirror::IfTable* if_table = super_class->GetIfTable();
+    const size_t length = super_class->GetIfTableCount();
+    for (size_t i = 0; i < length; ++i) {
+      mirror::Class* interface = iftable->GetInterface(i);
+      const size_t num_virtuals = interface->NumDeclaredVirtualMethods();
+      const size_t method_array_count = if_table->GetMethodArrayCount(i);
+      DCHECK_EQ(num_virtuals, method_array_count);
+      if (method_array_count == 0) {
+        continue;
+      }
+      auto* method_array = if_table->GetMethodArray(i);
+      for (size_t j = 0; j < num_virtuals; ++j) {
+        auto method = method_array->GetElementPtrSize<ArtMethod*>(j, pointer_size);
+        DCHECK(method != nullptr) << PrettyClass(super_class);
+        // Miranda methods cannot be used to implement an interface method and defaults should be
+        // skipped in case we override it.
+        if (method->IsDefault() || method->IsMiranda()) {
+          continue;
+        }
+        ArtMethod* interface_method = interface->GetVirtualMethod(j, pointer_size);
+        uint32_t imt_index = interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+        auto** imt_ref = &out_imt[imt_index];
+        if (*imt_ref == unimplemented_method) {
+          *imt_ref = method;
+        } else if (*imt_ref != imt_conflict_method) {
+          *imt_ref = imt_conflict_method;
+        }
+      }
+    }
+  }
+}
+
+// TODO This method needs to be split up into several smaller methods.
 bool ClassLinker::LinkInterfaceMethods(
     Thread* self,
     Handle<mirror::Class> klass,
@@ -5470,7 +5521,19 @@
     ArtMethod** out_imt) {
   StackHandleScope<3> hs(self);
   Runtime* const runtime = Runtime::Current();
+
+  const bool is_interface = klass->IsInterface();
+  // TODO It might in the future prove useful to make interfaces have full iftables, allowing a
+  // faster invoke-super implementation in the interpreter/across dex-files.
+  // We will just skip doing any of this on non-debug builds for speed.
+  if (is_interface &&
+      !kIsDebugBuild &&
+      !runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods)) {
+    return true;
+  }
+
   const bool has_superclass = klass->HasSuperClass();
+  const bool fill_tables = !is_interface;
   const size_t super_ifcount = has_superclass ? klass->GetSuperClass()->GetIfTableCount() : 0U;
   const size_t method_alignment = ArtMethod::Alignment(image_pointer_size_);
   const size_t method_size = ArtMethod::Size(image_pointer_size_);
@@ -5478,10 +5541,6 @@
 
   MutableHandle<mirror::IfTable> iftable(hs.NewHandle(klass->GetIfTable()));
 
-  // If we're an interface, we don't need the vtable pointers, so we're done.
-  if (klass->IsInterface()) {
-    return true;
-  }
   // These are allocated on the heap to begin, we then transfer to linear alloc when we re-create
   // the virtual methods array.
   // Need to use low 4GB arenas for compiler or else the pointers wont fit in 32 bit method array
@@ -5498,71 +5557,42 @@
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
   ArtMethod* const imt_conflict_method = runtime->GetImtConflictMethod();
   // Copy the IMT from the super class if possible.
-  bool extend_super_iftable = false;
-  if (has_superclass) {
-    mirror::Class* super_class = klass->GetSuperClass();
-    extend_super_iftable = true;
-    if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
-      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-        out_imt[i] = super_class->GetEmbeddedImTableEntry(i, image_pointer_size_);
-      }
-    } else {
-      // No imt in the super class, need to reconstruct from the iftable.
-      mirror::IfTable* if_table = super_class->GetIfTable();
-      const size_t length = super_class->GetIfTableCount();
-      for (size_t i = 0; i < length; ++i) {
-        mirror::Class* interface = iftable->GetInterface(i);
-        const size_t num_virtuals = interface->NumVirtualMethods();
-        const size_t method_array_count = if_table->GetMethodArrayCount(i);
-        DCHECK_EQ(num_virtuals, method_array_count);
-        if (method_array_count == 0) {
-          continue;
-        }
-        auto* method_array = if_table->GetMethodArray(i);
-        for (size_t j = 0; j < num_virtuals; ++j) {
-          auto method = method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
-          DCHECK(method != nullptr) << PrettyClass(super_class);
-          // Miranda methods cannot be used to implement an interface method and defaults should be
-          // skipped in case we override it.
-          if (method->IsDefault() || method->IsMiranda()) {
-            continue;
-          }
-          ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-          uint32_t imt_index = interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
-          auto** imt_ref = &out_imt[imt_index];
-          if (*imt_ref == unimplemented_method) {
-            *imt_ref = method;
-          } else if (*imt_ref != imt_conflict_method) {
-            *imt_ref = imt_conflict_method;
-          }
-        }
-      }
-    }
+  const bool extend_super_iftable = has_superclass;
+  if (has_superclass && fill_tables) {
+    FillImtFromSuperClass(klass,
+                          iftable,
+                          unimplemented_method,
+                          imt_conflict_method,
+                          out_imt,
+                          image_pointer_size_);
   }
+
   // Allocate method arrays before since we don't want miss visiting miranda method roots due to
   // thread suspension.
-  for (size_t i = 0; i < ifcount; ++i) {
-    size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
-    if (num_methods > 0) {
-      const bool is_super = i < super_ifcount;
-      // This is an interface implemented by a super-class. Therefore we can just copy the method
-      // array from the superclass.
-      const bool super_interface = is_super && extend_super_iftable;
-      mirror::PointerArray* method_array;
-      if (super_interface) {
-        mirror::IfTable* if_table = klass->GetSuperClass()->GetIfTable();
-        DCHECK(if_table != nullptr);
-        DCHECK(if_table->GetMethodArray(i) != nullptr);
-        // If we are working on a super interface, try extending the existing method array.
-        method_array = down_cast<mirror::PointerArray*>(if_table->GetMethodArray(i)->Clone(self));
-      } else {
-        method_array = AllocPointerArray(self, num_methods);
+  if (fill_tables) {
+    for (size_t i = 0; i < ifcount; ++i) {
+      size_t num_methods = iftable->GetInterface(i)->NumDeclaredVirtualMethods();
+      if (num_methods > 0) {
+        const bool is_super = i < super_ifcount;
+        // This is an interface implemented by a super-class. Therefore we can just copy the method
+        // array from the superclass.
+        const bool super_interface = is_super && extend_super_iftable;
+        mirror::PointerArray* method_array;
+        if (super_interface) {
+          mirror::IfTable* if_table = klass->GetSuperClass()->GetIfTable();
+          DCHECK(if_table != nullptr);
+          DCHECK(if_table->GetMethodArray(i) != nullptr);
+          // If we are working on a super interface, try extending the existing method array.
+          method_array = down_cast<mirror::PointerArray*>(if_table->GetMethodArray(i)->Clone(self));
+        } else {
+          method_array = AllocPointerArray(self, num_methods);
+        }
+        if (UNLIKELY(method_array == nullptr)) {
+          self->AssertPendingOOMException();
+          return false;
+        }
+        iftable->SetMethodArray(i, method_array);
       }
-      if (UNLIKELY(method_array == nullptr)) {
-        self->AssertPendingOOMException();
-        return false;
-      }
-      iftable->SetMethodArray(i, method_array);
     }
   }
 
@@ -5578,12 +5608,16 @@
     DCHECK_GE(i, 0u);
     DCHECK_LT(i, ifcount);
 
-    size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
+    size_t num_methods = iftable->GetInterface(i)->NumDeclaredVirtualMethods();
     if (num_methods > 0) {
       StackHandleScope<2> hs2(self);
       const bool is_super = i < super_ifcount;
       const bool super_interface = is_super && extend_super_iftable;
-      auto method_array(hs2.NewHandle(iftable->GetMethodArray(i)));
+      // We don't actually create or fill these tables for interfaces; we only copy the methods
+      // needed for conflict resolution. Just set this to nullptr in those cases.
+      Handle<mirror::PointerArray> method_array(fill_tables
+                                                ? hs2.NewHandle(iftable->GetMethodArray(i))
+                                                : hs2.NewHandle<mirror::PointerArray>(nullptr));
 
       ArraySlice<ArtMethod> input_virtual_methods;
       ScopedNullHandle<mirror::PointerArray> null_handle;
@@ -5596,7 +5630,7 @@
       //      at the super-classes iftable methods (copied into method_array previously) when we are
       //      looking for the implementation of a super-interface method but that is rather dirty.
       bool using_virtuals;
-      if (super_interface) {
+      if (super_interface || is_interface) {
         // If we are overwriting a super class interface, try to only virtual methods instead of the
         // whole vtable.
         using_virtuals = true;
@@ -5606,6 +5640,7 @@
         // For a new interface, however, we need the whole vtable in case a new
         // interface method is implemented in the whole superclass.
         using_virtuals = false;
+        DCHECK(vtable.Get() != nullptr);
         input_vtable_array = vtable;
         input_array_length = input_vtable_array->GetLength();
       }
@@ -5655,13 +5690,15 @@
               break;
             } else {
               found_impl = true;
-              method_array->SetElementPtrSize(j, vtable_method, image_pointer_size_);
-              // Place method in imt if entry is empty, place conflict otherwise.
-              SetIMTRef(unimplemented_method,
-                        imt_conflict_method,
-                        image_pointer_size_,
-                        vtable_method,
-                        /*out*/imt_ptr);
+              if (LIKELY(fill_tables)) {
+                method_array->SetElementPtrSize(j, vtable_method, image_pointer_size_);
+                // Place method in imt if entry is empty, place conflict otherwise.
+                SetIMTRef(unimplemented_method,
+                          imt_conflict_method,
+                          image_pointer_size_,
+                          vtable_method,
+                          /*out*/imt_ptr);
+              }
               break;
             }
           }
@@ -5688,7 +5725,7 @@
               method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
           DCHECK(supers_method != nullptr);
           DCHECK(interface_name_comparator.HasSameNameAndSignature(supers_method));
-          if (!supers_method->IsOverridableByDefaultMethod()) {
+          if (LIKELY(!supers_method->IsOverridableByDefaultMethod())) {
             // The method is not overridable by a default method (i.e. it is directly implemented
             // in some class). Therefore move onto the next interface method.
             continue;
@@ -5710,11 +5747,13 @@
             } else {
               // See if we already have a conflict method for this method.
               ArtMethod* preexisting_conflict = FindSameNameAndSignature(interface_name_comparator,
-                                                                          default_conflict_methods);
+                                                                         default_conflict_methods);
               if (LIKELY(preexisting_conflict != nullptr)) {
                 // We already have another conflict we can reuse.
                 default_conflict_method = preexisting_conflict;
               } else {
+                // Note that we do this even if we are an interface, since we need to create this
+                // conflict method and cannot reuse one from another class.
                 // Create a new conflict method for this to use.
                 default_conflict_method =
                     reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
@@ -5724,7 +5763,7 @@
             }
             current_method = default_conflict_method;
             break;
-          }
+          }  // case kDefaultConflict
           case DefaultMethodSearchResult::kDefaultFound: {
             DCHECK(current_method != nullptr);
             // Found a default method.
@@ -5733,8 +5772,12 @@
               // We found a default method but it was the same one we already have from our
               // superclass. Don't bother adding it to our vtable again.
               current_method = vtable_impl;
-            } else {
+            } else if (LIKELY(fill_tables)) {
+              // Interfaces don't need to copy default methods since they don't have vtables.
               // Only record this default method if it is new to save space.
+              // TODO It might be worthwhile to copy default methods on interfaces anyway since it
+              //      would make lookup for interface super much faster. (We would only need to scan
+              //      the iftable to find if there is a NSME or AME.)
               ArtMethod* old = FindSameNameAndSignature(interface_name_comparator, default_methods);
               if (old == nullptr) {
                 // We found a default method implementation and there were no conflicts.
@@ -5745,7 +5788,7 @@
               }
             }
             break;
-          }
+          }  // case kDefaultFound
           case DefaultMethodSearchResult::kAbstractFound: {
             DCHECK(current_method == nullptr);
             // Abstract method masks all defaults.
@@ -5757,38 +5800,46 @@
               current_method = vtable_impl;
             }
             break;
+          }  // case kAbstractFound
+        }
+        if (LIKELY(fill_tables)) {
+          if (current_method != nullptr) {
+            // We found a default method implementation. Record it in the iftable and IMT.
+            method_array->SetElementPtrSize(j, current_method, image_pointer_size_);
+            SetIMTRef(unimplemented_method,
+                      imt_conflict_method,
+                      image_pointer_size_,
+                      current_method,
+                      /*out*/imt_ptr);
+          } else if (!super_interface) {
+            // We could not find an implementation for this method and since it is a brand new
+            // interface we searched the entire vtable (and all default methods) for an
+            // implementation but couldn't find one. We therefore need to make a miranda method.
+            //
+            // Find out if there is already a miranda method we can use.
+            ArtMethod* miranda_method = FindSameNameAndSignature(interface_name_comparator,
+                                                                 miranda_methods);
+            if (miranda_method == nullptr) {
+              DCHECK(interface_method->IsAbstract()) << PrettyMethod(interface_method);
+              miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
+              CHECK(miranda_method != nullptr);
+              // Point the interface table at a phantom slot.
+              new(miranda_method) ArtMethod(interface_method, image_pointer_size_);
+              miranda_methods.push_back(miranda_method);
+            }
+            method_array->SetElementPtrSize(j, miranda_method, image_pointer_size_);
           }
         }
-        if (current_method != nullptr) {
-          // We found a default method implementation. Record it in the iftable and IMT.
-          method_array->SetElementPtrSize(j, current_method, image_pointer_size_);
-          SetIMTRef(unimplemented_method,
-                    imt_conflict_method,
-                    image_pointer_size_,
-                    current_method,
-                    /*out*/imt_ptr);
-        } else if (!super_interface) {
-          // We could not find an implementation for this method and since it is a brand new
-          // interface we searched the entire vtable (and all default methods) for an implementation
-          // but couldn't find one. We therefore need to make a miranda method.
-          //
-          // Find out if there is already a miranda method we can use.
-          ArtMethod* miranda_method = FindSameNameAndSignature(interface_name_comparator,
-                                                               miranda_methods);
-          if (miranda_method == nullptr) {
-            DCHECK(interface_method->IsAbstract()) << PrettyMethod(interface_method);
-            miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
-            CHECK(miranda_method != nullptr);
-            // Point the interface table at a phantom slot.
-            new(miranda_method) ArtMethod(interface_method, image_pointer_size_);
-            miranda_methods.push_back(miranda_method);
-          }
-          method_array->SetElementPtrSize(j, miranda_method, image_pointer_size_);
-        }
-      }
-    }
-  }
-  if (!miranda_methods.empty() || !default_methods.empty() || !default_conflict_methods.empty()) {
+      }  // For each method in interface.
+    }  // if (num_methods > 0)
+  }  // For each interface.
+  const bool has_new_virtuals = !(miranda_methods.empty() &&
+                                  default_methods.empty() &&
+                                  default_conflict_methods.empty());
+  // TODO don't extend virtuals of interface unless necessary (when is it?).
+  if (has_new_virtuals) {
+    DCHECK(!is_interface || (default_methods.empty() && miranda_methods.empty()))
+        << "Interfaces should only have default-conflict methods appended to them.";
     VLOG(class_linker) << PrettyClass(klass.Get()) << ": miranda_methods=" << miranda_methods.size()
                        << " default_methods=" << default_methods.size()
                        << " default_conflict_methods=" << default_conflict_methods.size();
@@ -5885,101 +5936,102 @@
     // suspension assert.
     self->EndAssertNoThreadSuspension(old_cause);
 
-    const size_t old_vtable_count = vtable->GetLength();
-    const size_t new_vtable_count = old_vtable_count +
-                                    miranda_methods.size() +
-                                    default_methods.size() +
-                                    default_conflict_methods.size();
-    miranda_methods.clear();
-    vtable.Assign(down_cast<mirror::PointerArray*>(vtable->CopyOf(self, new_vtable_count)));
-    if (UNLIKELY(vtable.Get() == nullptr)) {
-      self->AssertPendingOOMException();
-      return false;
-    }
-    out = methods->begin(method_size, method_alignment) + old_method_count;
-    size_t vtable_pos = old_vtable_count;
-    for (size_t i = old_method_count; i < new_method_count; ++i) {
-      // Leave the declaring class alone as type indices are relative to it
-      out->SetMethodIndex(0xFFFF & vtable_pos);
-      vtable->SetElementPtrSize(vtable_pos, &*out, image_pointer_size_);
-      ++out;
-      ++vtable_pos;
-    }
-    CHECK_EQ(vtable_pos, new_vtable_count);
-    // Update old vtable methods. We use the default_translations map to figure out what each vtable
-    // entry should be updated to, if they need to be at all.
-    for (size_t i = 0; i < old_vtable_count; ++i) {
-      ArtMethod* translated_method = vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_);
-      // Try and find what we need to change this method to.
-      auto translation_it = default_translations.find(i);
-      bool found_translation = false;
-      if (translation_it != default_translations.end()) {
-        if (translation_it->second.IsInConflict()) {
-          // Find which conflict method we are to use for this method.
-          MethodNameAndSignatureComparator old_method_comparator(
-              translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-          ArtMethod* new_conflict_method = FindSameNameAndSignature(old_method_comparator,
-                                                                    default_conflict_methods);
-          CHECK(new_conflict_method != nullptr) << "Expected a conflict method!";
-          translated_method = new_conflict_method;
-        } else if (translation_it->second.IsAbstract()) {
-          // Find which miranda method we are to use for this method.
-          MethodNameAndSignatureComparator old_method_comparator(
-              translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-          ArtMethod* miranda_method = FindSameNameAndSignature(old_method_comparator,
-                                                               miranda_methods);
-          DCHECK(miranda_method != nullptr);
-          translated_method = miranda_method;
-        } else {
-          // Normal default method (changed from an older default or abstract interface method).
-          DCHECK(translation_it->second.IsTranslation());
-          translated_method = translation_it->second.GetTranslation();
+    if (fill_tables) {
+      // Update the vtable to the new method structures. We can skip this for interfaces since they
+      // do not have vtables.
+      const size_t old_vtable_count = vtable->GetLength();
+      const size_t new_vtable_count = old_vtable_count +
+                                      miranda_methods.size() +
+                                      default_methods.size() +
+                                      default_conflict_methods.size();
+      vtable.Assign(down_cast<mirror::PointerArray*>(vtable->CopyOf(self, new_vtable_count)));
+      if (UNLIKELY(vtable.Get() == nullptr)) {
+        self->AssertPendingOOMException();
+        return false;
+      }
+      out = methods->begin(method_size, method_alignment) + old_method_count;
+      size_t vtable_pos = old_vtable_count;
+      // Update all the newly copied methods' indexes so they denote their placement in the vtable.
+      for (size_t i = old_method_count; i < new_method_count; ++i) {
+        // Leave the declaring class alone; the method's dex_code_item_offset_ and dex_method_index_
+        // fields are references into the dex file the method was defined in. Since the ArtMethod
+        // does not store that information itself, it uses declaring_class_->dex_cache_.
+        out->SetMethodIndex(0xFFFF & vtable_pos);
+        vtable->SetElementPtrSize(vtable_pos, &*out, image_pointer_size_);
+        ++out;
+        ++vtable_pos;
+      }
+      CHECK_EQ(vtable_pos, new_vtable_count);
+      // Update old vtable methods. We use the default_translations map to figure out what each
+      // vtable entry should be updated to, if they need to be at all.
+      for (size_t i = 0; i < old_vtable_count; ++i) {
+        ArtMethod* translated_method = vtable->GetElementPtrSize<ArtMethod*>(
+              i, image_pointer_size_);
+        // Try and find what we need to change this method to.
+        auto translation_it = default_translations.find(i);
+        bool found_translation = false;
+        if (translation_it != default_translations.end()) {
+          if (translation_it->second.IsInConflict()) {
+            // Find which conflict method we are to use for this method.
+            MethodNameAndSignatureComparator old_method_comparator(
+                translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
+            ArtMethod* new_conflict_method = FindSameNameAndSignature(old_method_comparator,
+                                                                      default_conflict_methods);
+            CHECK(new_conflict_method != nullptr) << "Expected a conflict method!";
+            translated_method = new_conflict_method;
+          } else if (translation_it->second.IsAbstract()) {
+            // Find which miranda method we are to use for this method.
+            MethodNameAndSignatureComparator old_method_comparator(
+                translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
+            ArtMethod* miranda_method = FindSameNameAndSignature(old_method_comparator,
+                                                                miranda_methods);
+            DCHECK(miranda_method != nullptr);
+            translated_method = miranda_method;
+          } else {
+            // Normal default method (changed from an older default or abstract interface method).
+            DCHECK(translation_it->second.IsTranslation());
+            translated_method = translation_it->second.GetTranslation();
+          }
+          found_translation = true;
         }
-        found_translation = true;
-      }
-      DCHECK(translated_method != nullptr);
-      auto it = move_table.find(translated_method);
-      if (it != move_table.end()) {
-        auto* new_method = it->second;
-        DCHECK(new_method != nullptr);
-        vtable->SetElementPtrSize(i, new_method, image_pointer_size_);
-      } else {
-        // If it was not going to be updated we wouldn't have put it into the default_translations
-        // map.
-        CHECK(!found_translation) << "We were asked to update this vtable entry. Must not fail.";
-      }
-    }
-
-    if (kIsDebugBuild) {
-      for (size_t i = 0; i < new_vtable_count; ++i) {
-        CHECK(move_table.find(vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_)) ==
-              move_table.end());
-      }
-    }
-
-    klass->SetVTable(vtable.Get());
-    // Go fix up all the stale (old miranda or default method) pointers.
-    // First do it on the iftable.
-    for (size_t i = 0; i < ifcount; ++i) {
-      for (size_t j = 0, count = iftable->GetMethodArrayCount(i); j < count; ++j) {
-        auto* method_array = iftable->GetMethodArray(i);
-        auto* m = method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
-        DCHECK(m != nullptr) << PrettyClass(klass.Get());
-        auto it = move_table.find(m);
+        DCHECK(translated_method != nullptr);
+        auto it = move_table.find(translated_method);
         if (it != move_table.end()) {
-          auto* new_m = it->second;
-          DCHECK(new_m != nullptr) << PrettyClass(klass.Get());
-          method_array->SetElementPtrSize(j, new_m, image_pointer_size_);
+          auto* new_method = it->second;
+          DCHECK(new_method != nullptr);
+          vtable->SetElementPtrSize(i, new_method, image_pointer_size_);
+        } else {
+          // If it was not going to be updated we wouldn't have put it into the default_translations
+          // map.
+          CHECK(!found_translation) << "We were asked to update this vtable entry. Must not fail.";
+        }
+      }
+      klass->SetVTable(vtable.Get());
+
+      // Go fix up all the stale iftable pointers.
+      for (size_t i = 0; i < ifcount; ++i) {
+        for (size_t j = 0, count = iftable->GetMethodArrayCount(i); j < count; ++j) {
+          auto* method_array = iftable->GetMethodArray(i);
+          auto* m = method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
+          DCHECK(m != nullptr) << PrettyClass(klass.Get());
+          auto it = move_table.find(m);
+          if (it != move_table.end()) {
+            auto* new_m = it->second;
+            DCHECK(new_m != nullptr) << PrettyClass(klass.Get());
+            method_array->SetElementPtrSize(j, new_m, image_pointer_size_);
+          }
+        }
+      }
+
+      // Fix up IMT next
+      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+        auto it = move_table.find(out_imt[i]);
+        if (it != move_table.end()) {
+          out_imt[i] = it->second;
         }
       }
     }
-    // Fix up IMT next
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      auto it = move_table.find(out_imt[i]);
-      if (it != move_table.end()) {
-        out_imt[i] = it->second;
-      }
-    }
+
     // Check that there are no stale methods are in the dex cache array.
     if (kIsDebugBuild) {
       auto* resolved_methods = klass->GetDexCache()->GetResolvedMethods();
@@ -6003,7 +6055,7 @@
   } else {
     self->EndAssertNoThreadSuspension(old_cause);
   }
-  if (kIsDebugBuild) {
+  if (kIsDebugBuild && !is_interface) {
     SanityCheckVTable(klass, image_pointer_size_);
   }
   return true;
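
The default_translations handling above rewrites vtable entries that resolve to conflicting
default methods; at the language level such a conflict surfaces as an IncompatibleClassChangeError
when the method is invoked. A sketch of how the conflict can arise (only under separate
compilation, since javac rejects it otherwise; names are illustrative):

    interface Left  { default String name() { return "left"; } }
    // Both was compiled before Right gained its own default name(); loading the
    // versions below together leaves Both with two competing defaults.
    interface Right { default String name() { return "right"; } }
    class Both implements Left, Right { }

    class ConflictDemo {
      public static void main(String[] args) {
        new Both().name();  // throws IncompatibleClassChangeError at runtime
      }
    }
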
@@ -6393,7 +6445,13 @@
         DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
       }
       break;
-    case kSuper:  // Fall-through.
+    case kSuper:
+      if (klass->IsInterface()) {
+        resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, image_pointer_size_);
+      } else {
+        resolved = klass->FindVirtualMethod(dex_cache.Get(), method_idx, image_pointer_size_);
+      }
+      break;
     case kVirtual:
       resolved = klass->FindVirtualMethod(dex_cache.Get(), method_idx, image_pointer_size_);
       break;
@@ -6415,7 +6473,13 @@
         resolved = klass->FindInterfaceMethod(name, signature, image_pointer_size_);
         DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
         break;
-      case kSuper:  // Fall-through.
+      case kSuper:
+        if (klass->IsInterface()) {
+          resolved = klass->FindInterfaceMethod(name, signature, image_pointer_size_);
+        } else {
+          resolved = klass->FindVirtualMethod(name, signature, image_pointer_size_);
+        }
+        break;
       case kVirtual:
         resolved = klass->FindVirtualMethod(name, signature, image_pointer_size_);
         break;
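
The kSuper cases above make resolution follow the referenced type: when the method reference names
an interface, FindInterfaceMethod is used instead of FindVirtualMethod. At the Java source level
this roughly corresponds to an interface-super call (illustrative names):

    interface Greeter {
      default String greet() { return "hello"; }
    }

    class Polite implements Greeter {
      @Override
      public String greet() {
        // A super invocation whose method reference names the interface Greeter.
        return Greeter.super.greet() + ", friend";
      }
    }

    class SuperDemo {
      public static void main(String[] args) {
        System.out.println(new Polite().greet());  // prints "hello, friend"
      }
    }
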
@@ -6841,9 +6905,7 @@
   }
 }
 
-jobject ClassLinker::CreatePathClassLoader(Thread* self,
-                                           std::vector<const DexFile*>& dex_files,
-                                           jobject parent_loader) {
+jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) {
   // SOAAlreadyRunnable is protected, and we need something to add a global reference.
   // We could move the jobject to the callers, but all call-sites do this...
   ScopedObjectAccessUnchecked soa(self);
@@ -6874,8 +6936,8 @@
   for (const DexFile* dex_file : dex_files) {
     StackHandleScope<3> hs2(self);
 
-    // CreatePathClassLoader is only used by gtests and dex2oat. Index 0 of h_long_array is
-    // supposed to be the oat file but we can leave it null.
+    // CreatePathClassLoader is only used by gtests. Index 0 of h_long_array is supposed to be the
+    // oat file but we can leave it null.
     Handle<mirror::LongArray> h_long_array = hs2.NewHandle(mirror::LongArray::Alloc(
         self,
         kDexFileIndexStart + 1));
@@ -6921,10 +6983,9 @@
       mirror::Class::FindField(self, hs.NewHandle(h_path_class_loader->GetClass()), "parent",
                                "Ljava/lang/ClassLoader;");
   DCHECK(parent_field != nullptr);
-  mirror::Object* parent = (parent_loader != nullptr)
-      ? soa.Decode<mirror::ClassLoader*>(parent_loader)
-      : soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
-  parent_field->SetObject<false>(h_path_class_loader.Get(), parent);
+  mirror::Object* boot_cl =
+      soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
+  parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl);
 
   // Make it a global ref and return.
   ScopedLocalRef<jobject> local_ref(
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 9d432c6..f1fd0c3 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -523,10 +523,7 @@
 
   // Creates a GlobalRef PathClassLoader that can be used to load classes from the given dex files.
   // Note: the objects are not completely set up. Do not use this outside of tests and the compiler.
-  // If parent_loader is null then we use the boot class loader.
-  jobject CreatePathClassLoader(Thread* self,
-                                std::vector<const DexFile*>& dex_files,
-                                jobject parent_loader)
+  jobject CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 99353c5..471d7ca 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -24,6 +24,7 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
+#include "experimental_flags.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "gc/heap.h"
 #include "mirror/abstract_method.h"
@@ -228,7 +229,7 @@
       if (klass->IsInterface()) {
         EXPECT_EQ(0U, iftable->GetMethodArrayCount(i));
       } else {
-        EXPECT_EQ(interface->NumVirtualMethods(), iftable->GetMethodArrayCount(i));
+        EXPECT_EQ(interface->NumDeclaredVirtualMethods(), iftable->GetMethodArrayCount(i));
       }
     }
     if (klass->IsAbstract()) {
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 624abb9..3df9101 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -117,14 +117,15 @@
 
 static bool unstarted_initialized_ = false;
 
-CommonRuntimeTest::CommonRuntimeTest() {}
-CommonRuntimeTest::~CommonRuntimeTest() {
+CommonRuntimeTestImpl::CommonRuntimeTestImpl() {}
+
+CommonRuntimeTestImpl::~CommonRuntimeTestImpl() {
   // Ensure the dex files are cleaned up before the runtime.
   loaded_dex_files_.clear();
   runtime_.reset();
 }
 
-void CommonRuntimeTest::SetUpAndroidRoot() {
+void CommonRuntimeTestImpl::SetUpAndroidRoot() {
   if (IsHost()) {
     // $ANDROID_ROOT is set on the device, but not necessarily on the host.
     // But it needs to be set so that icu4c can find its locale data.
@@ -166,7 +167,7 @@
   }
 }
 
-void CommonRuntimeTest::SetUpAndroidData(std::string& android_data) {
+void CommonRuntimeTestImpl::SetUpAndroidData(std::string& android_data) {
   // On target, Cannot use /mnt/sdcard because it is mounted noexec, so use subdir of dalvik-cache
   if (IsHost()) {
     const char* tmpdir = getenv("TMPDIR");
@@ -185,7 +186,8 @@
   setenv("ANDROID_DATA", android_data.c_str(), 1);
 }
 
-void CommonRuntimeTest::TearDownAndroidData(const std::string& android_data, bool fail_on_error) {
+void CommonRuntimeTestImpl::TearDownAndroidData(const std::string& android_data,
+                                                bool fail_on_error) {
   if (fail_on_error) {
     ASSERT_EQ(rmdir(android_data.c_str()), 0);
   } else {
@@ -230,18 +232,18 @@
   }
 
   if (founddir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory.";
+    ADD_FAILURE() << "Cannot find Android tools directory.";
   }
   return founddir;
 }
 
-std::string CommonRuntimeTest::GetAndroidHostToolsDir() {
+std::string CommonRuntimeTestImpl::GetAndroidHostToolsDir() {
   return GetAndroidToolsDir("prebuilts/gcc/linux-x86/host",
                             "x86_64-linux-glibc2.15",
                             "x86_64-linux");
 }
 
-std::string CommonRuntimeTest::GetAndroidTargetToolsDir(InstructionSet isa) {
+std::string CommonRuntimeTestImpl::GetAndroidTargetToolsDir(InstructionSet isa) {
   switch (isa) {
     case kArm:
     case kThumb2:
@@ -269,15 +271,16 @@
   return "";
 }
 
-std::string CommonRuntimeTest::GetCoreArtLocation() {
+std::string CommonRuntimeTestImpl::GetCoreArtLocation() {
   return GetCoreFileLocation("art");
 }
 
-std::string CommonRuntimeTest::GetCoreOatLocation() {
+std::string CommonRuntimeTestImpl::GetCoreOatLocation() {
   return GetCoreFileLocation("oat");
 }
 
-std::unique_ptr<const DexFile> CommonRuntimeTest::LoadExpectSingleDexFile(const char* location) {
+std::unique_ptr<const DexFile> CommonRuntimeTestImpl::LoadExpectSingleDexFile(
+    const char* location) {
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   std::string error_msg;
   MemMap::Init();
@@ -290,7 +293,7 @@
   }
 }
 
-void CommonRuntimeTest::SetUp() {
+void CommonRuntimeTestImpl::SetUp() {
   SetUpAndroidRoot();
   SetUpAndroidData(android_data_);
   dalvik_cache_.append(android_data_.c_str());
@@ -345,7 +348,7 @@
   FinalizeSetup();
 }
 
-void CommonRuntimeTest::FinalizeSetup() {
+void CommonRuntimeTestImpl::FinalizeSetup() {
   // Initialize maps for unstarted runtime. This needs to be here, as running clinits needs this
   // set up.
   if (!unstarted_initialized_) {
@@ -369,7 +372,7 @@
   runtime_->GetHeap()->SetMinIntervalHomogeneousSpaceCompactionByOom(0U);
 }
 
-void CommonRuntimeTest::ClearDirectory(const char* dirpath) {
+void CommonRuntimeTestImpl::ClearDirectory(const char* dirpath) {
   ASSERT_TRUE(dirpath != nullptr);
   DIR* dir = opendir(dirpath);
   ASSERT_TRUE(dir != nullptr);
@@ -396,7 +399,7 @@
   closedir(dir);
 }
 
-void CommonRuntimeTest::TearDown() {
+void CommonRuntimeTestImpl::TearDown() {
   const char* android_data = getenv("ANDROID_DATA");
   ASSERT_TRUE(android_data != nullptr);
   ClearDirectory(dalvik_cache_.c_str());
@@ -429,7 +432,7 @@
       CHECK_EQ(0, dlclose(handle));
     }
     {
-      void* handle = dlopen("libopenjdk.so", RTLD_LAZY);
+      void* handle = dlopen("libopenjdkd.so", RTLD_LAZY);
       dlclose(handle);
       CHECK_EQ(0, dlclose(handle));
     }
@@ -453,12 +456,12 @@
   return StringPrintf("%s/framework/%s%s.jar", path.c_str(), jar_prefix.c_str(), suffix.c_str());
 }
 
-std::vector<std::string> CommonRuntimeTest::GetLibCoreDexFileNames() {
+std::vector<std::string> CommonRuntimeTestImpl::GetLibCoreDexFileNames() {
   return std::vector<std::string>({GetDexFileName("core-oj", IsHost()),
                                    GetDexFileName("core-libart", IsHost())});
 }
 
-std::string CommonRuntimeTest::GetTestAndroidRoot() {
+std::string CommonRuntimeTestImpl::GetTestAndroidRoot() {
   if (IsHost()) {
     const char* host_dir = getenv("ANDROID_HOST_OUT");
     CHECK(host_dir != nullptr);
@@ -478,7 +481,7 @@
 #define ART_TARGET_NATIVETEST_DIR_STRING ""
 #endif
 
-std::string CommonRuntimeTest::GetTestDexFileName(const char* name) {
+std::string CommonRuntimeTestImpl::GetTestDexFileName(const char* name) {
   CHECK(name != nullptr);
   std::string filename;
   if (IsHost()) {
@@ -493,7 +496,8 @@
   return filename;
 }
 
-std::vector<std::unique_ptr<const DexFile>> CommonRuntimeTest::OpenTestDexFiles(const char* name) {
+std::vector<std::unique_ptr<const DexFile>> CommonRuntimeTestImpl::OpenTestDexFiles(
+    const char* name) {
   std::string filename = GetTestDexFileName(name);
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
@@ -506,13 +510,13 @@
   return dex_files;
 }
 
-std::unique_ptr<const DexFile> CommonRuntimeTest::OpenTestDexFile(const char* name) {
+std::unique_ptr<const DexFile> CommonRuntimeTestImpl::OpenTestDexFile(const char* name) {
   std::vector<std::unique_ptr<const DexFile>> vector = OpenTestDexFiles(name);
   EXPECT_EQ(1U, vector.size());
   return std::move(vector[0]);
 }
 
-std::vector<const DexFile*> CommonRuntimeTest::GetDexFiles(jobject jclass_loader) {
+std::vector<const DexFile*> CommonRuntimeTestImpl::GetDexFiles(jobject jclass_loader) {
   std::vector<const DexFile*> ret;
 
   ScopedObjectAccess soa(Thread::Current());
@@ -572,7 +576,7 @@
   return ret;
 }
 
-const DexFile* CommonRuntimeTest::GetFirstDexFile(jobject jclass_loader) {
+const DexFile* CommonRuntimeTestImpl::GetFirstDexFile(jobject jclass_loader) {
   std::vector<const DexFile*> tmp(GetDexFiles(jclass_loader));
   DCHECK(!tmp.empty());
   const DexFile* ret = tmp[0];
@@ -580,7 +584,7 @@
   return ret;
 }
 
-jobject CommonRuntimeTest::LoadDex(const char* dex_name) {
+jobject CommonRuntimeTestImpl::LoadDex(const char* dex_name) {
   std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles(dex_name);
   std::vector<const DexFile*> class_path;
   CHECK_NE(0U, dex_files.size());
@@ -591,13 +595,12 @@
 
   Thread* self = Thread::Current();
   jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self,
-                                                                                     class_path,
-                                                                                     nullptr);
+                                                                                     class_path);
   self->SetClassLoaderOverride(class_loader);
   return class_loader;
 }
 
-std::string CommonRuntimeTest::GetCoreFileLocation(const char* suffix) {
+std::string CommonRuntimeTestImpl::GetCoreFileLocation(const char* suffix) {
   CHECK(suffix != nullptr);
 
   std::string location;
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 7223b6e..0ce40e8 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -64,8 +64,10 @@
   std::unique_ptr<File> file_;
 };
 
-class CommonRuntimeTest : public testing::Test {
+class CommonRuntimeTestImpl {
  public:
+  CommonRuntimeTestImpl();
+  virtual ~CommonRuntimeTestImpl();
   static void SetUpAndroidRoot();
 
   // Note: setting up ANDROID_DATA may create a temporary directory. If this is used in a
@@ -74,19 +76,25 @@
 
   static void TearDownAndroidData(const std::string& android_data, bool fail_on_error);
 
-  CommonRuntimeTest();
-  ~CommonRuntimeTest();
-
   // Gets the paths of the libcore dex files.
   static std::vector<std::string> GetLibCoreDexFileNames();
 
   // Returns bin directory which contains host's prebuild tools.
   static std::string GetAndroidHostToolsDir();
 
-  // Returns bin directory which contains target's prebuild tools.
+  // Returns bin directory which contains target's prebuilt tools.
   static std::string GetAndroidTargetToolsDir(InstructionSet isa);
 
  protected:
+  // Allow subclasses such as CommonCompilerTest to add extra options.
+  virtual void SetUpRuntimeOptions(RuntimeOptions* options ATTRIBUTE_UNUSED) {}
+
+  // Called before the runtime is created.
+  virtual void PreRuntimeCreate() {}
+
+  // Called after the runtime is created.
+  virtual void PostRuntimeCreate() {}
+
   static bool IsHost() {
     return !kIsTargetBuild;
   }
@@ -99,25 +107,8 @@
 
   std::unique_ptr<const DexFile> LoadExpectSingleDexFile(const char* location);
 
-  virtual void SetUp();
-
-  // Allow subclases such as CommonCompilerTest to add extra options.
-  virtual void SetUpRuntimeOptions(RuntimeOptions* options ATTRIBUTE_UNUSED) {}
-
   void ClearDirectory(const char* dirpath);
 
-  virtual void TearDown();
-
-  // Called before the runtime is created.
-  virtual void PreRuntimeCreate() {}
-
-  // Called after the runtime is created.
-  virtual void PostRuntimeCreate() {}
-
-  // Called to finish up runtime creation and filling test fields. By default runs root
-  // initializers, initialize well-known classes, and creates the heap thread pool.
-  virtual void FinalizeSetup();
-
   std::string GetTestAndroidRoot();
 
   std::string GetTestDexFileName(const char* name);
@@ -150,12 +141,45 @@
 
   std::unique_ptr<CompilerCallbacks> callbacks_;
 
+  void SetUp();
+
+  void TearDown();
+
+  void FinalizeSetup();
+
  private:
   static std::string GetCoreFileLocation(const char* suffix);
 
   std::vector<std::unique_ptr<const DexFile>> loaded_dex_files_;
 };
 
+template <typename TestType>
+class CommonRuntimeTestBase : public TestType, public CommonRuntimeTestImpl {
+ public:
+  CommonRuntimeTestBase() {}
+  virtual ~CommonRuntimeTestBase() {}
+
+ protected:
+  virtual void SetUp() {
+    CommonRuntimeTestImpl::SetUp();
+  }
+
+  virtual void TearDown() {
+    CommonRuntimeTestImpl::TearDown();
+  }
+
+  // Called to finish up runtime creation and fill in test fields. By default this runs root
+  // initializers, initializes well-known classes, and creates the heap thread pool.
+  virtual void FinalizeSetup() {
+    CommonRuntimeTestImpl::FinalizeSetup();
+  }
+};
+
+using CommonRuntimeTest = CommonRuntimeTestBase<testing::Test>;
+
+template <typename Param>
+using CommonRuntimeTestWithParam = CommonRuntimeTestBase<testing::TestWithParam<Param>>;
+
 // Sets a CheckJni abort hook to catch failures. Note that this will cause CheckJNI to carry on
 // rather than aborting, so be careful!
 class CheckJniAbortCatcher {
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 40e2b15..b4208fe 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -86,6 +86,14 @@
                               PrettyMethod(method).c_str()).c_str());
 }
 
+void ThrowAbstractMethodError(uint32_t method_idx, const DexFile& dex_file) {
+  ThrowException("Ljava/lang/AbstractMethodError;", /* referrer */ nullptr,
+                 StringPrintf("abstract method \"%s\"",
+                              PrettyMethod(method_idx,
+                                           dex_file,
+                                           /* with_signature */ true).c_str()).c_str());
+}
+
 // ArithmeticException
 
 void ThrowArithmeticExceptionDivideByZero() {
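
The ThrowAbstractMethodError overload added above takes a method index and dex file, for reporting
when only the dex-level reference is at hand. One Java-level way a call target can end up abstract
even though a superinterface provides a default (only under separate compilation; names are
illustrative):

    interface A { default void run() { } }
    // C was compiled against a version of B that did not re-declare run();
    // once B re-abstracts it, C no longer inherits an implementation.
    interface B extends A { void run(); }
    class C implements B { }

    class AbstractDemo {
      public static void main(String[] args) {
        new C().run();  // throws AbstractMethodError at runtime
      }
    }
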
@@ -211,6 +219,22 @@
                  msg.str().c_str());
 }
 
+void ThrowIncompatibleClassChangeErrorClassForInterfaceSuper(ArtMethod* method,
+                                                             mirror::Class* target_class,
+                                                             mirror::Object* this_object,
+                                                             ArtMethod* referrer) {
+  // Referrer is calling interface_method on this_object; however, the interface_method isn't
+  // implemented by this_object.
+  CHECK(this_object != nullptr);
+  std::ostringstream msg;
+  msg << "Class '" << PrettyDescriptor(this_object->GetClass())
+      << "' does not implement interface '" << PrettyDescriptor(target_class) << "' in call to '"
+      << PrettyMethod(method) << "'";
+  ThrowException("Ljava/lang/IncompatibleClassChangeError;",
+                 referrer != nullptr ? referrer->GetDeclaringClass() : nullptr,
+                 msg.str().c_str());
+}
+
 void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(ArtMethod* interface_method,
                                                                 mirror::Object* this_object,
                                                                 ArtMethod* referrer) {
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 85fe2b3..39c4e52 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -27,6 +27,7 @@
 }  // namespace mirror
 class ArtField;
 class ArtMethod;
+class DexFile;
 class Signature;
 class StringPiece;
 
@@ -35,6 +36,9 @@
 void ThrowAbstractMethodError(ArtMethod* method)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowAbstractMethodError(uint32_t method_idx, const DexFile& dex_file)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // ArithmeticException
 
 void ThrowArithmeticExceptionDivideByZero() SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
@@ -107,6 +111,12 @@
                                        ArtMethod* method, ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowIncompatibleClassChangeErrorClassForInterfaceSuper(ArtMethod* method,
+                                                             mirror::Class* target_class,
+                                                             mirror::Object* this_object,
+                                                             ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(ArtMethod* interface_method,
                                                                 mirror::Object* this_object,
                                                                 ArtMethod* referrer)
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 8a3db6c..108b8d2 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -1094,11 +1094,11 @@
   int32_t GetLineNumFromPC(ArtMethod* method, uint32_t rel_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Returns false if there is no debugging information or if it can not be decoded.
+  // Returns false if there is no debugging information or if it cannot be decoded.
   bool DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
                             DexDebugNewLocalCb local_cb, void* context) const;
 
-  // Returns false if there is no debugging information or if it can not be decoded.
+  // Returns false if there is no debugging information or if it cannot be decoded.
   bool DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
                                void* context) const;
 
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 57d623e..52da28b 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -27,7 +27,6 @@
 #include "base/unix_file/fd_file.h"
 #include "elf_file_impl.h"
 #include "elf_utils.h"
-#include "jit/debugger_interface.h"
 #include "leb128.h"
 #include "utils.h"
 
@@ -53,8 +52,6 @@
     hash_section_start_(nullptr),
     symtab_symbol_table_(nullptr),
     dynsym_symbol_table_(nullptr),
-    jit_elf_image_(nullptr),
-    jit_gdb_entry_(nullptr),
     requested_base_(requested_base) {
   CHECK(file != nullptr);
 }
@@ -273,10 +270,6 @@
   STLDeleteElements(&segments_);
   delete symtab_symbol_table_;
   delete dynsym_symbol_table_;
-  delete jit_elf_image_;
-  if (jit_gdb_entry_) {
-    DeleteJITCodeEntry(jit_gdb_entry_);
-  }
 }
 
 template <typename ElfTypes>
@@ -1300,11 +1293,6 @@
     return false;
   }
 
-  // Use GDB JIT support to do stack backtrace, etc.
-  if (executable) {
-    GdbJITSupport();
-  }
-
   return true;
 }
 
@@ -1395,50 +1383,6 @@
 }
 
 template <typename ElfTypes>
-void ElfFileImpl<ElfTypes>::GdbJITSupport() {
-  // We only get here if we only are mapping the program header.
-  DCHECK(program_header_only_);
-
-  // Well, we need the whole file to do this.
-  std::string error_msg;
-  // Make it MAP_PRIVATE so we can just give it to gdb if all the necessary
-  // sections are there.
-  std::unique_ptr<ElfFileImpl<ElfTypes>> all_ptr(
-      Open(const_cast<File*>(file_), PROT_READ | PROT_WRITE, MAP_PRIVATE, &error_msg));
-  if (all_ptr.get() == nullptr) {
-    return;
-  }
-  ElfFileImpl<ElfTypes>& all = *all_ptr;
-
-  // We need the eh_frame for gdb but debug info might be present without it.
-  const Elf_Shdr* eh_frame = all.FindSectionByName(".eh_frame");
-  if (eh_frame == nullptr) {
-    return;
-  }
-
-  // Do we have interesting sections?
-  // We need to add in a strtab and symtab to the image.
-  // all is MAP_PRIVATE so it can be written to freely.
-  // We also already have strtab and symtab so we are fine there.
-  Elf_Ehdr& elf_hdr = all.GetHeader();
-  elf_hdr.e_entry = 0;
-  elf_hdr.e_phoff = 0;
-  elf_hdr.e_phnum = 0;
-  elf_hdr.e_phentsize = 0;
-  elf_hdr.e_type = ET_EXEC;
-
-  // Since base_address_ is 0 if we are actually loaded at a known address (i.e. this is boot.oat)
-  // and the actual address stuff starts at in regular files this is good.
-  if (!all.FixupDebugSections(reinterpret_cast<intptr_t>(base_address_))) {
-    LOG(ERROR) << "Failed to load GDB data";
-    return;
-  }
-
-  jit_gdb_entry_ = CreateJITCodeEntry(all.Begin(), all.Size());
-  gdb_file_mapping_.reset(all_ptr.release());
-}
-
-template <typename ElfTypes>
 bool ElfFileImpl<ElfTypes>::Strip(std::string* error_msg) {
   // ELF files produced by MCLinker look roughly like this
   //
diff --git a/runtime/elf_file_impl.h b/runtime/elf_file_impl.h
index 0f466bd..2af31dc 100644
--- a/runtime/elf_file_impl.h
+++ b/runtime/elf_file_impl.h
@@ -213,12 +213,6 @@
   SymbolTable* symtab_symbol_table_;
   SymbolTable* dynsym_symbol_table_;
 
-  // Support for GDB JIT
-  uint8_t* jit_elf_image_;
-  JITCodeEntry* jit_gdb_entry_;
-  std::unique_ptr<ElfFileImpl<ElfTypes>> gdb_file_mapping_;
-  void GdbJITSupport();
-
   // Override the 'base' p_vaddr in the first LOAD segment with this value (if non-null).
   uint8_t* requested_base_;
 
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index ba2fb94..0663b7e 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -193,10 +193,10 @@
       return nullptr;
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    // Pass in false since the object can not be finalizable.
+    // Pass in false since the object cannot be finalizable.
     return klass->Alloc<kInstrumented, false>(self, heap->GetCurrentAllocator());
   }
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -207,7 +207,7 @@
                                                       Thread* self,
                                                       gc::AllocatorType allocator_type) {
   DCHECK(klass != nullptr);
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -410,16 +410,25 @@
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
   } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
-    // Maintain interpreter-like semantics where NullPointerException is thrown
-    // after potential NoSuchMethodError from class linker.
-    ThrowNullPointerExceptionForMethodAccess(method_idx, type);
-    return nullptr;  // Failure.
+    if (UNLIKELY(resolved_method->GetDeclaringClass()->IsStringClass() &&
+                 resolved_method->IsConstructor())) {
+      // Hack for String init:
+      //
+      // We assume that the input of String.<init> in verified code is always
+      // an unitialized reference. If it is a null constant, it must have been
+      // an uninitialized reference. If it is a null constant, it must have been
+    } else {
+      // Maintain interpreter-like semantics where NullPointerException is thrown
+      // after potential NoSuchMethodError from class linker.
+      ThrowNullPointerExceptionForMethodAccess(method_idx, type);
+      return nullptr;  // Failure.
+    }
   } else if (access_check) {
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-    mirror::Class* referring_class = referrer->GetDeclaringClass();
     bool can_access_resolved_method =
-        referring_class->CheckResolvedMethodAccess<type>(methods_class, resolved_method,
-                                                         method_idx);
+        referrer->GetDeclaringClass()->CheckResolvedMethodAccess<type>(methods_class,
+                                                                       resolved_method,
+                                                                       method_idx);
     if (UNLIKELY(!can_access_resolved_method)) {
       DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
       return nullptr;  // Failure.
@@ -450,23 +459,56 @@
       return klass->GetVTableEntry(vtable_index, class_linker->GetImagePointerSize());
     }
     case kSuper: {
-      mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
-      uint16_t vtable_index = resolved_method->GetMethodIndex();
-      if (access_check) {
-        // Check existence of super class.
-        if (super_class == nullptr || !super_class->HasVTable() ||
-            vtable_index >= static_cast<uint32_t>(super_class->GetVTableLength())) {
-          // Behavior to agree with that of the verifier.
+      // TODO This lookup is quite slow.
+      // NB This is actually quite tricky to do any other way. We cannot use GetDeclaringClass since
+      //    that will actually not be what we want in some cases where there are miranda methods or
+      //    defaults. What we actually need is a GetContainingClass that says which class's virtuals
+      //    this method is coming from.
+      mirror::Class* referring_class = referrer->GetDeclaringClass();
+      uint16_t method_type_idx = referring_class->GetDexFile().GetMethodId(method_idx).class_idx_;
+      mirror::Class* method_reference_class = class_linker->ResolveType(method_type_idx, referrer);
+      if (UNLIKELY(method_reference_class == nullptr)) {
+        // Bad type idx.
+        CHECK(self->IsExceptionPending());
+        return nullptr;
+      } else if (!method_reference_class->IsInterface()) {
+        // It is not an interface.
+        mirror::Class* super_class = referring_class->GetSuperClass();
+        uint16_t vtable_index = resolved_method->GetMethodIndex();
+        if (access_check) {
+          // Check existence of super class.
+          if (super_class == nullptr || !super_class->HasVTable() ||
+              vtable_index >= static_cast<uint32_t>(super_class->GetVTableLength())) {
+            // Behavior to agree with that of the verifier.
+            ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
+                                   resolved_method->GetName(), resolved_method->GetSignature());
+            return nullptr;  // Failure.
+          }
+        }
+        DCHECK(super_class != nullptr);
+        DCHECK(super_class->HasVTable());
+        return super_class->GetVTableEntry(vtable_index, class_linker->GetImagePointerSize());
+      } else {
+        // It is an interface.
+        if (access_check) {
+          if (!method_reference_class->IsAssignableFrom((*this_object)->GetClass())) {
+            ThrowIncompatibleClassChangeErrorClassForInterfaceSuper(resolved_method,
+                                                                    method_reference_class,
+                                                                    *this_object,
+                                                                    referrer);
+            return nullptr;  // Failure.
+          }
+        }
+        // TODO We can do better than this for a (compiled) fastpath.
+        ArtMethod* result = method_reference_class->FindVirtualMethodForInterfaceSuper(
+            resolved_method, class_linker->GetImagePointerSize());
+        // Throw an NSME if the lookup returned nullptr.
+        if (result == nullptr) {
           ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
                                  resolved_method->GetName(), resolved_method->GetSignature());
-          return nullptr;  // Failure.
         }
-      } else {
-        // Super class must exist.
-        DCHECK(super_class != nullptr);
+        return result;
       }
-      DCHECK(super_class->HasVTable());
-      return super_class->GetVTableEntry(vtable_index, class_linker->GetImagePointerSize());
     }
     case kInterface: {
       uint32_t imt_index = resolved_method->GetDexMethodIndex() % mirror::Class::kImtSize;
@@ -576,8 +618,9 @@
   if (UNLIKELY(this_object == nullptr && type != kStatic)) {
     return nullptr;
   }
+  mirror::Class* referring_class = referrer->GetDeclaringClass();
   ArtMethod* resolved_method =
-      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedMethod(method_idx, sizeof(void*));
+      referring_class->GetDexCache()->GetResolvedMethod(method_idx, sizeof(void*));
   if (UNLIKELY(resolved_method == nullptr)) {
     return nullptr;
   }
@@ -588,7 +631,6 @@
       return nullptr;
     }
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-    mirror::Class* referring_class = referrer->GetDeclaringClass();
     if (UNLIKELY(!referring_class->CanAccess(methods_class) ||
                  !referring_class->CanAccessMember(methods_class,
                                                    resolved_method->GetAccessFlags()))) {
@@ -601,12 +643,25 @@
   } else if (type == kStatic || type == kDirect) {
     return resolved_method;
   } else if (type == kSuper) {
-    mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
-    if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
-      // The super class does not have the method.
+    // TODO This lookup is rather slow.
+    uint16_t method_type_idx = referring_class->GetDexFile().GetMethodId(method_idx).class_idx_;
+    mirror::Class* method_reference_class =
+        referring_class->GetDexCache()->GetResolvedType(method_type_idx);
+    if (method_reference_class == nullptr) {
+      // Need to do full type resolution...
       return nullptr;
+    } else if (!method_reference_class->IsInterface()) {
+      // It is not an interface.
+      mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
+      if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
+        // The super class does not have the method.
+        return nullptr;
+      }
+      return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), sizeof(void*));
+    } else {
+      return method_reference_class->FindVirtualMethodForInterfaceSuper(
+          resolved_method, sizeof(void*));
     }
-    return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), sizeof(void*));
   } else {
     DCHECK(type == kVirtual);
     return this_object->GetClass()->GetVTableEntry(
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index dc9f14c..f87d48d 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -122,7 +122,10 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, rosalloc_runs, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_alt_ibase, rosalloc_runs, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, rosalloc_runs, thread_local_alloc_stack_top,
                         sizeof(void*) * kNumRosAllocThreadLocalSizeBrackets);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_top, thread_local_alloc_stack_end,
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index ce6467a..7727b2d 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -180,7 +180,7 @@
   t.NewTiming("ProcessCards");
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), false, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 99e98bb..2784693 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -227,7 +227,7 @@
   BindBitmaps();
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 416510d..c8e913c 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -34,7 +34,7 @@
   kCollectorTypeSS,
   // A generational variant of kCollectorTypeSS.
   kCollectorTypeGSS,
-  // Mark compact colector.
+  // Mark compact collector.
   kCollectorTypeMC,
   // Heap trimming collector, doesn't do any actual collecting.
   kCollectorTypeHeapTrim,
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 39ba743..5e7f1a2 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -86,7 +86,7 @@
     // it to the mutator as long as the GC is not preserving references.
     if (LIKELY(collector_ != nullptr)) {
       // If it's null it means not marked, but it could become marked if the referent is reachable
-      // by finalizer referents. So we can not return in this case and must block. Otherwise, we
+      // by finalizer referents. So we cannot return in this case and must block. Otherwise, we
       // can return it to the mutator as long as the GC is not preserving references, in which
       // case only black nodes can be safely returned. If the GC is preserving references, the
       // mutator could take a white field from a grey or white node and move it somewhere else
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 2798b21..e70fe21 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -521,7 +521,7 @@
   num_bytes_allocated_ += allocation_size;
   total_bytes_allocated_ += allocation_size;
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(GetAddressForAllocationInfo(new_info));
-  // We always put our object at the start of the free block, there can not be another free block
+  // We always put our object at the start of the free block, there cannot be another free block
   // before it.
   if (kIsDebugBuild) {
     mprotect(obj, allocation_size, PROT_READ | PROT_WRITE);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 9f61449..7d55e8c 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -931,6 +931,9 @@
                                                    ArtMethod* caller,
                                                    uint32_t dex_pc,
                                                    ArtMethod* callee) const {
+  // We cannot have thread suspension since that would cause the this_object parameter to
+  // potentially become a dangling pointer. An alternative could be to put it in a handle instead.
+  ScopedAssertNoThreadSuspension ants(thread, __FUNCTION__);
   for (InstrumentationListener* listener : invoke_virtual_or_interface_listeners_) {
     if (listener != nullptr) {
       listener->InvokeVirtualOrInterface(thread, this_object, caller, dex_pc, callee);
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 726cf1b..b29245f 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -104,6 +104,7 @@
                                         ArtMethod* caller,
                                         uint32_t dex_pc,
                                         ArtMethod* callee)
+      REQUIRES(Roles::uninterruptible_)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 };
 
@@ -417,6 +418,13 @@
                !Locks::classlinker_classes_lock_);
 
   void UpdateInterpreterHandlerTable() REQUIRES(Locks::mutator_lock_) {
+    /*
+     * TUNING: Dalvik's mterp stashes the actual current handler table base in a
+     * tls field.  For Arm, this enables all suspend, debug & tracing checks to be
+     * collapsed into a single conditionally-executed ldw instruction.
+     * Move to Dalvik-style handler-table management for both the goto interpreter and
+     * mterp.
+     */
     interpreter_handler_table_ = IsActive() ? kAlternativeHandlerTable : kMainHandlerTable;
   }
 
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 8f715a3..2b2176e 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -61,7 +61,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // Only used by image writer. Special version that may not cause thread suspension since the GC
-  // can not be running while we are doing image writing. Maybe be called while while holding a
+  // cannot be running while we are doing image writing. May be called while holding a
   // lock since there will not be thread suspension.
   mirror::String* InternStrongImageString(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 8d5a61a..6b5218d 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -25,6 +25,7 @@
 #include "ScopedLocalRef.h"
 #include "stack.h"
 #include "unstarted_runtime.h"
+#include "mterp/mterp.h"
 
 namespace art {
 namespace interpreter {
@@ -224,19 +225,33 @@
 }
 
 enum InterpreterImplKind {
-  kSwitchImpl,            // Switch-based interpreter implementation.
-  kComputedGotoImplKind   // Computed-goto-based interpreter implementation.
+  kSwitchImplKind,        // Switch-based interpreter implementation.
+  kComputedGotoImplKind,  // Computed-goto-based interpreter implementation.
+  kMterpImplKind          // Assembly-based interpreter implementation.
 };
 static std::ostream& operator<<(std::ostream& os, const InterpreterImplKind& rhs) {
-  os << ((rhs == kSwitchImpl) ? "Switch-based interpreter" : "Computed-goto-based interpreter");
+  os << ((rhs == kSwitchImplKind)
+              ? "Switch-based interpreter"
+              : (rhs == kComputedGotoImplKind)
+                  ? "Computed-goto-based interpreter"
+                  : "Asm interpreter");
   return os;
 }
 
 #if !defined(__clang__)
+#if defined(__arm__)
+// TODO: remove when all targets implemented.
+static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
+#else
 static constexpr InterpreterImplKind kInterpreterImplKind = kComputedGotoImplKind;
+#endif
 #else
 // Clang 3.4 fails to build the goto interpreter implementation.
-static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImpl;
+#if defined(__arm__)
+static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
+#else
+static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImplKind;
+#endif
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) {
   LOG(FATAL) << "UNREACHABLE";
@@ -263,18 +278,52 @@
 
 static inline JValue Execute(Thread* self, const DexFile::CodeItem* code_item,
                              ShadowFrame& shadow_frame, JValue result_register) {
-  DCHECK(shadow_frame.GetMethod()->IsInvokable());
+  DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
   shadow_frame.GetMethod()->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
   bool transaction_active = Runtime::Current()->IsActiveTransaction();
   if (LIKELY(shadow_frame.GetMethod()->IsPreverified())) {
     // Enter the "without access check" interpreter.
-    if (kInterpreterImplKind == kSwitchImpl) {
+    if (kInterpreterImplKind == kMterpImplKind) {
       if (transaction_active) {
-        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register);
+        // No Mterp variant - just use the switch interpreter.
+        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,
+                                              false);
       } else {
-        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register);
+        const instrumentation::Instrumentation* const instrumentation =
+            Runtime::Current()->GetInstrumentation();
+        while (true) {
+          if (instrumentation->IsActive() || !Runtime::Current()->IsStarted()) {
+            // TODO: allow JIT profiling instrumentation.  Now, just punt on all instrumentation.
+#if !defined(__clang__)
+            return ExecuteGotoImpl<false, false>(self, code_item, shadow_frame, result_register);
+#else
+            return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                                   false);
+#endif
+          }
+          bool returned = ExecuteMterpImpl(self, code_item, &shadow_frame, &result_register);
+          if (returned) {
+            return result_register;
+          } else {
+            // Mterp didn't like that instruction.  Single-step it with the reference interpreter.
+            JValue res = ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame,
+                                                               result_register, true);
+            if (shadow_frame.GetDexPC() == DexFile::kDexNoIndex) {
+              // Single-stepped a return or an exception not handled locally.  Return to caller.
+              return res;
+            }
+          }
+        }
+      }
+    } else if (kInterpreterImplKind == kSwitchImplKind) {
+      if (transaction_active) {
+        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,
+                                              false);
+      } else {
+        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                               false);
       }
     } else {
       DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
@@ -286,11 +335,22 @@
     }
   } else {
     // Enter the "with access check" interpreter.
-    if (kInterpreterImplKind == kSwitchImpl) {
+    if (kInterpreterImplKind == kMterpImplKind) {
+      // No access check variants for Mterp.  Just use the switch version.
       if (transaction_active) {
-        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register,
+                                             false);
       } else {
-        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register,
+                                              false);
+      }
+    } else if (kInterpreterImplKind == kSwitchImplKind) {
+      if (transaction_active) {
+        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register,
+                                             false);
+      } else {
+        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register,
+                                              false);
       }
     } else {
       DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
@@ -501,5 +561,13 @@
   self->PopShadowFrame();
 }
 
+void CheckInterpreterAsmConstants() {
+  CheckMterpAsmConstants();
+}
+
+void InitInterpreterTls(Thread* self) {
+  InitMterpTls(self);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index 8e7f3da..6353a9b 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -50,6 +50,11 @@
                                        ShadowFrame* shadow_frame, JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+// One-time sanity check.
+void CheckInterpreterAsmConstants();
+
+void InitInterpreterTls(Thread* self);
+
 }  // namespace interpreter
 
 }  // namespace art
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 18fb0d8..ecd4de9 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -592,6 +592,10 @@
   //
   // (at this point the ArtMethod has already been replaced,
   // so we just need to fix-up the arguments)
+  //
+  // Note that FindMethodFromCode in entrypoint_utils-inl.h was also special-cased
+  // to handle the compiler optimization of replacing `this` with null without
+  // throwing NullPointerException.
   uint32_t string_init_vreg_this = is_range ? vregC : arg[0];
   if (UNLIKELY(string_init)) {
     DCHECK_GT(num_regs, 0u);  // As the method is an instance method, there should be at least 1.
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 9f6699f..932d255 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -67,16 +67,21 @@
 namespace art {
 namespace interpreter {
 
-// External references to both interpreter implementations.
+// External references to all interpreter implementations.
 
 template<bool do_access_check, bool transaction_active>
 extern JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
-                                ShadowFrame& shadow_frame, JValue result_register);
+                                ShadowFrame& shadow_frame, JValue result_register,
+                                bool interpret_one_instruction);
 
 template<bool do_access_check, bool transaction_active>
 extern JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item,
                               ShadowFrame& shadow_frame, JValue result_register);
 
+// Mterp does not support transactions or access checks, so there are no templated versions.
+extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,
+                                 ShadowFrame* shadow_frame, JValue* result_register);
+
 void ThrowNullPointerExceptionFromInterpreter()
     SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index c9831e6..bab0d40 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -35,6 +35,9 @@
       /* Structured locking is to be enforced for abnormal termination, too. */                 \
       shadow_frame.GetLockCountData().                                                          \
           CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);                        \
+      if (interpret_one_instruction) {                                                          \
+        shadow_frame.SetDexPC(DexFile::kDexNoIndex);                                            \
+      }                                                                                         \
       return JValue(); /* Handled in caller. */                                                 \
     } else {                                                                                    \
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc); \
@@ -78,7 +81,8 @@
 
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
-                         ShadowFrame& shadow_frame, JValue result_register) {
+                         ShadowFrame& shadow_frame, JValue result_register,
+                         bool interpret_one_instruction) {
   constexpr bool do_assignability_check = do_access_check;
   if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
     LOG(FATAL) << "Invalid shadow frame for interpreter use";
@@ -105,7 +109,7 @@
   // to keep this live for the scope of the entire function call.
   std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
   size_t lambda_captured_variable_index = 0;
-  while (true) {
+  do {
     dex_pc = inst->GetDexPc(insns);
     shadow_frame.SetDexPC(dex_pc);
     TraceExecution(shadow_frame, inst, dex_pc);
@@ -203,6 +207,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN_VOID: {
@@ -216,6 +223,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN: {
@@ -230,6 +240,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN_WIDE: {
@@ -243,6 +256,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN_OBJECT: {
@@ -278,6 +294,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::CONST_4: {
@@ -2370,22 +2389,29 @@
       case Instruction::UNUSED_7A:
         UnexpectedOpcode(inst, shadow_frame);
     }
-  }
+  } while (!interpret_one_instruction);
+  // Record where we stopped.
+  shadow_frame.SetDexPC(inst->GetDexPc(insns));
+  return JValue();
 }  // NOLINT(readability/fn_size)
 
 // Explicit definitions of ExecuteSwitchImpl.
 template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
 JValue ExecuteSwitchImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                      ShadowFrame& shadow_frame, JValue result_register);
+                                      ShadowFrame& shadow_frame, JValue result_register,
+                                      bool interpret_one_instruction);
 template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
 JValue ExecuteSwitchImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                       ShadowFrame& shadow_frame, JValue result_register);
+                                       ShadowFrame& shadow_frame, JValue result_register,
+                                       bool interpret_one_instruction);
 template SHARED_REQUIRES(Locks::mutator_lock_)
 JValue ExecuteSwitchImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
-                                     ShadowFrame& shadow_frame, JValue result_register);
+                                     ShadowFrame& shadow_frame, JValue result_register,
+                                     bool interpret_one_instruction);
 template SHARED_REQUIRES(Locks::mutator_lock_)
 JValue ExecuteSwitchImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
-                                      ShadowFrame& shadow_frame, JValue result_register);
+                                      ShadowFrame& shadow_frame, JValue result_register,
+                                      bool interpret_one_instruction);
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/Makefile_mterp b/runtime/interpreter/mterp/Makefile_mterp
new file mode 100644
index 0000000..f0c30ad
--- /dev/null
+++ b/runtime/interpreter/mterp/Makefile_mterp
@@ -0,0 +1,49 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Makefile for the Art fast interpreter.  This is not currently
+# integrated into the build system.
+#
+
+SHELL := /bin/sh
+
+# Build system has TARGET_ARCH=arm, but we can support the exact architecture
+# if it is worthwhile.
+#
+# To generate sources:
+# for arch in arm arm64 x86 x86_64 mips mips64
+# do
+#   TARGET_ARCH_EXT=$arch make -f Makefile_mterp
+# done
+#
+
+OUTPUT_DIR := out
+
+# Accumulate all possible dependencies for the generated files in a very
+# conservative fashion.  If it's not one of the generated files in "out",
+# assume it's a dependency.
+SOURCE_DEPS := \
+	$(shell find . -path ./$(OUTPUT_DIR) -prune -o -type f -print) \
+
+# Source files generated by the script.  There's always one C and one
+# assembly file, though in practice one or the other could be empty.
+GEN_SOURCES := \
+	$(OUTPUT_DIR)/interp_asm_$(TARGET_ARCH_EXT).S
+
+target: $(GEN_SOURCES)
+
+$(GEN_SOURCES): $(SOURCE_DEPS)
+	@mkdir -p out
+	./gen_mterp.py $(TARGET_ARCH_EXT) $(OUTPUT_DIR)
diff --git a/runtime/interpreter/mterp/README.txt b/runtime/interpreter/mterp/README.txt
new file mode 100644
index 0000000..19e02be
--- /dev/null
+++ b/runtime/interpreter/mterp/README.txt
@@ -0,0 +1,197 @@
+rt "mterp" README
+
+NOTE: Find rebuilding instructions at the bottom of this file.
+
+
+==== Overview ====
+
+Every configuration has a "config-*" file that controls how the sources
+are generated.  The sources are written into the "out" directory, where
+they are picked up by the Android build system.
+
+The best way to become familiar with the interpreter is to look at the
+generated files in the "out" directory.
+
+
+==== Config file format ====
+
+The config files are parsed from top to bottom.  Each line in the file
+may be blank, hold a comment (line starts with '#'), or be a command.
+
+The commands are:
+
+  handler-style <computed-goto|jump-table>
+
+    Specify which style of interpreter to generate.  In computed-goto,
+    each handler is allocated a fixed region, allowing transitions to
+    be done via table-start-address + (opcode * handler-size). With
+    jump-table style, handlers may be of any length, and the generated
+    table is an array of pointers to the handlers.  This command is required,
+    and must be the first command in the config file.
+
+  handler-size <bytes>
+
+    Specify the size of the fixed region, in bytes.  On most platforms
+    this will need to be a power of 2.  For jump-table implementations,
+    this command is ignored.
+
+  import <filename>
+
+    The specified file is included immediately, in its entirety.  No
+    substitutions are performed.  ".cpp" and ".h" files are copied to the
+    C output, ".S" files are copied to the asm output.
+
+  asm-alt-stub <filename>
+
+    When present, this command will cause the generation of an alternate
+    set of entry points (for computed-goto interpreters) or an alternate
+    jump table (for jump-table interpreters).
+
+  fallback-stub <filename>
+
+    Specifies a file to be used for the special FALLBACK tag on the "op"
+    command below.  Intended to be used to transfer control to an alternate
+    interpreter to single-step a not-yet-implemented opcode.  Note: this should
+    not be used on RETURN-class instructions.
+
+  op-start <directory>
+
+    Indicates the start of the opcode list.  Must precede any "op"
+    commands.  The specified directory is the default location to pull
+    instruction files from.
+
+  op <opcode> <directory>|FALLBACK
+
+    Can only appear after "op-start" and before "op-end".  Overrides the
+    default source file location of the specified opcode.  The opcode
+    definition will come from the specified file, e.g. "op OP_NOP arm"
+    will load from "arm/OP_NOP.S".  A substitution dictionary will be
+    applied (see below).  If the special "FALLBACK" token is used instead of
+    a directory name, the source file specified in fallback-stub will instead
+    be used for this opcode.
+
+  alt <opcode> <directory>
+
+    Can only appear after "op-start" and before "op-end".  Similar to the
+    "op" command above, but denotes a source file to override the entry
+    in the alternate handler table.  The opcode definition will come from
+    the specified file, e.g. "alt OP_NOP arm" will load from
+    "arm/ALT_OP_NOP.S".  A substitution dictionary will be applied
+    (see below).
+
+  op-end
+
+    Indicates the end of the opcode list.  All kNumPackedOpcodes
+    opcodes are emitted when this is seen, followed by any code that
+    didn't fit inside the fixed-size instruction handler space.
+
+The order of "op" and "alt" directives are not significant; the generation
+tool will extract ordering info from the VM sources.
+
+Typically the "op-start" directive names the directory holding the form in
+which most opcodes currently exist (see the illustrative sketch below).
+
+==== Instruction file format ====
+
+The assembly instruction files are simply fragments of assembly sources.
+The starting label will be provided by the generation tool, as will
+declarations for the segment type and alignment.  The expected target
+assembler is GNU "as", but others will work (may require fiddling with
+some of the pseudo-ops emitted by the generation tool).
+
+A substitution dictionary is applied to all opcode fragments as they are
+appended to the output.  Substitutions can look like "$value" or "${value}".
+
+The dictionary always includes:
+
+  $opcode - opcode name, e.g. "OP_NOP"
+  $opnum - opcode number, e.g. 0 for OP_NOP
+  $handler_size_bytes - max size of an instruction handler, in bytes
+  $handler_size_bits - max size of an instruction handler, log 2
+
+Both C and assembly sources will be passed through the C pre-processor,
+so you can take advantage of C-style comments and preprocessor directives
+like "#define".
+
+Some generator operations are available.
+
+  %include "filename" [subst-dict]
+
+    Includes the file, which should look like "arm/OP_NOP.S".  You can
+    specify values for the substitution dictionary, using standard Python
+    syntax.  For example, this:
+      %include "arm/unop.S" {"result":"r1"}
+    would insert "arm/unop.S" at the current file position, replacing
+    occurrences of "$result" with "r1".
+
+  %default <subst-dict>
+
+    Specify default substitution dictionary values, using standard Python
+    syntax.  Useful if you want to have a "base" version and variants.
+
+  %break
+
+    Identifies the split between the main portion of the instruction
+    handler (which must fit in "handler-size" bytes) and the "sister"
+    code, which is appended to the end of the instruction handler block.
+    In jump table implementations, %break is ignored.
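+
+As a purely illustrative sketch (not one of the real fragments in this
+change), an instruction file combining these operations might look like:
+
+      %default {"result":"r0"}
+          FETCH_ADVANCE_INST 1            @ advance rPC, load rINST
+          GET_INST_OPCODE ip              @ extract opcode from rINST
+          SET_VREG $result, r9            @ store $result (r0 unless overridden);
+                                          @ r9 assumed to hold the dest vreg index
+          GOTO_OPCODE ip                  @ jump to next instruction
+      %break
+          /* "sister" code placed after the fixed-size handler region */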
+
+The generation tool does *not* print a warning if your instructions
+exceed "handler-size", but the VM will abort on startup if it detects an
+oversized handler.  On architectures with fixed-width instructions this
+is easy to work with; on others you will need to count bytes.
+
+
+==== Using C constants from assembly sources ====
+
+The file "art/runtime/asm_support.h" has some definitions for constant
+values, structure sizes, and struct member offsets.  The format is fairly
+restricted, as simple macros are used to massage it for use with both C
+(where it is verified) and assembly (where the definitions are used).
+
+If a constant in the file becomes out of sync, the VM will log an error
+message and abort during startup.
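+
+For example, a handler fragment can use such a constant directly as an
+immediate offset; the fragments in this change do exactly this, e.g.:
+
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]   @ load thread flags via an asm_support.h constant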
+
+
+==== Development tips ====
+
+If you need to debug the initial piece of an opcode handler, and your
+debug code expands it beyond the handler size limit, you can insert a
+generic header at the top:
+
+    b       ${opcode}_start
+%break
+${opcode}_start:
+
+If you already have a %break, it's okay to leave it in place -- the second
+%break is ignored.
+
+
+==== Rebuilding ====
+
+If you change any of the source file fragments, you need to rebuild the
+combined source files in the "out" directory.  Make sure the files in
+"out" are editable, then:
+
+    $ cd mterp
+    $ ./rebuild.sh
+
+The ultimate goal is to have the build system generate the necessary
+output files without requiring this separate step, but we're not yet
+ready to require Python in the build.
+
+==== Interpreter Control ====
+
+The mterp fast interpreter achieves much of its performance advantage
+over the C++ interpreter through its efficient mechanism of
+transitioning from one Dalvik bytecode to the next.  Mterp for ARM targets
+uses a computed-goto mechanism, in which the handler entrypoints are
+located at the base of the handler table + (opcode * 128).
+
+In normal operation, the dedicated register rIBASE (r8 for ARM, edx for x86)
+holds a pointer to the mainHandlerTable.  If we need to switch to a mode that
+requires inter-instruction checking, rIBASE is changed to point to the
+altHandlerTable.  Note that this change is not immediate: what is actually
+changed is the value of curHandlerTable, which is part of the interpBreak
+structure.  Rather than explicitly checking for changes, each thread blindly
+refreshes rIBASE at backward branches, exception throws and returns.
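+
+Concretely, with 128-byte handlers the per-instruction dispatch reduces to
+something like the following ARM sequence (a sketch of what the
+GET_INST_OPCODE / GOTO_OPCODE macros expand to, not the literal generated
+code):
+
+    and     ip, rINST, #255             @ ip<- opcode of the current instruction
+    add     pc, rIBASE, ip, lsl #7      @ jump to rIBASE + (opcode * 128)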
diff --git a/runtime/interpreter/mterp/arm/alt_stub.S b/runtime/interpreter/mterp/arm/alt_stub.S
new file mode 100644
index 0000000..92ae0c6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/alt_stub.S
@@ -0,0 +1,12 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (${opnum} * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
new file mode 100644
index 0000000..474bc3c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -0,0 +1,36 @@
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm/binop.S b/runtime/interpreter/mterp/arm/binop.S
new file mode 100644
index 0000000..eeb72ef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binop.S
@@ -0,0 +1,35 @@
+%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if $chkzero
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/arm/binop2addr.S b/runtime/interpreter/mterp/arm/binop2addr.S
new file mode 100644
index 0000000..d09a43a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binop2addr.S
@@ -0,0 +1,32 @@
+%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if $chkzero
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopLit16.S b/runtime/interpreter/mterp/arm/binopLit16.S
new file mode 100644
index 0000000..065394e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopLit16.S
@@ -0,0 +1,29 @@
+%default {"result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if $chkzero
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopLit8.S b/runtime/interpreter/mterp/arm/binopLit8.S
new file mode 100644
index 0000000..ec0b3c4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopLit8.S
@@ -0,0 +1,32 @@
+%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if $chkzero
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopWide.S b/runtime/interpreter/mterp/arm/binopWide.S
new file mode 100644
index 0000000..57d43c6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopWide.S
@@ -0,0 +1,38 @@
+%default {"preinstr":"", "result0":"r0", "result1":"r1", "chkzero":"0"}
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if $chkzero
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {$result0,$result1}     @ vAA/vAA+1<- $result0/$result1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopWide2addr.S b/runtime/interpreter/mterp/arm/binopWide2addr.S
new file mode 100644
index 0000000..4e855f2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopWide2addr.S
@@ -0,0 +1,34 @@
+%default {"preinstr":"", "result0":"r0", "result1":"r1", "chkzero":"0"}
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if $chkzero
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {$result0,$result1}     @ vAA/vAA+1<- $result0/$result1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S
new file mode 100644
index 0000000..4c5ffc5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/entry.S
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .align  2
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  r0  Thread* self
+ *  r1  code_item
+ *  r2  ShadowFrame
+ *  r3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .fnstart
+    .save {r4-r10,fp,lr}
+    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
+    .pad    #4
+    sub     sp, sp, #4                  @ align 64
+
+    /* Remember the return register */
+    str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     r1, [r2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     rSELF, r0
+    ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs (base of the Dalvik registers).
+    add     rREFS, rFP, r0, lsl #2                 @ point to reference array in shadow frame
+    ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
+    add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
+    add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          @ load rINST from rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* NOTE: no fallthrough */
diff --git a/runtime/interpreter/mterp/arm/fallback.S b/runtime/interpreter/mterp/arm/fallback.S
new file mode 100644
index 0000000..44e7e12
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fallback.S
@@ -0,0 +1,3 @@
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
diff --git a/runtime/interpreter/mterp/arm/fbinop.S b/runtime/interpreter/mterp/arm/fbinop.S
new file mode 100644
index 0000000..594ee03
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinop.S
@@ -0,0 +1,23 @@
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $instr                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/fbinop2addr.S b/runtime/interpreter/mterp/arm/fbinop2addr.S
new file mode 100644
index 0000000..b052a29
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinop2addr.S
@@ -0,0 +1,21 @@
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    $instr                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/fbinopWide.S b/runtime/interpreter/mterp/arm/fbinopWide.S
new file mode 100644
index 0000000..1bed817
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinopWide.S
@@ -0,0 +1,23 @@
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $instr                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/fbinopWide2addr.S b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
new file mode 100644
index 0000000..9f56986
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    $instr                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
new file mode 100644
index 0000000..617f572
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -0,0 +1,167 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    ldr  r2, [rSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     r0, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    cmp     r0, #0                                  @ Exception pending?
+    beq     MterpFallback                           @ If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    @ (self, shadow_frame)
+    cmp     r0, #0
+    beq     MterpExceptionReturn                    @ no local catch, back to caller.
+    ldr     r0, [rFP, #OFF_FP_CODE_ITEM]
+    ldr     r1, [rFP, #OFF_FP_DEX_PC]
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     rPC, r0, #CODEITEM_INSNS_OFFSET
+    add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
+    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* resume execution at catch block */
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
+ * still needs to get the opcode and branch to it, and flags are in lr.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    EXPORT_PC
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck           @ (self)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     r0, #0                                  @ signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     r0, #1                                  @ signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    str     r0, [r2]
+    str     r1, [r2, #4]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r0, #1                                  @ signal return to caller.
+MterpDone:
+    add     sp, sp, #4                              @ un-align 64
+    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
+
+
+    .fnend
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
diff --git a/runtime/interpreter/mterp/arm/funop.S b/runtime/interpreter/mterp/arm/funop.S
new file mode 100644
index 0000000..d7a0859
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/funop.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 32-bit unary floating-point operation.  Provide an "instr"
+     * line that specifies an instruction that performs "s1 = op s0".
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    $instr                              @ s1<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s1, [r9]                    @ vA<- s1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/funopNarrower.S b/runtime/interpreter/mterp/arm/funopNarrower.S
new file mode 100644
index 0000000..9daec28
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/funopNarrower.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 64bit-to-32bit unary floating point operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    fldd    d0, [r3]                    @ d0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    $instr                              @ s0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s0, [r9]                    @ vA<- s0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/funopWider.S b/runtime/interpreter/mterp/arm/funopWider.S
new file mode 100644
index 0000000..087a1f2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/funopWider.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    $instr                              @ d0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fstd    d0, [r9]                    @ vA<- d0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
new file mode 100644
index 0000000..14319d9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl()-style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.  If VFP
+is present, registers s16-s31 (a/k/a d8-d15, a/k/a q4-q7) must be preserved,
+s0-s15 (d0-d7, q0-q3) do not need to be.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+Mterp and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rSELF     self (Thread) pointer
+  r7  rINST     first 16-bit code unit of current instruction
+  r8  rIBASE    interpreted instruction base pointer, used for computed goto
+  r11 rREFS	base of object references in shadow frame  (ideally, we'll get rid of this later).
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rPC     r4
+#define rFP     r5
+#define rSELF   r6
+#define rINST   r7
+#define rIBASE  r8
+#define rREFS   r11
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+/*
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+.macro EXPORT_DEX_PC tmp
+    ldr  \tmp, [rFP, #OFF_FP_CODE_ITEM]
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    add  \tmp, #CODEITEM_INSNS_OFFSET
+    sub  \tmp, rPC, \tmp
+    asr  \tmp, #1
+    str  \tmp, [rFP, #OFF_FP_DEX_PC]
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    ldrh    rINST, [rPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]!
+.endm
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to rPC and rINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]
+.endm
+
+/* Advance rPC by some number of code units. */
+.macro ADVANCE count
+  add  rPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg.  Updates
+ * rPC to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ *
+ * We want to write "ldrh rINST, [rPC, _reg, lsl #1]!", but some of the
+ * bits that hold the shift distance are used for the half/byte/sign flags.
+ * In some cases we can pre-double _reg for free, so we require a byte offset
+ * here.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    ldrh    rINST, [rPC, \reg]!
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance rPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [rPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [rPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [rPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, rINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Because this only jumps within the
+ * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
+ */
+.macro GOTO_OPCODE reg
+    add     pc, rIBASE, \reg, lsl #${handler_size_bits}
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     pc, \base, \reg, lsl #${handler_size_bits}
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [rFP, \vreg, lsl #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    mov     \reg, #0
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, rFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+  ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
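
For orientation, the negative-offset scheme the OFF_FP() macros encode, and the
pointer-to-index conversion EXPORT_DEX_PC performs, look roughly like this in C++
(a sketch only; the layout and names are illustrative, not the real art::ShadowFrame
declaration):

    #include <cstddef>
    #include <cstdint>

    // Illustrative only: rFP points at vregs_[0], not at the frame base, so every
    // other field is reached with a negative offset from rFP.
    struct ShadowFrameSketch {
      uint32_t        number_of_vregs_;
      const uint16_t* dex_pc_ptr_;
      uint32_t        dex_pc_;
      uint32_t        vregs_[1];
    };

    // OFF_FP(X) == X - SHADOWFRAME_VREGS_OFFSET
    inline std::ptrdiff_t OffFp(std::size_t field_offset) {
      return static_cast<std::ptrdiff_t>(field_offset) -
             static_cast<std::ptrdiff_t>(offsetof(ShadowFrameSketch, vregs_));
    }

    // EXPORT_DEX_PC: convert the raw code-unit pointer kept in rPC back into a
    // dex pc, i.e. a 16-bit code-unit index from the start of the method's insns.
    inline uint32_t ToDexPc(const uint16_t* pc, const uint16_t* insns_start) {
      return static_cast<uint32_t>(pc - insns_start);
    }
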
diff --git a/runtime/interpreter/mterp/arm/invoke.S b/runtime/interpreter/mterp/arm/invoke.S
new file mode 100644
index 0000000..7575865
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/invoke.S
@@ -0,0 +1,19 @@
+%default { "helper":"UndefinedInvokeHandler" }
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      $helper
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
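
A minimal C++ sketch of the control flow this wrapper encodes; the helper signature
is an assumption for illustration (the real helpers are whatever the .extern $helper
line resolves to), as is the exception hand-off:

    #include <cstdint>

    // Assumed shape of the invoke helpers: false means an exception is pending.
    extern "C" bool InvokeHelper(void* self, void* shadow_frame,
                                 uint16_t* dex_pc_ptr, uint16_t inst_data);

    bool DispatchInvokeSketch(void* self, void* shadow_frame,
                              uint16_t*& pc, uint16_t inst_data) {
      if (!InvokeHelper(self, shadow_frame, pc, inst_data)) {
        return false;   // corresponds to the beq MterpException branch
      }
      pc += 3;          // invoke instructions are three 16-bit code units wide
      return true;      // GET_INST_OPCODE/GOTO_OPCODE then dispatch the next opcode
    }
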
diff --git a/runtime/interpreter/mterp/arm/op_add_double.S b/runtime/interpreter/mterp/arm/op_add_double.S
new file mode 100644
index 0000000..9332bf2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"faddd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_double_2addr.S b/runtime/interpreter/mterp/arm/op_add_double_2addr.S
new file mode 100644
index 0000000..3242c53
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"faddd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_float.S b/runtime/interpreter/mterp/arm/op_add_float.S
new file mode 100644
index 0000000..afb7967
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fadds   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_float_2addr.S b/runtime/interpreter/mterp/arm/op_add_float_2addr.S
new file mode 100644
index 0000000..0067b6a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fadds   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int.S b/runtime/interpreter/mterp/arm/op_add_int.S
new file mode 100644
index 0000000..1dcae7e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int_2addr.S b/runtime/interpreter/mterp/arm/op_add_int_2addr.S
new file mode 100644
index 0000000..9ea98f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int_lit16.S b/runtime/interpreter/mterp/arm/op_add_int_lit16.S
new file mode 100644
index 0000000..5763ab8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int_lit8.S b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
new file mode 100644
index 0000000..b84684a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_long.S b/runtime/interpreter/mterp/arm/op_add_long.S
new file mode 100644
index 0000000..093223e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"adds    r0, r0, r2", "instr":"adc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_add_long_2addr.S b/runtime/interpreter/mterp/arm/op_add_long_2addr.S
new file mode 100644
index 0000000..c11e0af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"adds    r0, r0, r2", "instr":"adc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_aget.S b/runtime/interpreter/mterp/arm/op_aget.S
new file mode 100644
index 0000000..11f7079
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget.S
@@ -0,0 +1,29 @@
+%default { "load":"ldr", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #$shift     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $load   r2, [r0, #$data_offset]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
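
The null/bounds checking pattern used here (and mirrored by op_aput below) folds the
negative-index and index-too-large cases into one unsigned comparison. A C++
restatement of the semantics, not the runtime's actual implementation:

    #include <cstdint>

    // Illustrative only; the real code reads the length via MIRROR_ARRAY_LENGTH_OFFSET
    // and bails to common_errNullObject / common_errArrayIndex instead of returning.
    bool AGetSketch(const int32_t* array_data, uint32_t length,
                    int32_t index, int32_t* out) {
      if (array_data == nullptr) return false;        // null array object
      if (static_cast<uint32_t>(index) >= length) {   // the cmp/bcs pair; catches index < 0 too
        return false;
      }
      *out = array_data[index];
      return true;
    }
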
diff --git a/runtime/interpreter/mterp/arm/op_aget_boolean.S b/runtime/interpreter/mterp/arm/op_aget_boolean.S
new file mode 100644
index 0000000..8f678dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_byte.S b/runtime/interpreter/mterp/arm/op_aget_byte.S
new file mode 100644
index 0000000..a304650
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_byte.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrsb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_char.S b/runtime/interpreter/mterp/arm/op_aget_char.S
new file mode 100644
index 0000000..4908306
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_char.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_object.S b/runtime/interpreter/mterp/arm/op_aget_object.S
new file mode 100644
index 0000000..4e0aab5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_object.S
@@ -0,0 +1,21 @@
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     @ (array, index)
+    ldr      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp      r1, #0
+    bne      MterpException
+    SET_VREG_OBJECT r0, r9
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aget_short.S b/runtime/interpreter/mterp/arm/op_aget_short.S
new file mode 100644
index 0000000..b71e659
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_short.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrsh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_wide.S b/runtime/interpreter/mterp/arm/op_aget_wide.S
new file mode 100644
index 0000000..caaec71
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_wide.S
@@ -0,0 +1,24 @@
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use LDRD.
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_and_int.S b/runtime/interpreter/mterp/arm/op_and_int.S
new file mode 100644
index 0000000..7c16d37
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_2addr.S b/runtime/interpreter/mterp/arm/op_and_int_2addr.S
new file mode 100644
index 0000000..0fbab02
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_lit16.S b/runtime/interpreter/mterp/arm/op_and_int_lit16.S
new file mode 100644
index 0000000..541e9b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_lit8.S b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
new file mode 100644
index 0000000..d5783e5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_long.S b/runtime/interpreter/mterp/arm/op_and_long.S
new file mode 100644
index 0000000..4ad5158
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"and     r0, r0, r2", "instr":"and     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_and_long_2addr.S b/runtime/interpreter/mterp/arm/op_and_long_2addr.S
new file mode 100644
index 0000000..e23ea44
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"and     r0, r0, r2", "instr":"and     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_aput.S b/runtime/interpreter/mterp/arm/op_aput.S
new file mode 100644
index 0000000..a511fa5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput.S
@@ -0,0 +1,29 @@
+%default { "store":"str", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #$shift     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    $store  r2, [r0, #$data_offset]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aput_boolean.S b/runtime/interpreter/mterp/arm/op_aput_boolean.S
new file mode 100644
index 0000000..e86663f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_byte.S b/runtime/interpreter/mterp/arm/op_aput_byte.S
new file mode 100644
index 0000000..83694b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_byte.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_char.S b/runtime/interpreter/mterp/arm/op_aput_char.S
new file mode 100644
index 0000000..3551cac
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_char.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_object.S b/runtime/interpreter/mterp/arm/op_aput_object.S
new file mode 100644
index 0000000..c539916
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_object.S
@@ -0,0 +1,14 @@
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpAputObject
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aput_short.S b/runtime/interpreter/mterp/arm/op_aput_short.S
new file mode 100644
index 0000000..0a0590e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_short.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_wide.S b/runtime/interpreter/mterp/arm/op_aput_wide.S
new file mode 100644
index 0000000..49839d1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_wide.S
@@ -0,0 +1,24 @@
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_array_length.S b/runtime/interpreter/mterp/arm/op_array_length.S
new file mode 100644
index 0000000..43b1682
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_array_length.S
@@ -0,0 +1,13 @@
+    /*
+     * Return the length of an array.
+     */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    GET_VREG r0, r1                     @ r0<- vB (object ref)
+    cmp     r0, #0                      @ is object null?
+    beq     common_errNullObject        @ yup, fail
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- array length
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r3, r2                     @ vB<- length
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_check_cast.S b/runtime/interpreter/mterp/arm/op_check_cast.S
new file mode 100644
index 0000000..3e3ac70
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_check_cast.S
@@ -0,0 +1,17 @@
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- BBBB
+    mov      r1, rINST, lsr #8          @ r1<- AA
+    GET_VREG r1, r1                     @ r1<- object
+    ldr      r2, [rFP, #OFF_FP_METHOD]  @ r2<- method
+    mov      r3, rSELF                  @ r3<- self
+    bl       MterpCheckCast             @ (index, obj, method, self)
+    PREFETCH_INST 2
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmp_long.S b/runtime/interpreter/mterp/arm/op_cmp_long.S
new file mode 100644
index 0000000..2b4c0ea
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmp_long.S
@@ -0,0 +1,56 @@
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     *
+     * We load the full values with LDM, but in practice many values could
+     * be resolved by only looking at the high word.  This could be made
+     * faster or slower by splitting the LDM into a pair of LDRs.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
+     */
+    /* cmp-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .L${opcode}_less            @ signed compare on high part
+    bgt     .L${opcode}_greater
+    subs    r1, r0, r2                  @ r1<- r0 - r2
+    bhi     .L${opcode}_greater         @ unsigned compare on low part
+    bne     .L${opcode}_less
+    b       .L${opcode}_finish          @ equal; r1 already holds 0
+%break
+
+.L${opcode}_less:
+    mvn     r1, #0                      @ r1<- -1
+    @ Want to cond code the next mov so we can avoid branch, but don't see it;
+    @ instead, we just replicate the tail end.
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L${opcode}_greater:
+    mov     r1, #1                      @ r1<- 1
+    @ fall through to _finish
+
+.L${opcode}_finish:
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
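
Restated in C++, the split signed/unsigned comparison above computes the usual
three-way result (a sketch, not the runtime's implementation):

    #include <cstdint>

    int32_t CmpLongSketch(int64_t x, int64_t y) {
      const int32_t xh = static_cast<int32_t>(x >> 32);
      const int32_t yh = static_cast<int32_t>(y >> 32);
      if (xh < yh) return -1;                  // signed compare on the high words
      if (xh > yh) return 1;
      const uint32_t xl = static_cast<uint32_t>(x);
      const uint32_t yl = static_cast<uint32_t>(y);
      if (xl > yl) return 1;                   // unsigned compare on the low words
      if (xl < yl) return -1;
      return 0;
    }
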
diff --git a/runtime/interpreter/mterp/arm/op_cmpg_double.S b/runtime/interpreter/mterp/arm/op_cmpg_double.S
new file mode 100644
index 0000000..4b05c44
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpg_double.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmpg_float.S b/runtime/interpreter/mterp/arm/op_cmpg_float.S
new file mode 100644
index 0000000..d5d2df2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpg_float.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmpl_double.S b/runtime/interpreter/mterp/arm/op_cmpl_double.S
new file mode 100644
index 0000000..6ee53b3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpl_double.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmpl_float.S b/runtime/interpreter/mterp/arm/op_cmpl_float.S
new file mode 100644
index 0000000..64535b6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpl_float.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
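
All four cmp handlers above use the same default-then-override trick: the biased
result is loaded before fmstat, so an unordered (NaN) comparison simply keeps it.
A C++ sketch of the resulting semantics:

    #include <cstdint>

    int32_t CmpFloatSketch(float x, float y, bool gt_bias) {
      int32_t result = gt_bias ? 1 : -1;               // the default also covers NaN
      if (x == y) {
        result = 0;
      } else if (gt_bias ? (x < y) : (x > y)) {
        result = gt_bias ? -1 : 1;
      }
      return result;
    }
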
diff --git a/runtime/interpreter/mterp/arm/op_const.S b/runtime/interpreter/mterp/arm/op_const.S
new file mode 100644
index 0000000..de3e3c3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const.S
@@ -0,0 +1,9 @@
+    /* const vAA, #+BBBBbbbb */
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_16.S b/runtime/interpreter/mterp/arm/op_const_16.S
new file mode 100644
index 0000000..59c6dac
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_16.S
@@ -0,0 +1,7 @@
+    /* const/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_4.S b/runtime/interpreter/mterp/arm/op_const_4.S
new file mode 100644
index 0000000..c177bb9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_4.S
@@ -0,0 +1,8 @@
+    /* const/4 vA, #+B */
+    mov     r1, rINST, lsl #16          @ r1<- Bxxx0000
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mov     r1, r1, asr #28             @ r1<- sssssssB (sign-extended)
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    SET_VREG r1, r0                     @ fp[A]<- r1
+    GOTO_OPCODE ip                      @ execute next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_class.S b/runtime/interpreter/mterp/arm/op_const_class.S
new file mode 100644
index 0000000..0b111f4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_class.S
@@ -0,0 +1,13 @@
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstClass             @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cmp     r0, #0
+    bne     MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_high16.S b/runtime/interpreter/mterp/arm/op_const_high16.S
new file mode 100644
index 0000000..460d546
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_high16.S
@@ -0,0 +1,8 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, r0, lsl #16             @ r0<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_string.S b/runtime/interpreter/mterp/arm/op_const_string.S
new file mode 100644
index 0000000..4b8302a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_string.S
@@ -0,0 +1,13 @@
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_string_jumbo.S b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
new file mode 100644
index 0000000..1a3d0b2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
@@ -0,0 +1,15 @@
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     @ advance rPC
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 3                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide.S b/runtime/interpreter/mterp/arm/op_const_wide.S
new file mode 100644
index 0000000..2cdc426
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide.S
@@ -0,0 +1,13 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (low middle)
+    FETCH r2, 3                         @ r2<- hhhh (high middle)
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb (low word)
+    FETCH r3, 4                         @ r3<- HHHH (high)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
+    FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_16.S b/runtime/interpreter/mterp/arm/op_const_wide_16.S
new file mode 100644
index 0000000..56bfc17
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide_16.S
@@ -0,0 +1,9 @@
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_32.S b/runtime/interpreter/mterp/arm/op_const_wide_32.S
new file mode 100644
index 0000000..36d4628
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide_32.S
@@ -0,0 +1,11 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH r0, 1                         @ r0<- 0000bbbb (low)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_S r2, 2                       @ r2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_high16.S b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
new file mode 100644
index 0000000..bee592d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
@@ -0,0 +1,10 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH r1, 1                         @ r1<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, #0                      @ r0<- 00000000
+    mov     r1, r1, lsl #16             @ r1<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_div_double.S b/runtime/interpreter/mterp/arm/op_div_double.S
new file mode 100644
index 0000000..5147550
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"fdivd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_double_2addr.S b/runtime/interpreter/mterp/arm/op_div_double_2addr.S
new file mode 100644
index 0000000..b812f17
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"fdivd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_float.S b/runtime/interpreter/mterp/arm/op_div_float.S
new file mode 100644
index 0000000..0f24d11
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fdivs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_float_2addr.S b/runtime/interpreter/mterp/arm/op_div_float_2addr.S
new file mode 100644
index 0000000..a1dbf01
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fdivs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_int.S b/runtime/interpreter/mterp/arm/op_div_int.S
new file mode 100644
index 0000000..251064b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int.S
@@ -0,0 +1,30 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl    __aeabi_idiv                  @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
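
The integer division handlers share these semantics, sketched in C++ below. The zero
check maps to common_errDivideByZero; the INT32_MIN / -1 case is spelled out because
Dalvik defines its result (it wraps) while it is undefined behaviour in C++:

    #include <cstdint>

    bool DivIntSketch(int32_t num, int32_t den, int32_t* out) {
      if (den == 0) return false;          // divide-by-zero -> exception path
      if (num == INT32_MIN && den == -1) {
        *out = INT32_MIN;                  // wraps, matching the hardware sdiv result
      } else {
        *out = num / den;                  // sdiv when available, __aeabi_idiv otherwise
      }
      return true;
    }
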
diff --git a/runtime/interpreter/mterp/arm/op_div_int_2addr.S b/runtime/interpreter/mterp/arm/op_div_int_2addr.S
new file mode 100644
index 0000000..9be4cd8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int_2addr.S
@@ -0,0 +1,29 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
diff --git a/runtime/interpreter/mterp/arm/op_div_int_lit16.S b/runtime/interpreter/mterp/arm/op_div_int_lit16.S
new file mode 100644
index 0000000..d9bc7d6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int_lit16.S
@@ -0,0 +1,28 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_div_int_lit8.S b/runtime/interpreter/mterp/arm/op_div_int_lit8.S
new file mode 100644
index 0000000..5d2dbd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int_lit8.S
@@ -0,0 +1,29 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp     r1, #0                     @ is second operand zero? (flags already set by movs above)
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl   __aeabi_idiv                   @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_div_long.S b/runtime/interpreter/mterp/arm/op_div_long.S
new file mode 100644
index 0000000..0f21a84
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"instr":"bl      __aeabi_ldivmod", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_long_2addr.S b/runtime/interpreter/mterp/arm/op_div_long_2addr.S
new file mode 100644
index 0000000..e172b29
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"instr":"bl      __aeabi_ldivmod", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_double_to_float.S b/runtime/interpreter/mterp/arm/op_double_to_float.S
new file mode 100644
index 0000000..e327000
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_double_to_float.S
@@ -0,0 +1 @@
+%include "arm/funopNarrower.S" {"instr":"fcvtsd  s0, d0"}
diff --git a/runtime/interpreter/mterp/arm/op_double_to_int.S b/runtime/interpreter/mterp/arm/op_double_to_int.S
new file mode 100644
index 0000000..aa035de
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_double_to_int.S
@@ -0,0 +1 @@
+%include "arm/funopNarrower.S" {"instr":"ftosizd  s0, d0"}
diff --git a/runtime/interpreter/mterp/arm/op_double_to_long.S b/runtime/interpreter/mterp/arm/op_double_to_long.S
new file mode 100644
index 0000000..b100810
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_double_to_long.S
@@ -0,0 +1,52 @@
+@include "arm/unopWide.S" {"instr":"bl      __aeabi_d2lz"}
+%include "arm/unopWide.S" {"instr":"bl      d2l_doconv"}
+
+%break
+/*
+ * Convert the double in r0/r1 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to modest integer.  The EABI convert function isn't doing this for us.
+ */
+d2l_doconv:
+    stmfd   sp!, {r4, r5, lr}           @ save regs
+    mov     r3, #0x43000000             @ maxlong, as a double (high word)
+    add     r3, #0x00e00000             @  0x43e00000
+    mov     r2, #0                      @ maxlong, as a double (low word)
+    sub     sp, sp, #4                  @ align for EABI
+    mov     r4, r0                      @ save a copy of r0
+    mov     r5, r1                      @  and r1
+    bl      __aeabi_dcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r3, #0xc3000000             @ minlong, as a double (high word)
+    add     r3, #0x00e00000             @  0xc3e00000
+    mov     r2, #0                      @ minlong, as a double (low word)
+    bl      __aeabi_dcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r2, r4                      @ compare against self
+    mov     r3, r5
+    bl      __aeabi_dcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    beq     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    bl      __aeabi_d2lz                @ convert double to long
+
+1:
+    add     sp, sp, #4
+    ldmfd   sp!, {r4, r5, pc}
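
d2l_doconv implements the Java narrowing rules that __aeabi_d2lz alone does not
guarantee. As a C++ sketch:

    #include <cstdint>

    int64_t DoubleToLongSketch(double d) {
      if (d != d) return 0;                               // NaN converts to zero
      if (d >= 9223372036854775808.0) return INT64_MAX;   // >= 2^63  (the 0x43e00000 constant)
      if (d <= -9223372036854775808.0) return INT64_MIN;  // <= -2^63 (the 0xc3e00000 constant)
      return static_cast<int64_t>(d);                     // ordinary truncation toward zero
    }
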
diff --git a/runtime/interpreter/mterp/arm/op_fill_array_data.S b/runtime/interpreter/mterp/arm/op_fill_array_data.S
new file mode 100644
index 0000000..e1ca85c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_fill_array_data.S
@@ -0,0 +1,14 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r1, r0, r1, lsl #16         @ r1<- BBBBbbbb
+    GET_VREG r0, r3                     @ r0<- vAA (array object)
+    add     r1, rPC, r1, lsl #1         @ r1<- PC + BBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          @ (obj, payload)
+    cmp     r0, #0                      @ 0 means an exception is thrown
+    beq     MterpPossibleException      @ exception?
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_filled_new_array.S b/runtime/interpreter/mterp/arm/op_filled_new_array.S
new file mode 100644
index 0000000..1075f0c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_filled_new_array.S
@@ -0,0 +1,19 @@
+%default { "helper":"MterpFilledNewArray" }
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern $helper
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rSELF
+    bl      $helper
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_filled_new_array_range.S b/runtime/interpreter/mterp/arm/op_filled_new_array_range.S
new file mode 100644
index 0000000..16567af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "arm/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/arm/op_float_to_double.S b/runtime/interpreter/mterp/arm/op_float_to_double.S
new file mode 100644
index 0000000..fb1892b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_float_to_double.S
@@ -0,0 +1 @@
+%include "arm/funopWider.S" {"instr":"fcvtds  d0, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_float_to_int.S b/runtime/interpreter/mterp/arm/op_float_to_int.S
new file mode 100644
index 0000000..aab8716
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_float_to_int.S
@@ -0,0 +1 @@
+%include "arm/funop.S" {"instr":"ftosizs s1, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_float_to_long.S b/runtime/interpreter/mterp/arm/op_float_to_long.S
new file mode 100644
index 0000000..24416d3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_float_to_long.S
@@ -0,0 +1,39 @@
+@include "arm/unopWider.S" {"instr":"bl      __aeabi_f2lz"}
+%include "arm/unopWider.S" {"instr":"bl      f2l_doconv"}
+
+%break
+/*
+ * Convert the float in r0 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to a modest integer.  The EABI conversion function doesn't do this for us.
+ */
+f2l_doconv:
+    stmfd   sp!, {r4, lr}
+    mov     r1, #0x5f000000             @ (float)maxlong
+    mov     r4, r0
+    bl      __aeabi_fcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffff)
+    mvnne   r1, #0x80000000
+    ldmnefd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, #0xdf000000             @ (float)minlong
+    bl      __aeabi_fcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (80000000)
+    movne   r1, #0x80000000
+    ldmnefd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r4
+    bl      __aeabi_fcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    ldmeqfd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    bl      __aeabi_f2lz                @ convert float to long
+    ldmfd   sp!, {r4, pc}
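
The clamping that f2l_doconv performs above corresponds roughly to the following C sketch (illustrative only; the function name is invented and the real handler stays in assembly):

    #include <stdint.h>

    /* Rough C equivalent of f2l_doconv: clamp to long min/max per the
     * Dalvik spec, return 0 for NaN, otherwise truncate toward zero. */
    static int64_t float_to_long_sketch(float in) {
        if (in >= 9223372036854775808.0f)   /* (float)maxlong == 0x5f000000 */
            return INT64_MAX;
        if (in <= -9223372036854775808.0f)  /* (float)minlong == 0xdf000000 */
            return INT64_MIN;
        if (in != in)                       /* NaN compares unequal to itself */
            return 0;
        return (int64_t)in;                 /* __aeabi_f2lz handles the plain case */
    }
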
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
new file mode 100644
index 0000000..9b3632a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -0,0 +1,28 @@
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    /* tuning: use sbfx for 6t2+ targets */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
+       @ If backwards branch refresh rIBASE
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backwards branch refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
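
For reference, the address arithmetic described in the comment above amounts to the C sketch below (hypothetical helper; Dalvik code units are 16 bits, so doubling the code-unit offset gives the byte displacement):

    #include <stdint.h>

    /* goto +AA: AA is a signed 8-bit offset counted in code units. */
    static uint16_t* goto_target_sketch(uint16_t* pc, uint16_t inst) {
        int8_t offset = (int8_t)(inst >> 8);   /* ssssssAA, sign-extended */
        return pc + offset;                    /* uint16_t* math == offset * 2 bytes */
    }
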
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
new file mode 100644
index 0000000..2231acd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -0,0 +1,23 @@
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+#if MTERP_SUSPEND
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
new file mode 100644
index 0000000..6b72ff5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -0,0 +1,32 @@
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  Because
+     * we need the V bit set, we'll use an adds to convert from Dalvik
+     * offset to byte offset.
+     */
+    /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm/op_if_eq.S b/runtime/interpreter/mterp/arm/op_if_eq.S
new file mode 100644
index 0000000..5685686
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_eq.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_if_eqz.S b/runtime/interpreter/mterp/arm/op_if_eqz.S
new file mode 100644
index 0000000..2a9c0f9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_eqz.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ge.S b/runtime/interpreter/mterp/arm/op_if_ge.S
new file mode 100644
index 0000000..60a0307
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_ge.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gez.S b/runtime/interpreter/mterp/arm/op_if_gez.S
new file mode 100644
index 0000000..981cdec
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_gez.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gt.S b/runtime/interpreter/mterp/arm/op_if_gt.S
new file mode 100644
index 0000000..ca50cd7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_gt.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gtz.S b/runtime/interpreter/mterp/arm/op_if_gtz.S
new file mode 100644
index 0000000..c621812
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_gtz.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_le.S b/runtime/interpreter/mterp/arm/op_if_le.S
new file mode 100644
index 0000000..7e060f2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_le.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lez.S b/runtime/interpreter/mterp/arm/op_if_lez.S
new file mode 100644
index 0000000..f92be23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_lez.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lt.S b/runtime/interpreter/mterp/arm/op_if_lt.S
new file mode 100644
index 0000000..213344d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_lt.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ltz.S b/runtime/interpreter/mterp/arm/op_if_ltz.S
new file mode 100644
index 0000000..dfd4e44
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_ltz.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ne.S b/runtime/interpreter/mterp/arm/op_if_ne.S
new file mode 100644
index 0000000..4a58b4a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_ne.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_if_nez.S b/runtime/interpreter/mterp/arm/op_if_nez.S
new file mode 100644
index 0000000..d864ef4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_nez.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_iget.S b/runtime/interpreter/mterp/arm/op_iget.S
new file mode 100644
index 0000000..c7f777b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget.S
@@ -0,0 +1,26 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       $helper
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if $is_object
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_boolean.S b/runtime/interpreter/mterp/arm/op_iget_boolean.S
new file mode 100644
index 0000000..628f40a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_boolean_quick.S b/runtime/interpreter/mterp/arm/op_iget_boolean_quick.S
new file mode 100644
index 0000000..0ae4843
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrb" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_byte.S b/runtime/interpreter/mterp/arm/op_iget_byte.S
new file mode 100644
index 0000000..c4e08e2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_byte.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_byte_quick.S b/runtime/interpreter/mterp/arm/op_iget_byte_quick.S
new file mode 100644
index 0000000..e1b3083
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrsb" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_char.S b/runtime/interpreter/mterp/arm/op_iget_char.S
new file mode 100644
index 0000000..5e8da66
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_char.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_char_quick.S b/runtime/interpreter/mterp/arm/op_iget_char_quick.S
new file mode 100644
index 0000000..b44d8f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrh" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_object.S b/runtime/interpreter/mterp/arm/op_iget_object.S
new file mode 100644
index 0000000..1cf2e3c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_object.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_object_quick.S b/runtime/interpreter/mterp/arm/op_iget_object_quick.S
new file mode 100644
index 0000000..16cb118
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_object_quick.S
@@ -0,0 +1,16 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- object we're operating on
+    bl      artIGetObjectFromMterp      @ (obj, offset)
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    PREFETCH_INST 2
+    cmp     r3, #0
+    bne     MterpPossibleException      @ bail out
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_quick.S b/runtime/interpreter/mterp/arm/op_iget_quick.S
new file mode 100644
index 0000000..0eaf364
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_quick.S
@@ -0,0 +1,14 @@
+%default { "load":"ldr" }
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    $load   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_short.S b/runtime/interpreter/mterp/arm/op_iget_short.S
new file mode 100644
index 0000000..460f045
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_short.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_short_quick.S b/runtime/interpreter/mterp/arm/op_iget_short_quick.S
new file mode 100644
index 0000000..1831b99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrsh" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide.S b/runtime/interpreter/mterp/arm/op_iget_wide.S
new file mode 100644
index 0000000..f8d2f41
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_wide.S
@@ -0,0 +1,22 @@
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGet64InstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpException                @ bail out
+    add     r3, rFP, r2, lsl #2            @ r3<- &fp[A]
+    stmia   r3, {r0-r1}                    @ fp[A]<- r0/r1
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
new file mode 100644
index 0000000..4d6976e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
@@ -0,0 +1,13 @@
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH ip, 1                         @ ip<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrd    r0, [r3, ip]                @ r0/r1<- obj.field (64 bits, aligned)

+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r2, lsl #2         @ r3<- &fp[A]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_instance_of.S b/runtime/interpreter/mterp/arm/op_instance_of.S
new file mode 100644
index 0000000..e94108c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_instance_of.S
@@ -0,0 +1,24 @@
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    FETCH     r0, 1                     @ r0<- CCCC
+    mov       r1, rINST, lsr #12        @ r1<- B
+    GET_VREG  r1, r1                    @ r1<- vB (object)
+    ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
+    mov       r3, rSELF                 @ r3<- self
+    mov       r9, rINST, lsr #8         @ r9<- A+
+    and       r9, r9, #15               @ r9<- A
+    bl        MterpInstanceOf           @ (index, obj, method, self)
+    ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp       r1, #0                    @ exception pending?
+    bne       MterpException
+    ADVANCE 2                           @ advance rPC
+    SET_VREG r0, r9                     @ vA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_int_to_byte.S b/runtime/interpreter/mterp/arm/op_int_to_byte.S
new file mode 100644
index 0000000..059d5c2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"sxtb    r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_char.S b/runtime/interpreter/mterp/arm/op_int_to_char.S
new file mode 100644
index 0000000..83a0c19
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_char.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"uxth    r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_double.S b/runtime/interpreter/mterp/arm/op_int_to_double.S
new file mode 100644
index 0000000..810c2e4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_double.S
@@ -0,0 +1 @@
+%include "arm/funopWider.S" {"instr":"fsitod  d0, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_float.S b/runtime/interpreter/mterp/arm/op_int_to_float.S
new file mode 100644
index 0000000..f41654c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_float.S
@@ -0,0 +1 @@
+%include "arm/funop.S" {"instr":"fsitos  s1, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_long.S b/runtime/interpreter/mterp/arm/op_int_to_long.S
new file mode 100644
index 0000000..b5aed8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_long.S
@@ -0,0 +1 @@
+%include "arm/unopWider.S" {"instr":"mov     r1, r0, asr #31"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_short.S b/runtime/interpreter/mterp/arm/op_int_to_short.S
new file mode 100644
index 0000000..717bd96
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_short.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"sxth    r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_invoke_direct.S b/runtime/interpreter/mterp/arm/op_invoke_direct.S
new file mode 100644
index 0000000..1edf221
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_direct_range.S b/runtime/interpreter/mterp/arm/op_invoke_direct_range.S
new file mode 100644
index 0000000..3097b8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_interface.S b/runtime/interpreter/mterp/arm/op_invoke_interface.S
new file mode 100644
index 0000000..f6d565b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeInterface" }
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm/op_invoke_interface_range.S b/runtime/interpreter/mterp/arm/op_invoke_interface_range.S
new file mode 100644
index 0000000..c8443b0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_static.S b/runtime/interpreter/mterp/arm/op_invoke_static.S
new file mode 100644
index 0000000..c3cefcf
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_static.S
@@ -0,0 +1,2 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeStatic" }
+
diff --git a/runtime/interpreter/mterp/arm/op_invoke_static_range.S b/runtime/interpreter/mterp/arm/op_invoke_static_range.S
new file mode 100644
index 0000000..dd60d7b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_super.S b/runtime/interpreter/mterp/arm/op_invoke_super.S
new file mode 100644
index 0000000..92ef2a4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeSuper" }
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm/op_invoke_super_range.S b/runtime/interpreter/mterp/arm/op_invoke_super_range.S
new file mode 100644
index 0000000..9e4fb1c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual.S b/runtime/interpreter/mterp/arm/op_invoke_virtual.S
new file mode 100644
index 0000000..5b893ff
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtual" }
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/arm/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..020e8b8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual_range.S b/runtime/interpreter/mterp/arm/op_invoke_virtual_range.S
new file mode 100644
index 0000000..2b42a78
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/arm/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..42f2ded
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/arm/op_iput.S b/runtime/interpreter/mterp/arm/op_iput.S
new file mode 100644
index 0000000..d224cd8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput.S
@@ -0,0 +1,22 @@
+%default { "is_object":"0", "handler":"artSet32InstanceFromMterp" }
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern $handler
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       $handler
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_boolean.S b/runtime/interpreter/mterp/arm/op_iput_boolean.S
new file mode 100644
index 0000000..c9e8589
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_boolean_quick.S b/runtime/interpreter/mterp/arm/op_iput_boolean_quick.S
new file mode 100644
index 0000000..f0a2777
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_byte.S b/runtime/interpreter/mterp/arm/op_iput_byte.S
new file mode 100644
index 0000000..c9e8589
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_byte.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_byte_quick.S b/runtime/interpreter/mterp/arm/op_iput_byte_quick.S
new file mode 100644
index 0000000..f0a2777
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_char.S b/runtime/interpreter/mterp/arm/op_iput_char.S
new file mode 100644
index 0000000..5046f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_char.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_char_quick.S b/runtime/interpreter/mterp/arm/op_iput_char_quick.S
new file mode 100644
index 0000000..5212fc3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_object.S b/runtime/interpreter/mterp/arm/op_iput_object.S
new file mode 100644
index 0000000..d942e84
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_object.S
@@ -0,0 +1,11 @@
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpIputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_object_quick.S b/runtime/interpreter/mterp/arm/op_iput_object_quick.S
new file mode 100644
index 0000000..876b3da
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_object_quick.S
@@ -0,0 +1,10 @@
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpIputObjectQuick
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_quick.S b/runtime/interpreter/mterp/arm/op_iput_quick.S
new file mode 100644
index 0000000..98c8150
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_quick.S
@@ -0,0 +1,14 @@
+%default { "store":"str" }
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $store     r0, [r3, r1]             @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_short.S b/runtime/interpreter/mterp/arm/op_iput_short.S
new file mode 100644
index 0000000..5046f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_short.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_short_quick.S b/runtime/interpreter/mterp/arm/op_iput_short_quick.S
new file mode 100644
index 0000000..5212fc3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide.S b/runtime/interpreter/mterp/arm/op_iput_wide.S
new file mode 100644
index 0000000..8bbd63e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_wide.S
@@ -0,0 +1,16 @@
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    add      r2, rFP, r2, lsl #2        @ r2<- &fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
new file mode 100644
index 0000000..a2fc9e1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
@@ -0,0 +1,13 @@
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r3, 1                         @ r3<- field byte offset
+    GET_VREG r2, r2                     @ r2<- fp[B], the object pointer
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    cmp     r2, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    add     r0, rFP, r0, lsl #2         @ r0<- &fp[A]
+    ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strd    r0, [r2, r3]                @ obj.field<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_long_to_double.S b/runtime/interpreter/mterp/arm/op_long_to_double.S
new file mode 100644
index 0000000..1d48a2a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_long_to_double.S
@@ -0,0 +1,27 @@
+%default {}
+    /*
+     * Specialised 64-bit floating point operation.
+     *
+     * Note: The result will be returned in d2.
+     *
+     * For: long-to-double
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    vldr    d0, [r3]                    @ d0<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    vcvt.f64.s32    d1, s1              @ d1<- (double)(vAAh)
+    vcvt.f64.u32    d2, s0              @ d2<- (double)(vAAl)
+    vldr            d3, constval$opcode
+    vmla.f64        d2, d1, d3          @ d2<- vAAh*2^32 + vAAl
+
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    vstr.64 d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+    /* literal pool helper */
+constval${opcode}:
+    .8byte          0x41f0000000000000
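
The vcvt/vmla sequence above rebuilds the 64-bit value from its two halves; a C sketch of the same math (the name is invented for illustration):

    #include <stdint.h>

    /* long-to-double as (signed high) * 2^32 + (unsigned low);
     * 0x41f0000000000000 is 2^32 encoded as a double. */
    static double long_to_double_sketch(int64_t in) {
        int32_t  hi = (int32_t)(in >> 32);   /* vAAh */
        uint32_t lo = (uint32_t)in;          /* vAAl */
        return (double)hi * 4294967296.0 + (double)lo;
    }
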
diff --git a/runtime/interpreter/mterp/arm/op_long_to_float.S b/runtime/interpreter/mterp/arm/op_long_to_float.S
new file mode 100644
index 0000000..efa5a66
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_long_to_float.S
@@ -0,0 +1 @@
+%include "arm/unopNarrower.S" {"instr":"bl      __aeabi_l2f"}
diff --git a/runtime/interpreter/mterp/arm/op_long_to_int.S b/runtime/interpreter/mterp/arm/op_long_to_int.S
new file mode 100644
index 0000000..3e91f23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "arm/op_move.S"
diff --git a/runtime/interpreter/mterp/arm/op_monitor_enter.S b/runtime/interpreter/mterp/arm/op_monitor_enter.S
new file mode 100644
index 0000000..3c34f75
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_monitor_enter.S
@@ -0,0 +1,14 @@
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                      @ r0<- vAA (object)
+    mov      r1, rSELF                   @ r1<- self
+    bl       artLockObjectFromCode
+    cmp      r0, #0
+    bne      MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   @ extract opcode from rINST
+    GOTO_OPCODE ip                       @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_monitor_exit.S b/runtime/interpreter/mterp/arm/op_monitor_exit.S
new file mode 100644
index 0000000..fc7cef5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_monitor_exit.S
@@ -0,0 +1,18 @@
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8          @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA (object)
+    mov      r1, rSELF                  @ r1<- self
+    bl       artUnlockObjectFromCode    @ r0<- success for unlock(obj, self)
+    cmp     r0, #0                      @ failed?
+    bne     MterpException
+    FETCH_ADVANCE_INST 1                @ before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move.S b/runtime/interpreter/mterp/arm/op_move.S
new file mode 100644
index 0000000..dfecc24
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_16.S b/runtime/interpreter/mterp/arm/op_move_16.S
new file mode 100644
index 0000000..78138a2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH r1, 2                         @ r1<- BBBB
+    FETCH r0, 1                         @ r0<- AAAA
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r0              @ fp[AAAA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AAAA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_exception.S b/runtime/interpreter/mterp/arm/op_move_exception.S
new file mode 100644
index 0000000..0242e26
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_exception.S
@@ -0,0 +1,9 @@
+    /* move-exception vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     r1, #0                      @ r1<- 0
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    SET_VREG_OBJECT r3, r2              @ fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    str     r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ clear exception
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_from16.S b/runtime/interpreter/mterp/arm/op_move_from16.S
new file mode 100644
index 0000000..3e79417
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_from16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH r1, 1                         @ r1<- BBBB
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r0              @ fp[AA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_object.S b/runtime/interpreter/mterp/arm/op_move_object.S
new file mode 100644
index 0000000..16de57b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_object.S
@@ -0,0 +1 @@
+%include "arm/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_object_16.S b/runtime/interpreter/mterp/arm/op_move_object_16.S
new file mode 100644
index 0000000..2534300
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_object_16.S
@@ -0,0 +1 @@
+%include "arm/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_object_from16.S b/runtime/interpreter/mterp/arm/op_move_object_from16.S
new file mode 100644
index 0000000..9e0cf02
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "arm/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_result.S b/runtime/interpreter/mterp/arm/op_move_result.S
new file mode 100644
index 0000000..f2586a0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_result.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r0, [rFP, #OFF_FP_RESULT_REGISTER]  @ get pointer to result JType.
+    ldr     r0, [r0]                    @ r0 <- result.i.
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r0, r2, r1          @ fp[AA]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_result_object.S b/runtime/interpreter/mterp/arm/op_move_result_object.S
new file mode 100644
index 0000000..643296a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_result_object.S
@@ -0,0 +1 @@
+%include "arm/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_result_wide.S b/runtime/interpreter/mterp/arm/op_move_result_wide.S
new file mode 100644
index 0000000..c64103c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_result_wide.S
@@ -0,0 +1,9 @@
+    /* move-result-wide vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_wide.S b/runtime/interpreter/mterp/arm/op_move_wide.S
new file mode 100644
index 0000000..1345b95
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_wide.S
@@ -0,0 +1,11 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_16.S b/runtime/interpreter/mterp/arm/op_move_wide_16.S
new file mode 100644
index 0000000..133a4c3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_wide_16.S
@@ -0,0 +1,11 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 2                         @ r3<- BBBB
+    FETCH r2, 1                         @ r2<- AAAA
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AAAA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AAAA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_from16.S b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
new file mode 100644
index 0000000..f2ae785
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
@@ -0,0 +1,11 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 1                         @ r3<- BBBB
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_mul_double.S b/runtime/interpreter/mterp/arm/op_mul_double.S
new file mode 100644
index 0000000..530e85a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"fmuld   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_double_2addr.S b/runtime/interpreter/mterp/arm/op_mul_double_2addr.S
new file mode 100644
index 0000000..da1abc6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"fmuld   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_float.S b/runtime/interpreter/mterp/arm/op_mul_float.S
new file mode 100644
index 0000000..6a72e6f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fmuls   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_float_2addr.S b/runtime/interpreter/mterp/arm/op_mul_float_2addr.S
new file mode 100644
index 0000000..edb5101
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fmuls   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int.S b/runtime/interpreter/mterp/arm/op_mul_int.S
new file mode 100644
index 0000000..d6151d4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binop.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int_2addr.S b/runtime/interpreter/mterp/arm/op_mul_int_2addr.S
new file mode 100644
index 0000000..66a797d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int_2addr.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binop2addr.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int_lit16.S b/runtime/interpreter/mterp/arm/op_mul_int_lit16.S
new file mode 100644
index 0000000..4e40c43
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int_lit16.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binopLit16.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int_lit8.S b/runtime/interpreter/mterp/arm/op_mul_int_lit8.S
new file mode 100644
index 0000000..dbafae9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int_lit8.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binopLit8.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_long.S b/runtime/interpreter/mterp/arm/op_mul_long.S
new file mode 100644
index 0000000..9e83778
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_long.S
@@ -0,0 +1,36 @@
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
+    /* mul-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r0, rFP, r0, lsl #2         @ r0<- &fp[AA]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    GOTO_OPCODE ip                      @ jump to next instruction
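
The partial-product scheme described in the comment can be written out in C as below (sketch only; the low 64 bits of the product are the same for signed and unsigned operands, which is why YW can be dropped):

    #include <stdint.h>

    /* r1r0 = WX, r3r2 = YZ; result low word = low(ZX),
     * result high word = high(ZX) + ZW + YX. */
    static uint64_t mul_long_sketch(uint64_t wx, uint64_t yz) {
        uint32_t x = (uint32_t)wx, w = (uint32_t)(wx >> 32);
        uint32_t z = (uint32_t)yz, y = (uint32_t)(yz >> 32);
        uint64_t zx = (uint64_t)z * x;                       /* umull r9, r10, r2, r0 */
        uint32_t hi = (uint32_t)(zx >> 32) + z * w + y * x;  /* mla, then add */
        return ((uint64_t)hi << 32) | (uint32_t)zx;
    }
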
diff --git a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
new file mode 100644
index 0000000..789dbd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
@@ -0,0 +1,24 @@
+    /*
+     * Signed 64-bit integer multiply, "/2addr" version.
+     *
+     * See op_mul_long for an explanation.
+     *
+     * We get a little tight on registers, so to avoid looking up &fp[A]
+     * again we stuff it into rINST.
+     */
+    /* mul-long/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     rINST, rFP, r9, lsl #2      @ rINST<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_neg_double.S b/runtime/interpreter/mterp/arm/op_neg_double.S
new file mode 100644
index 0000000..33e609c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_double.S
@@ -0,0 +1 @@
+%include "arm/unopWide.S" {"instr":"add     r1, r1, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm/op_neg_float.S b/runtime/interpreter/mterp/arm/op_neg_float.S
new file mode 100644
index 0000000..993583f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_float.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"add     r0, r0, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm/op_neg_int.S b/runtime/interpreter/mterp/arm/op_neg_int.S
new file mode 100644
index 0000000..ec0b253
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_int.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"rsb     r0, r0, #0"}
diff --git a/runtime/interpreter/mterp/arm/op_neg_long.S b/runtime/interpreter/mterp/arm/op_neg_long.S
new file mode 100644
index 0000000..dab2eb4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_long.S
@@ -0,0 +1 @@
+%include "arm/unopWide.S" {"preinstr":"rsbs    r0, r0, #0", "instr":"rsc     r1, r1, #0"}
diff --git a/runtime/interpreter/mterp/arm/op_new_array.S b/runtime/interpreter/mterp/arm/op_new_array.S
new file mode 100644
index 0000000..8bb792c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_new_array.S
@@ -0,0 +1,19 @@
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpNewArray
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_new_instance.S b/runtime/interpreter/mterp/arm/op_new_instance.S
new file mode 100644
index 0000000..95d4be8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_new_instance.S
@@ -0,0 +1,14 @@
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rSELF
+    mov     r2, rINST
+    bl      MterpNewInstance           @ (shadow_frame, self, inst_data)
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2               @ advance rPC, load rINST
+    GET_INST_OPCODE ip                 @ extract opcode from rINST
+    GOTO_OPCODE ip                     @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_nop.S b/runtime/interpreter/mterp/arm/op_nop.S
new file mode 100644
index 0000000..af0f88f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_nop.S
@@ -0,0 +1,3 @@
+    FETCH_ADVANCE_INST 1                @ advance to next instr, load rINST
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    GOTO_OPCODE ip                      @ execute it
diff --git a/runtime/interpreter/mterp/arm/op_not_int.S b/runtime/interpreter/mterp/arm/op_not_int.S
new file mode 100644
index 0000000..816485a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_not_int.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"mvn     r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_not_long.S b/runtime/interpreter/mterp/arm/op_not_long.S
new file mode 100644
index 0000000..49a5905
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_not_long.S
@@ -0,0 +1 @@
+%include "arm/unopWide.S" {"preinstr":"mvn     r0, r0", "instr":"mvn     r1, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int.S b/runtime/interpreter/mterp/arm/op_or_int.S
new file mode 100644
index 0000000..b046e8d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_2addr.S b/runtime/interpreter/mterp/arm/op_or_int_2addr.S
new file mode 100644
index 0000000..493c59f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_lit16.S b/runtime/interpreter/mterp/arm/op_or_int_lit16.S
new file mode 100644
index 0000000..0a01db8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_lit8.S b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
new file mode 100644
index 0000000..2d85038
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_long.S b/runtime/interpreter/mterp/arm/op_or_long.S
new file mode 100644
index 0000000..048c45c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"orr     r0, r0, r2", "instr":"orr     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_or_long_2addr.S b/runtime/interpreter/mterp/arm/op_or_long_2addr.S
new file mode 100644
index 0000000..9395346
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"orr     r0, r0, r2", "instr":"orr     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
new file mode 100644
index 0000000..1e3370e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -0,0 +1,39 @@
+%default { "func":"MterpDoPackedSwitch" }
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      $func                       @ r0<- code-unit branch offset
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      $func                       @ r0<- code-unit branch offset
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
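
Illustrative sketch in C (not part of the patch): how the packed/sparse-switch handler above assembles the signed 32-bit table offset from the two fetched 16-bit code units and how the helper's code-unit branch result is doubled into a byte offset. Function and parameter names here are made up for illustration.

    #include <stdint.h>

    static const uint16_t* locate_switch_table(const uint16_t* pc,
                                               uint16_t bbbb_lo, uint16_t bbbb_hi) {
        /* "orr r0, r0, r1, lsl #16" then "add r0, rPC, r0, lsl #1":
         * the signed table offset is counted in 16-bit code units. */
        int32_t table_offset = (int32_t)((uint32_t)bbbb_lo | ((uint32_t)bbbb_hi << 16));
        return pc + table_offset;
    }

    static const uint16_t* advance_after_switch(const uint16_t* pc,
                                                int32_t code_unit_offset) {
        /* "adds r1, r0, r0" doubles the helper's code-unit result into a byte
         * offset; a negative (backward) result routes through the suspend check. */
        return (const uint16_t*)((const uint8_t*)pc + code_unit_offset * 2);
    }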
diff --git a/runtime/interpreter/mterp/arm/op_rem_double.S b/runtime/interpreter/mterp/arm/op_rem_double.S
new file mode 100644
index 0000000..b539221
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_double.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a double remainder function, but libm does */
+%include "arm/binopWide.S" {"instr":"bl      fmod"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_double_2addr.S b/runtime/interpreter/mterp/arm/op_rem_double_2addr.S
new file mode 100644
index 0000000..372ef1d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_double_2addr.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a double remainder function, but libm does */
+%include "arm/binopWide2addr.S" {"instr":"bl      fmod"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_float.S b/runtime/interpreter/mterp/arm/op_rem_float.S
new file mode 100644
index 0000000..7bd10de
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_float.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a float remainder function, but libm does */
+%include "arm/binop.S" {"instr":"bl      fmodf"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm/op_rem_float_2addr.S
new file mode 100644
index 0000000..93c5fae
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_float_2addr.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a float remainder function, but libm does */
+%include "arm/binop2addr.S" {"instr":"bl      fmodf"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_int.S b/runtime/interpreter/mterp/arm/op_rem_int.S
new file mode 100644
index 0000000..ff62573
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int.S
@@ -0,0 +1,33 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between the sdiv instruction and the
+     * gcc helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls  r1, r1, r2, r0                 @ r1<- op, r0-r2 changed
+#else
+    bl   __aeabi_idivmod                @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
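
A rough C equivalent of the arithmetic performed by the sdiv/mls pair above (or, without hardware divide, by __aeabi_idivmod, which returns the same quotient/remainder pair). The sketch is illustrative only; the handler itself branches to common_errDivideByZero rather than returning.

    #include <stdint.h>

    static int32_t dex_rem_int(int32_t vBB, int32_t vCC) {
        if (vCC == 0) {
            return 0;                     /* handler: beq common_errDivideByZero */
        }
        int32_t quot = vBB / vCC;         /* sdiv r2, r0, r1     */
        return vBB - quot * vCC;          /* mls  r1, r1, r2, r0 */
    }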
diff --git a/runtime/interpreter/mterp/arm/op_rem_int_2addr.S b/runtime/interpreter/mterp/arm/op_rem_int_2addr.S
new file mode 100644
index 0000000..ba5751a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int_2addr.S
@@ -0,0 +1,32 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between the sdiv instruction and the
+     * gcc helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
diff --git a/runtime/interpreter/mterp/arm/op_rem_int_lit16.S b/runtime/interpreter/mterp/arm/op_rem_int_lit16.S
new file mode 100644
index 0000000..4edb187
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int_lit16.S
@@ -0,0 +1,31 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between the sdiv instruction and the
+     * gcc helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl     __aeabi_idivmod              @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_rem_int_lit8.S b/runtime/interpreter/mterp/arm/op_rem_int_lit8.S
new file mode 100644
index 0000000..3888361
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int_lit8.S
@@ -0,0 +1,32 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between the sdiv instruction and the
+     * gcc helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp     r1, #0                     @ is second operand zero? (flags set by movs above)
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl       __aeabi_idivmod            @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_rem_long.S b/runtime/interpreter/mterp/arm/op_rem_long.S
new file mode 100644
index 0000000..b2b1c24
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_long.S
@@ -0,0 +1,2 @@
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+%include "arm/binopWide.S" {"instr":"bl      __aeabi_ldivmod", "result0":"r2", "result1":"r3", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_long_2addr.S b/runtime/interpreter/mterp/arm/op_rem_long_2addr.S
new file mode 100644
index 0000000..f87d493
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_long_2addr.S
@@ -0,0 +1,2 @@
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+%include "arm/binopWide2addr.S" {"instr":"bl      __aeabi_ldivmod", "result0":"r2", "result1":"r3", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_return.S b/runtime/interpreter/mterp/arm/op_return.S
new file mode 100644
index 0000000..a4ffd04
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return.S
@@ -0,0 +1,12 @@
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA
+    mov     r1, #0
+    b       MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_object.S b/runtime/interpreter/mterp/arm/op_return_object.S
new file mode 100644
index 0000000..c490730
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_object.S
@@ -0,0 +1 @@
+%include "arm/op_return.S"
diff --git a/runtime/interpreter/mterp/arm/op_return_void.S b/runtime/interpreter/mterp/arm/op_return_void.S
new file mode 100644
index 0000000..f6dfd99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_void.S
@@ -0,0 +1,5 @@
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
new file mode 100644
index 0000000..7322940
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
@@ -0,0 +1,3 @@
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_wide.S b/runtime/interpreter/mterp/arm/op_return_wide.S
new file mode 100644
index 0000000..2881c87
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_wide.S
@@ -0,0 +1,10 @@
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
+    b       MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_rsub_int.S b/runtime/interpreter/mterp/arm/op_rsub_int.S
new file mode 100644
index 0000000..1508dd4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "arm/binopLit16.S" {"instr":"rsb     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
new file mode 100644
index 0000000..2ee11e1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"rsb     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_sget.S b/runtime/interpreter/mterp/arm/op_sget.S
new file mode 100644
index 0000000..2b81f50
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget.S
@@ -0,0 +1,27 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern $helper
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    $helper
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+.if $is_object
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
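
Hand-wavy C rendering of the sget wrapper's control flow (call helper, check for a pending exception, store the result, then advance). The helper signature and the Thread/ArtMethod types below are placeholders for illustration, not the real ART API.

    #include <stdint.h>
    #include <stdbool.h>

    typedef struct Thread Thread;        /* placeholder types */
    typedef struct ArtMethod ArtMethod;

    static bool do_sget(uint32_t field_ref, ArtMethod* referrer, Thread* self,
                        uint32_t* out_vreg,
                        uint32_t (*helper)(uint32_t, ArtMethod*, Thread*),
                        bool (*exception_pending)(Thread*)) {
        uint32_t value = helper(field_ref, referrer, self);   /* bl $helper          */
        if (exception_pending(self)) {                        /* THREAD_EXCEPTION... */
            return false;                                     /* -> MterpException   */
        }
        *out_vreg = value;                                    /* SET_VREG[_OBJECT]   */
        return true;                                          /* ADVANCE 2           */
    }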
diff --git a/runtime/interpreter/mterp/arm/op_sget_boolean.S b/runtime/interpreter/mterp/arm/op_sget_boolean.S
new file mode 100644
index 0000000..ebfb44c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_byte.S b/runtime/interpreter/mterp/arm/op_sget_byte.S
new file mode 100644
index 0000000..d76862e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_byte.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetByteStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_char.S b/runtime/interpreter/mterp/arm/op_sget_char.S
new file mode 100644
index 0000000..b7fcfc2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_char.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetCharStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_object.S b/runtime/interpreter/mterp/arm/op_sget_object.S
new file mode 100644
index 0000000..8e7d075
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_object.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_short.S b/runtime/interpreter/mterp/arm/op_sget_short.S
new file mode 100644
index 0000000..3e80f0d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_short.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetShortStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_wide.S b/runtime/interpreter/mterp/arm/op_sget_wide.S
new file mode 100644
index 0000000..97db05f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     */
+    /* sget-wide vAA, field@BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGet64StaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r9, rINST, lsr #8             @ r9<- AA
+    add   r9, rFP, r9, lsl #2           @ r9<- &fp[AA]
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shl_int.S b/runtime/interpreter/mterp/arm/op_shl_int.S
new file mode 100644
index 0000000..7e4c768
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm/op_shl_int_2addr.S
new file mode 100644
index 0000000..4286577
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
new file mode 100644
index 0000000..6a48bfc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_long.S b/runtime/interpreter/mterp/arm/op_shl_long.S
new file mode 100644
index 0000000..dc8a679
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_long.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shl-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
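
Illustrative C sketch (not part of the patch) of the half-word arithmetic in the shl-long handler above: the 64-bit shift is built from the two 32-bit vreg halves, with only the low 6 bits of the shift distance used. Shift-by-zero is special-cased here because a C shift by 32 is undefined, whereas the ARM register shifts fall out naturally.

    #include <stdint.h>

    static uint64_t dex_shl_long(uint64_t value, uint32_t shift) {
        uint32_t lo = (uint32_t)value;
        uint32_t hi = (uint32_t)(value >> 32);
        shift &= 63;                              /* and r2, r2, #63       */
        uint32_t new_hi, new_lo;
        if (shift >= 32) {
            new_hi = lo << (shift - 32);          /* movpl r1, r0, asl ip  */
            new_lo = 0;
        } else if (shift == 0) {
            new_hi = hi;
            new_lo = lo;
        } else {
            new_hi = (hi << shift) | (lo >> (32 - shift));
            new_lo = lo << shift;
        }
        return ((uint64_t)new_hi << 32) | new_lo;
    }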
diff --git a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
new file mode 100644
index 0000000..fd7668d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shl-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_int.S b/runtime/interpreter/mterp/arm/op_shr_int.S
new file mode 100644
index 0000000..6317605
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm/op_shr_int_2addr.S
new file mode 100644
index 0000000..cc8632f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
new file mode 100644
index 0000000..60fe5fc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_long.S b/runtime/interpreter/mterp/arm/op_shr_long.S
new file mode 100644
index 0000000..c0edf90
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_long.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
new file mode 100644
index 0000000..ffeaf9c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sparse_switch.S b/runtime/interpreter/mterp/arm/op_sparse_switch.S
new file mode 100644
index 0000000..9f7a42b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "arm/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/arm/op_sput.S b/runtime/interpreter/mterp/arm/op_sput.S
new file mode 100644
index 0000000..7e0c1a6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput.S
@@ -0,0 +1,20 @@
+%default { "helper":"artSet32StaticFromCode"}
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      $helper
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sput_boolean.S b/runtime/interpreter/mterp/arm/op_sput_boolean.S
new file mode 100644
index 0000000..e3bbf2b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_byte.S b/runtime/interpreter/mterp/arm/op_sput_byte.S
new file mode 100644
index 0000000..e3bbf2b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_byte.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_char.S b/runtime/interpreter/mterp/arm/op_sput_char.S
new file mode 100644
index 0000000..d8d65cb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_char.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_object.S b/runtime/interpreter/mterp/arm/op_sput_object.S
new file mode 100644
index 0000000..6d3a9a7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_object.S
@@ -0,0 +1,11 @@
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpSputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sput_short.S b/runtime/interpreter/mterp/arm/op_sput_short.S
new file mode 100644
index 0000000..d8d65cb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_short.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_wide.S b/runtime/interpreter/mterp/arm/op_sput_wide.S
new file mode 100644
index 0000000..adbcffa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_wide.S
@@ -0,0 +1,19 @@
+    /*
+     * SPUT_WIDE handler wrapper.
+     *
+     */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    ldr     r1, [rFP, #OFF_FP_METHOD]
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r2, rFP, r2, lsl #2
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sub_double.S b/runtime/interpreter/mterp/arm/op_sub_double.S
new file mode 100644
index 0000000..69bcc67
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"fsubd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_double_2addr.S b/runtime/interpreter/mterp/arm/op_sub_double_2addr.S
new file mode 100644
index 0000000..2ea59fe
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"fsubd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_float.S b/runtime/interpreter/mterp/arm/op_sub_float.S
new file mode 100644
index 0000000..3f17a0d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fsubs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_float_2addr.S b/runtime/interpreter/mterp/arm/op_sub_float_2addr.S
new file mode 100644
index 0000000..2f4aac4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fsubs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_int.S b/runtime/interpreter/mterp/arm/op_sub_int.S
new file mode 100644
index 0000000..efb9e10
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"sub     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_int_2addr.S b/runtime/interpreter/mterp/arm/op_sub_int_2addr.S
new file mode 100644
index 0000000..4d3036b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"sub     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_long.S b/runtime/interpreter/mterp/arm/op_sub_long.S
new file mode 100644
index 0000000..6f1eb6e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"subs    r0, r0, r2", "instr":"sbc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_long_2addr.S b/runtime/interpreter/mterp/arm/op_sub_long_2addr.S
new file mode 100644
index 0000000..8e9da05
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"subs    r0, r0, r2", "instr":"sbc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_throw.S b/runtime/interpreter/mterp/arm/op_throw.S
new file mode 100644
index 0000000..be49ada
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_throw.S
@@ -0,0 +1,11 @@
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r1, r2                      @ r1<- vAA (exception object)
+    cmp      r1, #0                      @ null object?
+    beq      common_errNullObject        @ yes, throw an NPE instead
+    str      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ thread->exception<- obj
+    b        MterpException
diff --git a/runtime/interpreter/mterp/arm/op_unused_3e.S b/runtime/interpreter/mterp/arm/op_unused_3e.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_3e.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_3f.S b/runtime/interpreter/mterp/arm/op_unused_3f.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_3f.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_40.S b/runtime/interpreter/mterp/arm/op_unused_40.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_40.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_41.S b/runtime/interpreter/mterp/arm/op_unused_41.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_41.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_42.S b/runtime/interpreter/mterp/arm/op_unused_42.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_42.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_43.S b/runtime/interpreter/mterp/arm/op_unused_43.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_43.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_73.S b/runtime/interpreter/mterp/arm/op_unused_73.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_73.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_79.S b/runtime/interpreter/mterp/arm/op_unused_79.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_79.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_7a.S b/runtime/interpreter/mterp/arm/op_unused_7a.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_7a.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f3.S b/runtime/interpreter/mterp/arm/op_unused_f3.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f3.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f4.S b/runtime/interpreter/mterp/arm/op_unused_f4.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f4.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f5.S b/runtime/interpreter/mterp/arm/op_unused_f5.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f5.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f6.S b/runtime/interpreter/mterp/arm/op_unused_f6.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f6.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f7.S b/runtime/interpreter/mterp/arm/op_unused_f7.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f7.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f8.S b/runtime/interpreter/mterp/arm/op_unused_f8.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f8.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f9.S b/runtime/interpreter/mterp/arm/op_unused_f9.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f9.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fa.S b/runtime/interpreter/mterp/arm/op_unused_fa.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fa.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fb.S b/runtime/interpreter/mterp/arm/op_unused_fb.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fb.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fc.S b/runtime/interpreter/mterp/arm/op_unused_fc.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fc.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fd.S b/runtime/interpreter/mterp/arm/op_unused_fd.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fd.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fe.S b/runtime/interpreter/mterp/arm/op_unused_fe.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fe.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_ff.S b/runtime/interpreter/mterp/arm/op_unused_ff.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_ff.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int.S b/runtime/interpreter/mterp/arm/op_ushr_int.S
new file mode 100644
index 0000000..a74361b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_int_2addr.S
new file mode 100644
index 0000000..f2d1d13
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
new file mode 100644
index 0000000..40a4435
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long.S b/runtime/interpreter/mterp/arm/op_ushr_long.S
new file mode 100644
index 0000000..f64c861
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_long.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* ushr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
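
Companion C sketch for the ushr-long handler above: the unsigned 64-bit right shift mirrored from the two 32-bit halves (the shr-long variant differs only in using an arithmetic shift on the high half). Names are illustrative, not taken from the runtime.

    #include <stdint.h>

    static uint64_t dex_ushr_long(uint64_t value, uint32_t shift) {
        uint32_t lo = (uint32_t)value;
        uint32_t hi = (uint32_t)(value >> 32);
        shift &= 63;                              /* and r2, r2, #63       */
        uint32_t new_lo, new_hi;
        if (shift >= 32) {
            new_lo = hi >> (shift - 32);          /* movpl r0, r1, lsr ip  */
            new_hi = 0;
        } else if (shift == 0) {
            new_lo = lo;
            new_hi = hi;
        } else {
            new_lo = (lo >> shift) | (hi << (32 - shift));
            new_hi = hi >> shift;
        }
        return ((uint64_t)new_hi << 32) | new_lo;
    }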
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
new file mode 100644
index 0000000..dbab08d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* ushr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_xor_int.S b/runtime/interpreter/mterp/arm/op_xor_int.S
new file mode 100644
index 0000000..fd7a4b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_2addr.S b/runtime/interpreter/mterp/arm/op_xor_int_2addr.S
new file mode 100644
index 0000000..196a665
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_lit16.S b/runtime/interpreter/mterp/arm/op_xor_int_lit16.S
new file mode 100644
index 0000000..39f2a47
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
new file mode 100644
index 0000000..46bb712
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_long.S b/runtime/interpreter/mterp/arm/op_xor_long.S
new file mode 100644
index 0000000..4f830d0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"eor     r0, r0, r2", "instr":"eor     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_long_2addr.S b/runtime/interpreter/mterp/arm/op_xor_long_2addr.S
new file mode 100644
index 0000000..5b5ed88
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"eor     r0, r0, r2", "instr":"eor     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/unop.S b/runtime/interpreter/mterp/arm/unop.S
new file mode 100644
index 0000000..56518b5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unop.S
@@ -0,0 +1,20 @@
+%default {"preinstr":""}
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    $preinstr                           @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $instr                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
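
A minimal C rendering of one expansion of the unop template above, using not-int ("mvn r0, r0") as the substituted instruction; vreg and pc handling is simplified for illustration.

    #include <stdint.h>

    static void do_not_int(uint32_t* vregs, uint16_t inst) {
        uint32_t a = (inst >> 8) & 0xf;       /* ubfx r9, rINST, #8, #4  */
        uint32_t b = (inst >> 12) & 0xf;      /* mov  r3, rINST, lsr #12 */
        vregs[a] = ~vregs[b];                 /* mvn r0, r0; SET_VREG    */
    }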
diff --git a/runtime/interpreter/mterp/arm/unopNarrower.S b/runtime/interpreter/mterp/arm/unopNarrower.S
new file mode 100644
index 0000000..a5fc027
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unopNarrower.S
@@ -0,0 +1,23 @@
+%default {"preinstr":""}
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0/r1", where
+     * "result" is a 32-bit quantity in r0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     *
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for op_move.)
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
diff --git a/runtime/interpreter/mterp/arm/unopWide.S b/runtime/interpreter/mterp/arm/unopWide.S
new file mode 100644
index 0000000..7b8739c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unopWide.S
@@ -0,0 +1,21 @@
+%default {"preinstr":""}
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
diff --git a/runtime/interpreter/mterp/arm/unopWider.S b/runtime/interpreter/mterp/arm/unopWider.S
new file mode 100644
index 0000000..657a395
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unopWider.S
@@ -0,0 +1,20 @@
+%default {"preinstr":""}
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0", where
+     * "result" is a 64-bit quantity in r0/r1.
+     *
+     * For: int-to-long, int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    $preinstr                           @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $instr                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
diff --git a/runtime/interpreter/mterp/arm/unused.S b/runtime/interpreter/mterp/arm/unused.S
new file mode 100644
index 0000000..ffa00be
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unused.S
@@ -0,0 +1,4 @@
+/*
+ * Bail out to the reference interpreter, which will throw.
+ */
+  b MterpFallback
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
new file mode 100644
index 0000000..6e9ef55
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -0,0 +1,32 @@
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vA, 0)
+    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vA, 0)
+    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
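
Sketch (in C, illustrative only) of the "reverse comparison" trick used by zcmp above, with if-eqz as the example: start from the taken-branch offset and overwrite it with the fall-through distance of 2 code units when the reverse condition holds.

    #include <stdint.h>

    static const uint16_t* do_if_eqz(const uint16_t* pc, int32_t vAA,
                                     int16_t branch_offset) {
        int32_t dist = branch_offset;         /* FETCH_S r1, 1              */
        if (vAA != 0) {                       /* movne r1, #2 (revcmp "ne") */
            dist = 2;
        }
        return pc + dist;                     /* FETCH_ADVANCE_INST_RB      */
    }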
diff --git a/runtime/interpreter/mterp/config_arm b/runtime/interpreter/mterp/config_arm
new file mode 100644
index 0000000..436dcd2
--- /dev/null
+++ b/runtime/interpreter/mterp/config_arm
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for ARMv7-A targets.
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub arm/alt_stub.S
+
+# file header and basic definitions
+import arm/header.S
+
+# arch-specific entry point to interpreter
+import arm/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub arm/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start arm
+    # (override example:) op op_sub_float_2addr arm-vfp
+    # (fallback example:) op op_sub_float_2addr FALLBACK
+
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    op op_invoke_lambda FALLBACK
+    # op op_unused_f4 FALLBACK
+    op op_capture_variable FALLBACK
+    op op_create_lambda FALLBACK
+    op op_liberate_variable FALLBACK
+    op op_box_lambda FALLBACK
+    op op_unbox_lambda FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
+op-end
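+# Note (informative): the uncommented lambda-related entries above
+# (op_invoke_lambda, op_capture_variable, etc.) are pinned to FALLBACK,
+# presumably because no arm assembly handlers exist yet for these
+# experimental opcodes; the commented-out entries simply use the default
+# handlers from the arm directory.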
+
+# common subroutines for asm
+import arm/footer.S
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
new file mode 100644
index 0000000..ef3c721
--- /dev/null
+++ b/runtime/interpreter/mterp/config_arm64
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for ARM64
+#
+
+handler-style computed-goto
+handler-size 128
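+# (informative) With computed-goto dispatch each opcode handler is padded to a
+# fixed 128-byte slot (the handler-size above), so dispatch can branch straight
+# to handler_base + opcode * 128.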
+
+# source for alternate entry stub
+asm-alt-stub arm64/alt_stub.S
+
+# file header and basic definitions
+import arm64/header.S
+
+# arch-specific entry point to interpreter
+import arm64/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub arm64/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start arm64
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op op_unused_3e FALLBACK
+    op op_unused_3f FALLBACK
+    op op_unused_40 FALLBACK
+    op op_unused_41 FALLBACK
+    op op_unused_42 FALLBACK
+    op op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op op_unused_79 FALLBACK
+    op op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op op_unused_f3 FALLBACK
+    op op_unused_f4 FALLBACK
+    op op_unused_f5 FALLBACK
+    op op_unused_f6 FALLBACK
+    op op_unused_f7 FALLBACK
+    op op_unused_f8 FALLBACK
+    op op_unused_f9 FALLBACK
+    op op_unused_fa FALLBACK
+    op op_unused_fb FALLBACK
+    op op_unused_fc FALLBACK
+    op op_unused_fd FALLBACK
+    op op_unused_fe FALLBACK
+    op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import arm64/footer.S
diff --git a/runtime/interpreter/mterp/config_mips b/runtime/interpreter/mterp/config_mips
new file mode 100644
index 0000000..d1221f7
--- /dev/null
+++ b/runtime/interpreter/mterp/config_mips
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for MIPS_32
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub mips/alt_stub.S
+
+# file header and basic definitions
+import mips/header.S
+
+# arch-specific entry point to interpreter
+import mips/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub mips/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start mips
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op op_unused_3e FALLBACK
+    op op_unused_3f FALLBACK
+    op op_unused_40 FALLBACK
+    op op_unused_41 FALLBACK
+    op op_unused_42 FALLBACK
+    op op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op op_unused_79 FALLBACK
+    op op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op op_unused_f3 FALLBACK
+    op op_unused_f4 FALLBACK
+    op op_unused_f5 FALLBACK
+    op op_unused_f6 FALLBACK
+    op op_unused_f7 FALLBACK
+    op op_unused_f8 FALLBACK
+    op op_unused_f9 FALLBACK
+    op op_unused_fa FALLBACK
+    op op_unused_fb FALLBACK
+    op op_unused_fc FALLBACK
+    op op_unused_fd FALLBACK
+    op op_unused_fe FALLBACK
+    op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import mips/footer.S
diff --git a/runtime/interpreter/mterp/config_mips64 b/runtime/interpreter/mterp/config_mips64
new file mode 100644
index 0000000..f804ce5
--- /dev/null
+++ b/runtime/interpreter/mterp/config_mips64
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for MIPS_64
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub mips64/alt_stub.S
+
+# file header and basic definitions
+import mips64/header.S
+
+# arch-specific entry point to interpreter
+import mips64/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub mips64/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start mips64
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op op_unused_3e FALLBACK
+    op op_unused_3f FALLBACK
+    op op_unused_40 FALLBACK
+    op op_unused_41 FALLBACK
+    op op_unused_42 FALLBACK
+    op op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op op_unused_79 FALLBACK
+    op op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op op_unused_f3 FALLBACK
+    op op_unused_f4 FALLBACK
+    op op_unused_f5 FALLBACK
+    op op_unused_f6 FALLBACK
+    op op_unused_f7 FALLBACK
+    op op_unused_f8 FALLBACK
+    op op_unused_f9 FALLBACK
+    op op_unused_fa FALLBACK
+    op op_unused_fb FALLBACK
+    op op_unused_fc FALLBACK
+    op op_unused_fd FALLBACK
+    op op_unused_fe FALLBACK
+    op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import mips64/footer.S
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
new file mode 100644
index 0000000..277817d
--- /dev/null
+++ b/runtime/interpreter/mterp/config_x86
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for X86
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub x86/alt_stub.S
+
+# file header and basic definitions
+import x86/header.S
+
+# arch-specific entry point to interpreter
+import x86/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub x86/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start x86
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op op_unused_3e FALLBACK
+    op op_unused_3f FALLBACK
+    op op_unused_40 FALLBACK
+    op op_unused_41 FALLBACK
+    op op_unused_42 FALLBACK
+    op op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op op_unused_79 FALLBACK
+    op op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op op_unused_f3 FALLBACK
+    op op_unused_f4 FALLBACK
+    op op_unused_f5 FALLBACK
+    op op_unused_f6 FALLBACK
+    op op_unused_f7 FALLBACK
+    op op_unused_f8 FALLBACK
+    op op_unused_f9 FALLBACK
+    op op_unused_fa FALLBACK
+    op op_unused_fb FALLBACK
+    op op_unused_fc FALLBACK
+    op op_unused_fd FALLBACK
+    op op_unused_fe FALLBACK
+    op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import x86/footer.S
diff --git a/runtime/interpreter/mterp/config_x86_64 b/runtime/interpreter/mterp/config_x86_64
new file mode 100644
index 0000000..a002dc2
--- /dev/null
+++ b/runtime/interpreter/mterp/config_x86_64
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for X86_64
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub x86_64/alt_stub.S
+
+# file header and basic definitions
+import x86_64/header.S
+
+# arch-specific entry point to interpreter
+import x86_64/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub x86_64/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start x86_64
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op op_unused_3e FALLBACK
+    op op_unused_3f FALLBACK
+    op op_unused_40 FALLBACK
+    op op_unused_41 FALLBACK
+    op op_unused_42 FALLBACK
+    op op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op op_unused_79 FALLBACK
+    op op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op op_unused_f3 FALLBACK
+    op op_unused_f4 FALLBACK
+    op op_unused_f5 FALLBACK
+    op op_unused_f6 FALLBACK
+    op op_unused_f7 FALLBACK
+    op op_unused_f8 FALLBACK
+    op op_unused_f9 FALLBACK
+    op op_unused_fa FALLBACK
+    op op_unused_fb FALLBACK
+    op op_unused_fc FALLBACK
+    op op_unused_fd FALLBACK
+    op op_unused_fe FALLBACK
+    op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import x86_64/footer.S
diff --git a/runtime/interpreter/mterp/gen_mterp.py b/runtime/interpreter/mterp/gen_mterp.py
new file mode 100755
index 0000000..f56d8bd
--- /dev/null
+++ b/runtime/interpreter/mterp/gen_mterp.py
@@ -0,0 +1,602 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Using instructions from an architecture-specific config file, generate C
+# and assembly source files for the Dalvik interpreter.
+#
+
+import sys, string, re, time
+from string import Template
+
+interp_defs_file = "../../dex_instruction_list.h" # need opcode list
+kNumPackedOpcodes = 256
+
+splitops = False
+verbose = False
+handler_size_bits = -1000
+handler_size_bytes = -1000
+in_op_start = 0             # 0=not started, 1=started, 2=ended
+in_alt_op_start = 0         # 0=not started, 1=started, 2=ended
+default_op_dir = None
+default_alt_stub = None
+opcode_locations = {}
+alt_opcode_locations = {}
+asm_stub_text = []
+fallback_stub_text = []
+label_prefix = ".L"         # use ".L" to hide labels from gdb
+alt_label_prefix = ".L_ALT" # use ".L" to hide labels from gdb
+style = None                # interpreter style
+generate_alt_table = False
+
+# Exception class.
+class DataParseError(SyntaxError):
+    "Failure when parsing data file"
+
+#
+# Set any omnipresent substitution values.
+#
+def getGlobalSubDict():
+    return { "handler_size_bits":handler_size_bits,
+             "handler_size_bytes":handler_size_bytes }
+
+#
+# Parse arch config file --
+# Set interpreter style.
+#
+def setHandlerStyle(tokens):
+    global style
+    if len(tokens) != 2:
+        raise DataParseError("handler-style requires one argument")
+    style = tokens[1]
+    if style != "computed-goto":
+        raise DataParseError("handler-style (%s) invalid" % style)
+
+#
+# Parse arch config file --
+# Set handler_size_bytes to the value of tokens[1], and handler_size_bits to
+# log2(handler_size_bytes).  Throws an exception if "bytes" is zero or not
+# a power of two.
+#
+def setHandlerSize(tokens):
+    global handler_size_bits, handler_size_bytes
+    if style != "computed-goto":
+        print "Warning: handler-size valid only for computed-goto interpreters"
+    if len(tokens) != 2:
+        raise DataParseError("handler-size requires one argument")
+    if handler_size_bits != -1000:
+        raise DataParseError("handler-size may only be set once")
+
+    # compute log2(n), and make sure n is 0 or a power of 2
+    handler_size_bytes = bytes = int(tokens[1])
+    bits = -1
+    while bytes > 0:
+        bytes //= 2     # halve with truncating division
+        bits += 1
+
+    if handler_size_bytes == 0 or handler_size_bytes != (1 << bits):
+        raise DataParseError("handler-size (%d) must be power of 2" \
+                % orig_bytes)
+    handler_size_bits = bits
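+    # Informative worked example: "handler-size 128" yields
+    # handler_size_bytes == 128 and handler_size_bits == 7 (2**7 == 128);
+    # a non-power-of-two value such as 96 would trip the check above.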
+
+#
+# Parse arch config file --
+# Copy a file into the asm output file.
+#
+def importFile(tokens):
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    source = tokens[1]
+    if source.endswith(".S"):
+        appendSourceFile(tokens[1], getGlobalSubDict(), asm_fp, None)
+    else:
+        raise DataParseError("don't know how to import %s (expecting .cpp/.S)"
+                % source)
+
+#
+# Parse arch config file --
+# Read the asm stub source into memory.
+#
+def setAsmStub(tokens):
+    global asm_stub_text
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    try:
+        stub_fp = open(tokens[1])
+        asm_stub_text = stub_fp.readlines()
+    except IOError, err:
+        stub_fp.close()
+        raise DataParseError("unable to load asm-stub: %s" % str(err))
+    stub_fp.close()
+
+#
+# Parse arch config file --
+# Read the fallback stub source into memory; it is emitted for opcodes marked
+# FALLBACK.
+#
+def setFallbackStub(tokens):
+    global fallback_stub_text
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    try:
+        stub_fp = open(tokens[1])
+        fallback_stub_text = stub_fp.readlines()
+    except IOError, err:
+        stub_fp.close()
+        raise DataParseError("unable to load fallback-stub: %s" % str(err))
+    stub_fp.close()
+
+#
+# Parse arch config file --
+# Record location of default alt stub
+#
+def setAsmAltStub(tokens):
+    global default_alt_stub, generate_alt_table
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    default_alt_stub = tokens[1]
+    generate_alt_table = True
+
+#
+# Parse arch config file --
+# Start of opcode list.
+#
+def opStart(tokens):
+    global in_op_start
+    global default_op_dir
+    if len(tokens) != 2:
+        raise DataParseError("opStart takes a directory name argument")
+    if in_op_start != 0:
+        raise DataParseError("opStart can only be specified once")
+    default_op_dir = tokens[1]
+    in_op_start = 1
+
+#
+# Parse arch config file --
+# Set location of a single alt opcode's source file.
+#
+def altEntry(tokens):
+    global generate_alt_table
+    if len(tokens) != 3:
+        raise DataParseError("alt requires exactly two arguments")
+    if in_op_start != 1:
+        raise DataParseError("alt statements must be between opStart/opEnd")
+    try:
+        index = opcodes.index(tokens[1])
+    except ValueError:
+        raise DataParseError("unknown opcode %s" % tokens[1])
+    if alt_opcode_locations.has_key(tokens[1]):
+        print "Note: alt overrides earlier %s (%s -> %s)" \
+                % (tokens[1], alt_opcode_locations[tokens[1]], tokens[2])
+    alt_opcode_locations[tokens[1]] = tokens[2]
+    generate_alt_table = True
+
+#
+# Parse arch config file --
+# Set location of a single opcode's source file.
+#
+def opEntry(tokens):
+    #global opcode_locations
+    if len(tokens) != 3:
+        raise DataParseError("op requires exactly two arguments")
+    if in_op_start != 1:
+        raise DataParseError("op statements must be between opStart/opEnd")
+    try:
+        index = opcodes.index(tokens[1])
+    except ValueError:
+        raise DataParseError("unknown opcode %s" % tokens[1])
+    if opcode_locations.has_key(tokens[1]):
+        print "Note: op overrides earlier %s (%s -> %s)" \
+                % (tokens[1], opcode_locations[tokens[1]], tokens[2])
+    opcode_locations[tokens[1]] = tokens[2]
+
+#
+# Parse arch config file --
+# End of opcode list; emit instruction blocks.
+#
+def opEnd(tokens):
+    global in_op_start
+    if len(tokens) != 1:
+        raise DataParseError("opEnd takes no arguments")
+    if in_op_start != 1:
+        raise DataParseError("opEnd must follow opStart, and only appear once")
+    in_op_start = 2
+
+    loadAndEmitOpcodes()
+    if splitops == False:
+        if generate_alt_table:
+            loadAndEmitAltOpcodes()
+
+def genaltop(tokens):
+    if in_op_start != 2:
+        raise DataParseError("alt-op can be specified only after op-end")
+    if len(tokens) != 1:
+        raise DataParseError("alt-ops takes no arguments")
+    if generate_alt_table:
+        loadAndEmitAltOpcodes()
+
+#
+# Extract an ordered list of instructions from the VM sources.  We use the
+# "goto table" definition macro, which has exactly kNumPackedOpcodes
+# entries.
+#
+def getOpcodeList():
+    opcodes = []
+    opcode_fp = open(interp_defs_file)
+    opcode_re = re.compile(r"^\s*V\((....), (\w+),.*", re.DOTALL)
+    for line in opcode_fp:
+        match = opcode_re.match(line)
+        if not match:
+            continue
+        opcodes.append("op_" + match.group(2).lower())
+    opcode_fp.close()
+
+    if len(opcodes) != kNumPackedOpcodes:
+        print "ERROR: found %d opcodes in Interp.h (expected %d)" \
+                % (len(opcodes), kNumPackedOpcodes)
+        raise SyntaxError, "bad opcode count"
+    return opcodes
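+
+# Illustrative: once the goto-table macro is expanded, a line such as
+#   V(0x00, NOP, ...)
+# matches opcode_re, and its second argument (lowercased, prefixed with "op_")
+# contributes "op_nop" to the list.  The trailing macro arguments shown here
+# are elided/hypothetical.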
+
+def emitAlign():
+    if style == "computed-goto":
+        asm_fp.write("    .balign %d\n" % handler_size_bytes)
+
+#
+# Load and emit opcodes for all kNumPackedOpcodes instructions.
+#
+def loadAndEmitOpcodes():
+    sister_list = []
+    assert len(opcodes) == kNumPackedOpcodes
+    need_dummy_start = False
+    start_label = "artMterpAsmInstructionStart"
+    end_label = "artMterpAsmInstructionEnd"
+
+    # point MterpAsmInstructionStart at the first handler or stub
+    asm_fp.write("\n    .global %s\n" % start_label)
+    asm_fp.write("    .type   %s, %%function\n" % start_label)
+    asm_fp.write("%s = " % start_label + label_prefix + "_op_nop\n")
+    asm_fp.write("    .text\n\n")
+
+    for i in xrange(kNumPackedOpcodes):
+        op = opcodes[i]
+
+        if opcode_locations.has_key(op):
+            location = opcode_locations[op]
+        else:
+            location = default_op_dir
+
+        if location == "FALLBACK":
+            emitFallback(i)
+        else:
+            loadAndEmitAsm(location, i, sister_list)
+
+    # For a 100% C implementation, there are no asm handlers or stubs.  We
+    # need to have the MterpAsmInstructionStart label point at op_nop, and it's
+    # too annoying to try to slide it in after the alignment pseudo-op, so
+    # we take the low road and just emit a dummy op_nop here.
+    if need_dummy_start:
+        emitAlign()
+        asm_fp.write(label_prefix + "_op_nop:   /* dummy */\n")
+
+    emitAlign()
+    asm_fp.write("    .size   %s, .-%s\n" % (start_label, start_label))
+    asm_fp.write("    .global %s\n" % end_label)
+    asm_fp.write("%s:\n" % end_label)
+
+    if style == "computed-goto":
+        emitSectionComment("Sister implementations", asm_fp)
+        asm_fp.write("    .global artMterpAsmSisterStart\n")
+        asm_fp.write("    .type   artMterpAsmSisterStart, %function\n")
+        asm_fp.write("    .text\n")
+        asm_fp.write("    .balign 4\n")
+        asm_fp.write("artMterpAsmSisterStart:\n")
+        asm_fp.writelines(sister_list)
+        asm_fp.write("\n    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart\n")
+        asm_fp.write("    .global artMterpAsmSisterEnd\n")
+        asm_fp.write("artMterpAsmSisterEnd:\n\n")
+
+#
+# Load an alternate entry stub
+#
+def loadAndEmitAltStub(source, opindex):
+    op = opcodes[opindex]
+    if verbose:
+        print " alt emit %s --> stub" % source
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+
+    emitAsmHeader(asm_fp, dict, alt_label_prefix)
+    appendSourceFile(source, dict, asm_fp, None)
+
+#
+# Load and emit alternate opcodes for all kNumPackedOpcodes instructions.
+#
+def loadAndEmitAltOpcodes():
+    assert len(opcodes) == kNumPackedOpcodes
+    start_label = "artMterpAsmAltInstructionStart"
+    end_label = "artMterpAsmAltInstructionEnd"
+
+    # point MterpAsmAltInstructionStart at the first alt handler or stub
+    asm_fp.write("\n    .global %s\n" % start_label)
+    asm_fp.write("    .type   %s, %%function\n" % start_label)
+    asm_fp.write("    .text\n\n")
+    asm_fp.write("%s = " % start_label + label_prefix + "_ALT_op_nop\n")
+
+    for i in xrange(kNumPackedOpcodes):
+        op = opcodes[i]
+        if alt_opcode_locations.has_key(op):
+            source = "%s/alt_%s.S" % (alt_opcode_locations[op], op)
+        else:
+            source = default_alt_stub
+        loadAndEmitAltStub(source, i)
+
+    emitAlign()
+    asm_fp.write("    .size   %s, .-%s\n" % (start_label, start_label))
+    asm_fp.write("    .global %s\n" % end_label)
+    asm_fp.write("%s:\n" % end_label)
+
+#
+# Load an assembly fragment and emit it.
+#
+def loadAndEmitAsm(location, opindex, sister_list):
+    op = opcodes[opindex]
+    source = "%s/%s.S" % (location, op)
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+    if verbose:
+        print " emit %s --> asm" % source
+
+    emitAsmHeader(asm_fp, dict, label_prefix)
+    appendSourceFile(source, dict, asm_fp, sister_list)
+
+#
+# Emit fallback fragment
+#
+def emitFallback(opindex):
+    op = opcodes[opindex]
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+    emitAsmHeader(asm_fp, dict, label_prefix)
+    for line in fallback_stub_text:
+        asm_fp.write(line)
+    asm_fp.write("\n")
+
+#
+# Output the alignment directive and label for an assembly piece.
+#
+def emitAsmHeader(outfp, dict, prefix):
+    outfp.write("/* ------------------------------ */\n")
+    # The alignment directive ensures that the handler occupies
+    # at least the correct amount of space.  We don't try to deal
+    # with overflow here.
+    emitAlign()
+    # Emit a label so that gdb will say the right thing.  We prepend an
+    # underscore so the symbol name doesn't clash with the Opcode enum.
+    outfp.write(prefix + "_%(opcode)s: /* 0x%(opnum)02x */\n" % dict)
+
+#
+# Output a generic instruction stub that updates the "glue" struct and
+# calls the C implementation.
+#
+def emitAsmStub(outfp, dict):
+    emitAsmHeader(outfp, dict, label_prefix)
+    for line in asm_stub_text:
+        templ = Template(line)
+        outfp.write(templ.substitute(dict))
+
+#
+# Append the file specified by "source" to the open "outfp".  Each line will
+# be template-replaced using the substitution dictionary "dict".
+#
+# Lines beginning with "%" are treated as directives.  A "%include" line
+# contains a filename and, optionally, a Python-style dictionary declaration
+# with substitution strings.  (This is implemented with recursion.)  A
+# "%default" line supplies default values for the substitution dictionary.
+#
+# If "sister_list" is provided, and we find a "%break" line, all subsequent
+# lines from the file will be appended to sister_list instead of copied to
+# the output.
+#
+# This may modify "dict".
+#
+def appendSourceFile(source, dict, outfp, sister_list):
+    outfp.write("/* File: %s */\n" % source)
+    infp = open(source, "r")
+    in_sister = False
+    for line in infp:
+        if line.startswith("%include"):
+            # Parse the "include" line
+            tokens = line.strip().split(' ', 2)
+            if len(tokens) < 2:
+                raise DataParseError("malformed %%include in %s" % source)
+
+            alt_source = tokens[1].strip("\"")
+            if alt_source == source:
+                raise DataParseError("self-referential %%include in %s"
+                        % source)
+
+            new_dict = dict.copy()
+            if len(tokens) == 3:
+                new_dict.update(eval(tokens[2]))
+            #print " including src=%s dict=%s" % (alt_source, new_dict)
+            appendSourceFile(alt_source, new_dict, outfp, sister_list)
+            continue
+
+        elif line.startswith("%default"):
+            # copy keywords into dictionary
+            tokens = line.strip().split(' ', 1)
+            if len(tokens) < 2:
+                raise DataParseError("malformed %%default in %s" % source)
+            defaultValues = eval(tokens[1])
+            for entry in defaultValues:
+                dict.setdefault(entry, defaultValues[entry])
+            continue
+
+        elif line.startswith("%break") and sister_list != None:
+            # allow more than one %break, ignoring all following the first
+            if style == "computed-goto" and not in_sister:
+                in_sister = True
+                sister_list.append("\n/* continuation for %(opcode)s */\n"%dict)
+            continue
+
+        # perform keyword substitution if a dictionary was provided
+        if dict != None:
+            templ = Template(line)
+            try:
+                subline = templ.substitute(dict)
+            except KeyError, err:
+                raise DataParseError("keyword substitution failed in %s: %s"
+                        % (source, str(err)))
+            except:
+                print "ERROR: substitution failed: " + line
+                raise
+        else:
+            subline = line
+
+        # write output to appropriate file
+        if in_sister:
+            sister_list.append(subline)
+        else:
+            outfp.write(subline)
+    outfp.write("\n")
+    infp.close()
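+
+# Illustrative directives inside a fragment (file name and keys hypothetical):
+#   %default { "is_object":"0" }
+#       - seed default values for the substitution dictionary
+#   %include "arm/binop.S" {"instr":"add     r0, r0, r1"}
+#       - recursively splice in another fragment with extra substitutions
+#   %break
+#       - divert the remaining lines of the fragment to the sister list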
+
+#
+# Emit a C-style section header comment.
+#
+def emitSectionComment(str, fp):
+    equals = "========================================" \
+             "==================================="
+
+    fp.write("\n/*\n * %s\n *  %s\n * %s\n */\n" %
+        (equals, str, equals))
+
+
+#
+# ===========================================================================
+# "main" code
+#
+
+#
+# Check args.
+#
+if len(sys.argv) != 3:
+    print "Usage: %s target-arch output-dir" % sys.argv[0]
+    sys.exit(2)
+
+target_arch = sys.argv[1]
+output_dir = sys.argv[2]
+
+#
+# Extract opcode list.
+#
+opcodes = getOpcodeList()
+#for op in opcodes:
+#    print "  %s" % op
+
+#
+# Open config file.
+#
+try:
+    config_fp = open("config_%s" % target_arch)
+except:
+    print "Unable to open config file 'config_%s'" % target_arch
+    sys.exit(1)
+
+#
+# Open and prepare output files.
+#
+try:
+    asm_fp = open("%s/mterp_%s.S" % (output_dir, target_arch), "w")
+except:
+    print "Unable to open output files"
+    print "Make sure directory '%s' exists and existing files are writable" \
+            % output_dir
+    # Ideally we'd remove the files to avoid confusing "make", but if they
+    # failed to open we probably won't be able to remove them either.
+    sys.exit(1)
+
+print "Generating %s" % (asm_fp.name)
+
+file_header = """/*
+ * This file was generated automatically by gen-mterp.py for '%s'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+""" % (target_arch)
+
+asm_fp.write(file_header)
+
+#
+# Process the config file.
+#
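+# An illustrative config_<arch> file (paths here are hypothetical) might look
+# like the following; note that handler-style must be the first command:
+#
+#   handler-style computed-goto
+#   handler-size 128
+#   import arm/header.S
+#   asm-alt-stub arm/alt_stub.S
+#   fallback-stub arm/fallback.S
+#   op-start arm
+#       # per-opcode overrides go here, e.g.:  op op_nop arm
+#   op-end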
+failed = False
+try:
+    for line in config_fp:
+        line = line.strip()         # remove CRLF, leading spaces
+        tokens = line.split(' ')    # tokenize
+        #print "%d: %s" % (len(tokens), tokens)
+        if len(tokens[0]) == 0:
+            #print "  blank"
+            pass
+        elif tokens[0][0] == '#':
+            #print "  comment"
+            pass
+        else:
+            if tokens[0] == "handler-size":
+                setHandlerSize(tokens)
+            elif tokens[0] == "import":
+                importFile(tokens)
+            elif tokens[0] == "asm-stub":
+                setAsmStub(tokens)
+            elif tokens[0] == "asm-alt-stub":
+                setAsmAltStub(tokens)
+            elif tokens[0] == "op-start":
+                opStart(tokens)
+            elif tokens[0] == "op-end":
+                opEnd(tokens)
+            elif tokens[0] == "alt":
+                altEntry(tokens)
+            elif tokens[0] == "op":
+                opEntry(tokens)
+            elif tokens[0] == "handler-style":
+                setHandlerStyle(tokens)
+            elif tokens[0] == "alt-ops":
+                genaltop(tokens)
+            elif tokens[0] == "split-ops":
+                splitops = True
+            elif tokens[0] == "fallback-stub":
+                setFallbackStub(tokens)
+            else:
+                raise DataParseError, "unrecognized command '%s'" % tokens[0]
+            if style == None:
+                print "tokens[0] = %s" % tokens[0]
+                raise DataParseError, "handler-style must be first command"
+except DataParseError, err:
+    print "Failed: " + str(err)
+    # TODO: remove output files so "make" doesn't get confused
+    failed = True
+    asm_fp.close()
+    asm_fp = None
+
+config_fp.close()
+
+#
+# Done!
+#
+if asm_fp:
+    asm_fp.close()
+
+sys.exit(failed)
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
new file mode 100644
index 0000000..9975458
--- /dev/null
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -0,0 +1,620 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Mterp entry point and support functions.
+ */
+#include "interpreter/interpreter_common.h"
+#include "entrypoints/entrypoint_utils-inl.h"
+#include "mterp.h"
+
+namespace art {
+namespace interpreter {
+/*
+ * Verify some constants used by the mterp interpreter.
+ */
+void CheckMterpAsmConstants() {
+  /*
+   * If we're using computed goto instruction transitions, make sure
+   * none of the handlers overflows the 128-byte limit.  This won't tell
+   * which one did, but if any one is too big the total size will
+   * overflow.
+   */
+  const int width = 128;
+  int interp_size = (uintptr_t) artMterpAsmInstructionEnd -
+                    (uintptr_t) artMterpAsmInstructionStart;
+  if ((interp_size == 0) || (interp_size != (art::kNumPackedOpcodes * width))) {
+      LOG(art::FATAL) << "ERROR: unexpected asm interp size " << interp_size
+                      << "(did an instruction handler exceed " << width << " bytes?)";
+  }
+}
+
+void InitMterpTls(Thread* self) {
+  self->SetMterpDefaultIBase(artMterpAsmInstructionStart);
+  self->SetMterpAltIBase(artMterpAsmAltInstructionStart);
+  self->SetMterpCurrentIBase(artMterpAsmInstructionStart);
+}
+
+/*
+ * Find the matching case.  Returns the offset to the handler instructions.
+ *
+ * Returns 3 if we don't find a match (it's the size of the sparse-switch
+ * instruction).
+ */
+extern "C" int32_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
+  const int kInstrLen = 3;
+  uint16_t size;
+  const int32_t* keys;
+  const int32_t* entries;
+
+  /*
+   * Sparse switch data format:
+   *  ushort ident = 0x0200   magic value
+   *  ushort size             number of entries in the table; > 0
+   *  int keys[size]          keys, sorted low-to-high; 32-bit aligned
+   *  int targets[size]       branch targets, relative to switch opcode
+   *
+   * Total size is (2+size*4) 16-bit code units.
+   */
+
+  uint16_t signature = *switchData++;
+  DCHECK_EQ(signature, static_cast<uint16_t>(art::Instruction::kSparseSwitchSignature));
+
+  size = *switchData++;
+
+  /* The keys are guaranteed to be aligned on a 32-bit boundary;
+   * we can treat them as a native int array.
+   */
+  keys = reinterpret_cast<const int32_t*>(switchData);
+
+  /* The entries are guaranteed to be aligned on a 32-bit boundary;
+   * we can treat them as a native int array.
+   */
+  entries = keys + size;
+
+  /*
+   * Binary-search through the array of keys, which are guaranteed to
+   * be sorted low-to-high.
+   */
+  int lo = 0;
+  int hi = size - 1;
+  while (lo <= hi) {
+    int mid = (lo + hi) >> 1;
+
+    int32_t foundVal = keys[mid];
+    if (testVal < foundVal) {
+      hi = mid - 1;
+    } else if (testVal > foundVal) {
+      lo = mid + 1;
+    } else {
+      return entries[mid];
+    }
+  }
+  return kInstrLen;
+}
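+
+/*
+ * Illustrative example (values hypothetical): for keys {1, 10, 100} and
+ * targets {4, 8, 12}, a testVal of 10 binary-searches to index 1 and returns
+ * 8 (the branch offset for that case), while a testVal of 5 matches nothing
+ * and returns kInstrLen (3), i.e. the size of the sparse-switch instruction.
+ */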
+
+extern "C" int32_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) {
+  const int kInstrLen = 3;
+
+  /*
+   * Packed switch data format:
+   *  ushort ident = 0x0100   magic value
+   *  ushort size             number of entries in the table
+   *  int first_key           first (and lowest) switch case value
+   *  int targets[size]       branch targets, relative to switch opcode
+   *
+   * Total size is (4+size*2) 16-bit code units.
+   */
+  uint16_t signature = *switchData++;
+  DCHECK_EQ(signature, static_cast<uint16_t>(art::Instruction::kPackedSwitchSignature));
+
+  uint16_t size = *switchData++;
+
+  int32_t firstKey = *switchData++;
+  firstKey |= (*switchData++) << 16;
+
+  int index = testVal - firstKey;
+  if (index < 0 || index >= size) {
+    return kInstrLen;
+  }
+
+  /*
+   * The entries are guaranteed to be aligned on a 32-bit boundary;
+   * we can treat them as a native int array.
+   */
+  const int32_t* entries = reinterpret_cast<const int32_t*>(switchData);
+  return entries[index];
+}
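+
+/*
+ * Illustrative example (values hypothetical): with firstKey = 10 and
+ * size = 3, a testVal of 11 gives index 1 and returns entries[1]; a testVal
+ * of 20 falls outside [10, 12] and returns kInstrLen (3), the size of the
+ * packed-switch instruction.
+ */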
+
+
+extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame,
+                                   uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kVirtual, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeSuper(Thread* self, ShadowFrame* shadow_frame,
+                                 uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kSuper, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeInterface(Thread* self, ShadowFrame* shadow_frame,
+                                     uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kInterface, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeDirect(Thread* self, ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kDirect, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeStatic(Thread* self, ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kStatic, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeVirtualRange(Thread* self, ShadowFrame* shadow_frame,
+                                        uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kVirtual, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeSuperRange(Thread* self, ShadowFrame* shadow_frame,
+                                      uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kSuper, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeInterfaceRange(Thread* self, ShadowFrame* shadow_frame,
+                                          uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kInterface, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeDirectRange(Thread* self, ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kDirect, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeStaticRange(Thread* self, ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kStatic, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeVirtualQuick(Thread* self, ShadowFrame* shadow_frame,
+                                        uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvokeVirtualQuick<false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeVirtualQuickRange(Thread* self, ShadowFrame* shadow_frame,
+                                             uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvokeVirtualQuick<true>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" void MterpThreadFenceForConstructor() {
+  QuasiAtomic::ThreadFenceForConstructor();
+}
+
+extern "C" bool MterpConstString(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
+                                 Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  String* s = ResolveString(self, *shadow_frame,  index);
+  if (UNLIKELY(s == nullptr)) {
+    return true;
+  }
+  shadow_frame->SetVRegReference(tgt_vreg, s);
+  return false;
+}
+
+extern "C" bool MterpConstClass(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
+                                Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false);
+  if (UNLIKELY(c == nullptr)) {
+    return true;
+  }
+  shadow_frame->SetVRegReference(tgt_vreg, c);
+  return false;
+}
+
+extern "C" bool MterpCheckCast(uint32_t index, Object* obj, art::ArtMethod* method,
+                               Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
+  if (UNLIKELY(c == nullptr)) {
+    return true;
+  }
+  if (UNLIKELY(obj != nullptr && !obj->InstanceOf(c))) {
+    ThrowClassCastException(c, obj->GetClass());
+    return true;
+  }
+  return false;
+}
+
+extern "C" bool MterpInstanceOf(uint32_t index, Object* obj, art::ArtMethod* method,
+                                Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
+  if (UNLIKELY(c == nullptr)) {
+    return false;  // Caller will check for pending exception.  Return value unimportant.
+  }
+  return (obj != nullptr) && obj->InstanceOf(c);
+}
+
+extern "C" bool MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return FillArrayData(obj, payload);
+}
+
+extern "C" bool MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  Object* obj = nullptr;
+  Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame->GetMethod(),
+                                    self, false, false);
+  if (LIKELY(c != nullptr)) {
+    if (UNLIKELY(c->IsStringClass())) {
+      gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+      mirror::SetStringCountVisitor visitor(0);
+      obj = String::Alloc<true>(self, 0, allocator_type, visitor);
+    } else {
+      obj = AllocObjectFromCode<false, true>(
+        inst->VRegB_21c(), shadow_frame->GetMethod(), self,
+        Runtime::Current()->GetHeap()->GetCurrentAllocator());
+    }
+  }
+  if (UNLIKELY(obj == nullptr)) {
+    return false;
+  }
+  obj->GetClass()->AssertInitializedOrInitializingInThread(self);
+  shadow_frame->SetVRegReference(inst->VRegA_21c(inst_data), obj);
+  return true;
+}
+
+extern "C" bool MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                uint32_t inst_data, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFieldPut<StaticObjectWrite, Primitive::kPrimNot, false, false>
+      (self, *shadow_frame, inst, inst_data);
+}
+
+extern "C" bool MterpIputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                uint32_t inst_data, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, false, false>
+      (self, *shadow_frame, inst, inst_data);
+}
+
+extern "C" bool MterpIputObjectQuick(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                     uint32_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoIPutQuick<Primitive::kPrimNot, false>(*shadow_frame, inst, inst_data);
+}
+
+extern "C" bool MterpAputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                uint32_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  Object* a = shadow_frame->GetVRegReference(inst->VRegB_23x());
+  if (UNLIKELY(a == nullptr)) {
+    return false;
+  }
+  int32_t index = shadow_frame->GetVReg(inst->VRegC_23x());
+  Object* val = shadow_frame->GetVRegReference(inst->VRegA_23x(inst_data));
+  ObjectArray<Object>* array = a->AsObjectArray<Object>();
+  if (array->CheckIsValidIndex(index) && array->CheckAssignable(val)) {
+    array->SetWithoutChecks<false>(index, val);
+    return true;
+  }
+  return false;
+}
+
+extern "C" bool MterpFilledNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                    Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFilledNewArray<false, false, false>(inst, *shadow_frame, self,
+                                               shadow_frame->GetResultRegister());
+}
+
+extern "C" bool MterpFilledNewArrayRange(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                         Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFilledNewArray<true, false, false>(inst, *shadow_frame, self,
+                                              shadow_frame->GetResultRegister());
+}
+
+extern "C" bool MterpNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                              uint32_t inst_data, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  int32_t length = shadow_frame->GetVReg(inst->VRegB_22c(inst_data));
+  Object* obj = AllocArrayFromCode<false, true>(
+      inst->VRegC_22c(), length, shadow_frame->GetMethod(), self,
+      Runtime::Current()->GetHeap()->GetCurrentAllocator());
+  if (UNLIKELY(obj == nullptr)) {
+      return false;
+  }
+  shadow_frame->SetVRegReference(inst->VRegA_22c(inst_data), obj);
+  return true;
+}
+
+extern "C" bool MterpHandleException(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(self->IsExceptionPending());
+  const instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  uint32_t found_dex_pc = FindNextInstructionFollowingException(self, *shadow_frame,
+                                                                shadow_frame->GetDexPC(),
+                                                                instrumentation);
+  if (found_dex_pc == DexFile::kDexNoIndex) {
+    return false;
+  }
+  // OK - we can deal with it.  Update and continue.
+  shadow_frame->SetDexPC(found_dex_pc);
+  return true;
+}
+
+extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  if (inst->Opcode(inst_data) == Instruction::MOVE_EXCEPTION) {
+    self->AssertPendingException();
+  } else {
+    self->AssertNoPendingException();
+  }
+}
+
+extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "DivideByZero: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogArrayIndexException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "ArrayIndex: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogNegativeArraySizeException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "NegativeArraySize: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogNoSuchMethodException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "NoSuchMethod: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogExceptionThrownException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "ExceptionThrown: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogNullObjectException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "NullObject: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogFallback(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "Fallback: " << inst->Opcode(inst_data) << ", Exception Pending?: "
+            << self->IsExceptionPending();
+}
+
+extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  if (flags & kCheckpointRequest) {
+    LOG(INFO) << "Checkpoint fallback: " << inst->Opcode(inst_data);
+  } else if (flags & kSuspendRequest) {
+    LOG(INFO) << "Suspend fallback: " << inst->Opcode(inst_data);
+  }
+}
+
+extern "C" void MterpSuspendCheck(Thread* self)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  self->AllowThreadSuspension();
+}
+
+extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
+                                               uint64_t* new_value, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
+                                          sizeof(int64_t));
+  if (LIKELY(field != nullptr)) {
+    // Compiled code can't use transactional mode.
+    field->Set64<false>(field->GetDeclaringClass(), *new_value);
+    return 0;  // success
+  }
+  field = FindFieldFromCode<StaticPrimitiveWrite, true>(field_idx, referrer, self, sizeof(int64_t));
+  if (LIKELY(field != nullptr)) {
+    // Compiled code can't use transactional mode.
+    field->Set64<false>(field->GetDeclaringClass(), *new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSet8InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
+                                        ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int8_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    if (type == Primitive::kPrimBoolean) {
+      field->SetBoolean<false>(obj, new_value);
+    } else {
+      DCHECK_EQ(Primitive::kPrimByte, type);
+      field->SetByte<false>(obj, new_value);
+    }
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSet16InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint16_t new_value,
+                                        ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int16_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    if (type == Primitive::kPrimChar) {
+      field->SetChar<false>(obj, new_value);
+    } else {
+      DCHECK_EQ(Primitive::kPrimShort, type);
+      field->SetShort<false>(obj, new_value);
+    }
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSet32InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
+                                         uint32_t new_value, ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int32_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    field->Set32<false>(obj, new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSet64InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
+                                         uint64_t* new_value, ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int64_t));
+  if (LIKELY(field != nullptr  && obj != nullptr)) {
+    field->Set64<false>(obj, *new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSetObjInstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
+                                         mirror::Object* new_value, ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
+                                          sizeof(mirror::HeapReference<mirror::Object>));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    field->SetObj<false>(obj, new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t index)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (UNLIKELY(arr == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    return nullptr;
+  }
+  ObjectArray<Object>* array = arr->AsObjectArray<Object>();
+  if (LIKELY(array->CheckIsValidIndex(index))) {
+    return array->GetWithoutChecks(index);
+  } else {
+    return nullptr;
+  }
+}
+
+extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (UNLIKELY(obj == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    return nullptr;
+  }
+  return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
+}
+
+}  // namespace interpreter
+}  // namespace art
diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h
new file mode 100644
index 0000000..90d21e9
--- /dev/null
+++ b/runtime/interpreter/mterp/mterp.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_MTERP_MTERP_H_
+#define ART_RUNTIME_INTERPRETER_MTERP_MTERP_H_
+
+/*
+ * Mterp assembly handler bases
+ */
+extern "C" void* artMterpAsmInstructionStart[];
+extern "C" void* artMterpAsmInstructionEnd[];
+extern "C" void* artMterpAsmAltInstructionStart[];
+extern "C" void* artMterpAsmAltInstructionEnd[];
+
+namespace art {
+namespace interpreter {
+
+void InitMterpTls(Thread* self);
+void CheckMterpAsmConstants();
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_MTERP_MTERP_H_
diff --git a/runtime/interpreter/mterp/mterp_stub.cc b/runtime/interpreter/mterp/mterp_stub.cc
new file mode 100644
index 0000000..7e7337e
--- /dev/null
+++ b/runtime/interpreter/mterp/mterp_stub.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "../interpreter_common.h"
+
+/*
+ * Stub definitions for targets without mterp implementations.
+ */
+
+namespace art {
+namespace interpreter {
+/*
+ * Call this during initialization to verify that the values in asm-constants.h
+ * are still correct.
+ */
+void CheckMterpAsmConstants() {
+  // Dummy version when mterp not implemented.
+}
+
+void InitMterpTls(Thread* self) {
+  self->SetMterpDefaultIBase(nullptr);
+  self->SetMterpCurrentIBase(nullptr);
+  self->SetMterpAltIBase(nullptr);
+}
+
+/*
+ * The platform-specific implementation must provide this.
+ */
+extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,
+                                 ShadowFrame* shadow_frame, JValue* result_register)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self); UNUSED(shadow_frame); UNUSED(code_item); UNUSED(result_register);
+  UNIMPLEMENTED(art::FATAL);
+  return false;
+}
+
+}  // namespace interpreter
+}  // namespace art
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
new file mode 100644
index 0000000..9ae98a2
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -0,0 +1,12201 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'arm'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: arm/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.  If VFP
+is present, registers s16-s31 (a/k/a d8-d15, a/k/a q4-q7) must be preserved,
+s0-s15 (d0-d7, q0-q3) do not need to be.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+Mterp and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rSELF     self (Thread) pointer
+  r7  rINST     first 16-bit code unit of current instruction
+  r8  rIBASE    interpreted instruction base pointer, used for computed goto
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rPC     r4
+#define rFP     r5
+#define rSELF   r6
+#define rINST   r7
+#define rIBASE  r8
+#define rREFS   r11
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
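+
+/*
+ * For example, OFF_FP_DEX_PC expands to
+ * (SHADOWFRAME_DEX_PC_OFFSET - SHADOWFRAME_VREGS_OFFSET): a negative offset
+ * that reaches back from rFP (which points at the vregs) to the dex_pc_
+ * field earlier in the ShadowFrame.
+ */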
+
+/*
+ *
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+.macro EXPORT_DEX_PC tmp
+    ldr  \tmp, [rFP, #OFF_FP_CODE_ITEM]
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    add  \tmp, #CODEITEM_INSNS_OFFSET
+    sub  \tmp, rPC, \tmp
+    asr  \tmp, #1
+    str  \tmp, [rFP, #OFF_FP_DEX_PC]
+.endm
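+
+/*
+ * In other words, EXPORT_DEX_PC stores the raw dex pc pointer and also
+ * recovers the instruction offset as
+ *   dex_pc = (rPC - (code_item + CODEITEM_INSNS_OFFSET)) / 2
+ * i.e. the byte distance from the start of insns[] converted to 16-bit
+ * code units.
+ */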
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    ldrh    rINST, [rPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]!
+.endm
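+
+/*
+ * Illustrative use: "FETCH_ADVANCE_INST 2" loads the halfword two code units
+ * (4 bytes) ahead into rINST and, via the pre-indexed writeback form of ldrh,
+ * leaves rPC pointing at that new instruction.
+ */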
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to rPC and rINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]
+.endm
+
+/* Advance rPC by some number of code units. */
+.macro ADVANCE count
+  add  rPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg.  Updates
+ * rPC to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ *
+ * We want to write "ldrh rINST, [rPC, _reg, lsl #1]!", but some of the
+ * bits that hold the shift distance are used for the half/byte/sign flags.
+ * In some cases we can pre-double _reg for free, so we require a byte offset
+ * here.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    ldrh    rINST, [rPC, \reg]!
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance rPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [rPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [rPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [rPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, rINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Because this only jumps within the
+ * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
+ */
+.macro GOTO_OPCODE reg
+    add     pc, rIBASE, \reg, lsl #7
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     pc, \base, \reg, lsl #7
+.endm
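+
+/*
+ * The "lsl #7" scales the opcode by 128 bytes, the fixed per-handler size
+ * enforced by the .balign 128 directives below (and verified by
+ * CheckMterpAsmConstants), so the computed goto lands at rIBASE + opcode * 128.
+ */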
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [rFP, \vreg, lsl #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    mov     \reg, #0
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
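+
+/*
+ * Note the dual bookkeeping: SET_VREG clears the corresponding rREFS slot,
+ * while SET_VREG_OBJECT stores the same value through both rFP and rREFS,
+ * keeping the shadow frame's reference array in sync as described in the
+ * notes above.
+ */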
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, rFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+  ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
+
+/* File: arm/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .align  2
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  r0  Thread* self
+ *  r1  code_item
+ *  r2  ShadowFrame
+ *  r3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .fnstart
+    .save {r4-r10,fp,lr}
+    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
+    .pad    #4
+    sub     sp, sp, #4                  @ align 64
+
+    /* Remember the return register */
+    str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     r1, [r2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     rSELF, r0
+    ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs[] (base of vreg array in shadow frame).
+    add     rREFS, rFP, r0, lsl #2                 @ point to reference array in shadow frame
+    ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
+    add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
+    add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          @ load rINST from rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* NOTE: no fallthrough */
+
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: arm/op_nop.S */
+    FETCH_ADVANCE_INST 1                @ advance to next instr, load rINST
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    GOTO_OPCODE ip                      @ execute it
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: arm/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
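+
+/*
+ * Note: the ".if 0" above is the result of template substitution by
+ * gen-mterp.py; the move-object variant of this fragment presumably
+ * substitutes 1 so that the SET_VREG_OBJECT path is taken instead.
+ */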
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: arm/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH r1, 1                         @ r1<- BBBB
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[AA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: arm/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH r1, 2                         @ r1<- BBBB
+    FETCH r0, 1                         @ r0<- AAAA
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[AAAA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AAAA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: arm/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: arm/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 1                         @ r3<- BBBB
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: arm/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 2                         @ r3<- BBBB
+    FETCH r2, 1                         @ r2<- AAAA
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AAAA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AAAA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: arm/op_move_object.S */
+/* File: arm/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: arm/op_move_object_from16.S */
+/* File: arm/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH r1, 1                         @ r1<- BBBB
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r2, r0              @ fp[AA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: arm/op_move_object_16.S */
+/* File: arm/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH r1, 2                         @ r1<- BBBB
+    FETCH r0, 1                         @ r0<- AAAA
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r2, r0              @ fp[AAAA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AAAA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: arm/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r0, [rFP, #OFF_FP_RESULT_REGISTER]  @ get pointer to result JValue.
+    ldr     r0, [r0]                    @ r0 <- result.i.
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r0, r2, r1          @ fp[AA]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: arm/op_move_result_wide.S */
+    /* move-result-wide vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: arm/op_move_result_object.S */
+/* File: arm/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r0, [rFP, #OFF_FP_RESULT_REGISTER]  @ get pointer to result JValue.
+    ldr     r0, [r0]                    @ r0 <- result.i.
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r0, r2, r1          @ fp[AA]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: arm/op_move_exception.S */
+    /* move-exception vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     r1, #0                      @ r1<- 0
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    SET_VREG_OBJECT r3, r2              @ fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    str     r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ clear exception
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: arm/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: arm/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA
+    mov     r1, #0
+    b       MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: arm/op_return_wide.S */
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
+    b       MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: arm/op_return_object.S */
+/* File: arm/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA
+    mov     r1, #0
+    b       MterpReturn
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: arm/op_const_4.S */
+    /* const/4 vA, #+B */
+    mov     r1, rINST, lsl #16          @ r1<- Bxxx0000
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mov     r1, r1, asr #28             @ r1<- sssssssB (sign-extended)
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    SET_VREG r1, r0                     @ fp[A]<- r1
+    GOTO_OPCODE ip                      @ execute next instruction
+
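+/*
+ * Illustrative sketch (not part of the generated handler): the lsl #16 /
+ * asr #28 pair above sign-extends the 4-bit literal B that sits in bits
+ * 15:12 of the instruction word.
+ *
+ *   int32_t b = ((int32_t)(inst << 16)) >> 28;   // sssssssB
+ *   fp[a] = (uint32_t)b;                         // SET_VREG
+ */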
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: arm/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: arm/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: arm/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, r0, lsl #16             @ r0<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: arm/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: arm/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH r0, 1                         @ r0<- 0000bbbb (low)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_S r2, 2                       @ r2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: arm/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (low middle)
+    FETCH r2, 3                         @ r2<- hhhh (high middle)
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb (low word)
+    FETCH r3, 4                         @ r3<- HHHH (high)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
+    FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
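+/*
+ * Illustrative sketch (not part of the generated handler): the four FETCHes
+ * above gather the 64-bit literal 16 bits at a time, low word first, as the
+ * Dalvik instruction stream stores it.
+ *
+ *   uint32_t lo = (uint32_t)bbbb | ((uint32_t)BBBB << 16);   // low word
+ *   uint32_t hi = (uint32_t)hhhh | ((uint32_t)HHHH << 16);   // high word
+ *   uint64_t value = ((uint64_t)hi << 32) | lo;              // vAA/vAA+1
+ */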
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: arm/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH r1, 1                         @ r1<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, #0                      @ r0<- 00000000
+    mov     r1, r1, lsl #16             @ r1<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: arm/op_const_string.S */
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: arm/op_const_string_jumbo.S */
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 3                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: arm/op_const_class.S */
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstClass             @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cmp     r0, #0
+    bne     MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: arm/op_monitor_enter.S */
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                      @ r0<- vAA (object)
+    mov      r1, rSELF                   @ r1<- self
+    bl       artLockObjectFromCode
+    cmp      r0, #0
+    bne      MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   @ extract opcode from rINST
+    GOTO_OPCODE ip                       @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: arm/op_monitor_exit.S */
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8          @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA (object)
+    mov      r1, rSELF                  @ r1<- self
+    bl       artUnlockObjectFromCode    @ r0<- success for unlock(self, obj)
+    cmp     r0, #0                      @ failed?
+    bne     MterpException
+    FETCH_ADVANCE_INST 1                @ before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: arm/op_check_cast.S */
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- BBBB
+    mov      r1, rINST, lsr #8          @ r1<- AA
+    GET_VREG r1, r1                     @ r1<- object
+    ldr      r2, [rFP, #OFF_FP_METHOD]  @ r2<- method
+    mov      r3, rSELF                  @ r3<- self
+    bl       MterpCheckCast             @ (index, obj, method, self)
+    PREFETCH_INST 2
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: arm/op_instance_of.S */
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    FETCH     r0, 1                     @ r0<- CCCC
+    mov       r1, rINST, lsr #12        @ r1<- B
+    GET_VREG  r1, r1                    @ r1<- vB (object)
+    ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
+    mov       r3, rSELF                 @ r3<- self
+    mov       r9, rINST, lsr #8         @ r9<- A+
+    and       r9, r9, #15               @ r9<- A
+    bl        MterpInstanceOf           @ (index, obj, method, self)
+    ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp       r1, #0                    @ exception pending?
+    bne       MterpException
+    ADVANCE 2                           @ advance rPC
+    SET_VREG r0, r9                     @ vA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: arm/op_array_length.S */
+    /*
+     * Return the length of an array.
+     */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    GET_VREG r0, r1                     @ r0<- vB (object ref)
+    cmp     r0, #0                      @ is object null?
+    beq     common_errNullObject        @ yup, fail
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- array length
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r3, r2                     @ vA<- length
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: arm/op_new_instance.S */
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rSELF
+    mov     r2, rINST
+    bl      MterpNewInstance           @ (shadow_frame, self, inst_data)
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2               @ advance rPC, load rINST
+    GET_INST_OPCODE ip                 @ extract opcode from rINST
+    GOTO_OPCODE ip                     @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: arm/op_new_array.S */
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpNewArray
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: arm/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rSELF
+    bl      MterpFilledNewArray
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: arm/op_filled_new_array_range.S */
+/* File: arm/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rSELF
+    bl      MterpFilledNewArrayRange
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: arm/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r1, r0, r1, lsl #16         @ r1<- BBBBbbbb
+    GET_VREG r0, r3                     @ r0<- vAA (array object)
+    add     r1, rPC, r1, lsl #1         @ r1<- PC + BBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          @ (obj, payload)
+    cmp     r0, #0                      @ 0 means an exception is thrown
+    beq     MterpPossibleException      @ exception?
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: arm/op_throw.S */
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r1, r2                      @ r1<- vAA (exception object)
+    cmp      r1, #0                      @ null object?
+    beq      common_errNullObject        @ yes, throw an NPE instead
+    str      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ thread->exception<- obj
+    b        MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: arm/op_goto.S */
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    /* tuning: use sbfx for 6t2+ targets */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
+       @ If backward branch, refresh rIBASE
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backward branch, refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
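+/*
+ * Illustrative sketch (not part of the generated handler): the signed 8-bit
+ * AA field is a code-unit offset; doubling it gives the byte offset applied
+ * to rPC, and a negative result (backward branch) routes through the
+ * suspend check. check_suspend() is a placeholder name.
+ *
+ *   int32_t code_units = (int8_t)(inst >> 8);    // ssssssAA
+ *   pc += code_units;                            // FETCH_ADVANCE_INST_RB (*2 bytes)
+ *   if (code_units < 0) check_suspend();         // backward branch only
+ */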
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: arm/op_goto_16.S */
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+#if MTERP_SUSPEND
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: arm/op_goto_32.S */
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  Because
+     * we need the V bit set, we'll use an adds to convert from Dalvik
+     * offset to byte offset.
+     */
+    /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: arm/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
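+/*
+ * Illustrative sketch (not part of the generated handler), assuming the
+ * helper takes (payload, value) and returns a signed code-unit distance, as
+ * the register setup above suggests; the no-match case is assumed to return
+ * the width of the switch instruction itself.
+ *
+ *   int32_t code_units = MterpDoPackedSwitch(payload, vAA);
+ *   pc += code_units;                          // doubled to bytes in the asm
+ *   if (code_units <= 0) check_suspend();      // self/backward branch
+ */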
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: arm/op_sparse_switch.S */
+/* File: arm/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: arm/op_cmpl_float.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
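+/*
+ * Illustrative note (not part of the generated handler): the "else return -1"
+ * arm of the comment above is the NaN case. cmpl-float preloads -1
+ * (mvn r0, #0) and only overwrites it for > or ==, so an unordered compare
+ * falls through to -1; the cmpg variant below preloads +1 instead, which is
+ * the only difference between the two handlers.
+ */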
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: arm/op_cmpg_float.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: arm/op_cmpl_double.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: arm/op_cmpg_double.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: arm/op_cmp_long.S */
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     *
+     * We load the full values with LDM, but in practice many values could
+     * be resolved by only looking at the high word.  This could be made
+     * faster or slower by splitting the LDM into a pair of LDRs.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
+     */
+    /* cmp-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .Lop_cmp_long_less            @ signed compare on high part
+    bgt     .Lop_cmp_long_greater
+    subs    r1, r0, r2                  @ r1<- r0 - r2
+    bhi     .Lop_cmp_long_greater         @ unsigned compare on low part
+    bne     .Lop_cmp_long_less
+    b       .Lop_cmp_long_finish          @ equal; r1 already holds 0
+
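+/*
+ * Illustrative sketch (not part of the generated handler): the multi-compare
+ * sequence above (signed on the high words, unsigned on the low words)
+ * implements the usual cmp-long contract.
+ *
+ *   int cmp_long(int64_t x, int64_t y) {
+ *       if (x == y) return 0;
+ *       return (x > y) ? 1 : -1;
+ *   }
+ */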
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: arm/op_if_eq.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movne r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movne r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
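+/*
+ * Illustrative sketch (not part of the generated handlers): the common
+ * bincmp shape above. When the reverse comparison holds, the branch is not
+ * taken and rPC simply advances past the 2-code-unit if-instruction;
+ * otherwise the signed CCCC offset is used, and a backward branch goes
+ * through the suspend check. reverse_cmp() and check_suspend() are
+ * placeholder names.
+ *
+ *   bool not_taken = reverse_cmp(vA, vB);              // e.g. "ne" for if-eq
+ *   int32_t code_units = not_taken ? 2 : (int16_t)CCCC;
+ *   pc += code_units;                                   // *2 bytes in the asm
+ *   if (code_units < 0) check_suspend();                // backward branch only
+ */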
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: arm/op_if_ne.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    moveq r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    moveq r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: arm/op_if_lt.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movge r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movge r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: arm/op_if_ge.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movlt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movlt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: arm/op_if_gt.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movle r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movle r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: arm/op_if_le.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movgt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movgt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: arm/op_if_eqz.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vA, 0)
+    movne r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vA, 0)
+    movne r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: arm/op_if_nez.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vA, 0)
+    moveq r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vA, 0)
+    moveq r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: arm/op_if_ltz.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vA, 0)
+    movge r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vA, 0)
+    movge r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: arm/op_if_gez.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vA, 0)
+    movlt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vA, 0)
+    movlt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: arm/op_if_gtz.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vA, 0)
+    movle r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vA, 0)
+    movle r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: arm/op_if_lez.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vA, 0)
+    movgt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vA, 0)
+    movgt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: arm/op_unused_3e.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: arm/op_unused_3f.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: arm/op_unused_40.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: arm/op_unused_41.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: arm/op_unused_42.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: arm/op_unused_43.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #2     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldr   r2, [r0, #MIRROR_INT_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
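+
+    /*
+     * The unsigned compare/bcs above handles both failure modes at once:
+     * a negative index wraps to a large unsigned value, so index < 0 and
+     * index >= length both branch to common_errArrayIndex.
+     */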
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: arm/op_aget_wide.S */
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use LDRD.
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: arm/op_aget_object.S */
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     @ (array, index)
+    ldr      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp      r1, #0
+    bne      MterpException
+    SET_VREG_OBJECT r0, r9
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: arm/op_aget_boolean.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrb   r2, [r0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: arm/op_aget_byte.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrsb   r2, [r0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: arm/op_aget_char.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrh   r2, [r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: arm/op_aget_short.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrsh   r2, [r0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #2     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    str  r2, [r0, #MIRROR_INT_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: arm/op_aput_wide.S */
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ vBB[vCC]<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: arm/op_aput_object.S */
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpAputObject
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: arm/op_aput_boolean.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strb  r2, [r0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: arm/op_aput_byte.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strb  r2, [r0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: arm/op_aput_char.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strh  r2, [r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: arm/op_aput_short.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strh  r2, [r0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGet32InstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
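+
+    /*
+     * The ".if 0/.else" above is the template's object/non-object selector,
+     * folded to a constant per opcode: plain iget stores through SET_VREG,
+     * while op_iget_object below expands with the flag set to 1 and records
+     * the result as a reference via SET_VREG_OBJECT.
+     */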
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: arm/op_iget_wide.S */
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGet64InstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpException                @ bail out
+    add     r3, rFP, r2, lsl #2            @ r3<- &fp[A]
+    stmia   r3, {r0-r1}                    @ fp[A]<- r0/r1
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: arm/op_iget_object.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetObjInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 1
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: arm/op_iget_boolean.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetBooleanInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: arm/op_iget_byte.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetByteInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: arm/op_iget_char.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetCharInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: arm/op_iget_short.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetShortInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet32InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: arm/op_iput_wide.S */
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    add      r2, rFP, r2, lsl #2        @ r2<- &fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: arm/op_iput_object.S */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpIputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: arm/op_iput_boolean.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: arm/op_iput_byte.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: arm/op_iput_char.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: arm/op_iput_short.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGet32StaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
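+
+    /*
+     * As in the sput handlers, PREFETCH_INST/ADVANCE are split around the
+     * exception check: the next instruction is fetched early, but rPC is
+     * only advanced once the helper is known not to have thrown.
+     */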
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: arm/op_sget_wide.S */
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     */
+    /* sget-wide vAA, field@BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGet64StaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r9, rINST, lsr #8             @ r9<- AA
+    add   r9, rFP, r9, lsl #2           @ r9<- &fp[AA]
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: arm/op_sget_object.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetObjStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+.if 1
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: arm/op_sget_boolean.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetBooleanStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: arm/op_sget_byte.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetByteStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: arm/op_sget_char.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetCharStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: arm/op_sget_short.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetShortStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet32StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: arm/op_sput_wide.S */
+    /*
+     * SPUT_WIDE handler wrapper.
+     *
+     */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    ldr     r1, [rFP, #OFF_FP_METHOD]
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r2, rFP, r2, lsl #2
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: arm/op_sput_object.S */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpSputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: arm/op_sput_boolean.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: arm/op_sput_byte.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: arm/op_sput_char.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: arm/op_sput_short.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: arm/op_invoke_virtual.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtual
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
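+    /*
+     * Sketch of the common shape shared by all invoke handlers below
+     * (illustrative, not generated code): the C helper receives the thread,
+     * the shadow frame, the dex PC and the instruction word, and returns
+     * false when an exception is pending.
+     *
+     *   if (!MterpInvokeVirtual(self, shadow_frame, dex_pc_ptr, inst))
+     *     goto MterpException;
+     *   dex_pc += 3;                 // every invoke form is 3 code units
+     */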
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: arm/op_invoke_super.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeSuper
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: arm/op_invoke_direct.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeDirect
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: arm/op_invoke_static.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeStatic
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: arm/op_invoke_interface.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeInterface
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: arm/op_return_void_no_barrier.S */
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: arm/op_invoke_virtual_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtualRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: arm/op_invoke_super_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeSuperRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: arm/op_invoke_direct_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeDirectRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: arm/op_invoke_static_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeStaticRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: arm/op_invoke_interface_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeInterfaceRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: arm/op_unused_79.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: arm/op_unused_7a.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: arm/op_neg_int.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    rsb     r0, r0, #0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
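+    /*
+     * Worked example of the template above with neg-int (illustrative):
+     * vB = 5 -> r0 = 5; "rsb r0, r0, #0" computes 0 - 5; vA <- -5.
+     */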
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: arm/op_not_int.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mvn     r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: arm/op_neg_long.S */
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    rsbs    r0, r0, #0                           @ optional op; may set condition codes
+    rsc     r1, r1, #0                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
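+    /*
+     * The rsbs/rsc pair above negates a 64-bit value as 0 - x with borrow
+     * propagation (illustrative):
+     *   lo = 0 - vB.lo            (rsbs, sets the borrow flag)
+     *   hi = 0 - vB.hi - borrow   (rsc)
+     * e.g. vB = 1 -> lo = 0xFFFFFFFF, hi = 0xFFFFFFFF, i.e. -1.
+     */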
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: arm/op_not_long.S */
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mvn     r0, r0                           @ optional op; may set condition codes
+    mvn     r1, r1                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: arm/op_neg_float.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    add     r0, r0, #0x80000000                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: arm/op_neg_double.S */
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    add     r1, r1, #0x80000000                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: arm/op_int_to_long.S */
+/* File: arm/unopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0", where
+     * "result" is a 64-bit quantity in r0/r1.
+     *
+     * For: int-to-long, int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mov     r1, r0, asr #31                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
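+    /*
+     * Here the "instr" slot is "mov r1, r0, asr #31": the high word is the
+     * sign extension of the low word (illustrative):
+     *   vB = -2 -> r0 = 0xFFFFFFFE, r1 = 0xFFFFFFFF   (-2 as a long)
+     *   vB =  7 -> r0 = 0x00000007, r1 = 0x00000000
+     */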
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: arm/op_int_to_float.S */
+/* File: arm/funop.S */
+    /*
+     * Generic 32-bit unary floating-point operation.  Provide an "instr"
+     * line that specifies an instruction that performs "s1 = op s0".
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    fsitos  s1, s0                              @ s1<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s1, [r9]                    @ vA<- s1
+    GOTO_OPCODE ip                      @ jump to next instruction
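+    /*
+     * fsitos converts the signed 32-bit integer in s0 to single precision
+     * in s1, so the handler is effectively vA = (float)vB (illustrative):
+     * vB = 3 -> vA = 3.0f; rounding only applies once |vB| exceeds 2^24.
+     */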
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: arm/op_int_to_double.S */
+/* File: arm/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    fsitod  d0, s0                              @ d0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fstd    d0, [r9]                    @ vA<- d0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: arm/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: arm/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: arm/op_long_to_float.S */
+/* File: arm/unopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0/r1", where
+     * "result" is a 32-bit quantity in r0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     *
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for op_move.)
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      __aeabi_l2f                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
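+    /*
+     * __aeabi_l2f is the AEABI helper "float from int64": it takes the
+     * 64-bit value in r0/r1 and returns the float in r0, so this handler is
+     * effectively vA = (float)(int64_t)vB, e.g. vB = 2^32 -> vA =
+     * 4294967296.0f (illustrative).
+     */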
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: arm/op_long_to_double.S */
+    /*
+     * Specialised 64-bit floating point operation.
+     *
+     * Note: The result will be returned in d2.
+     *
+     * For: long-to-double
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    vldr    d0, [r3]                    @ d0<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    vcvt.f64.s32    d1, s1              @ d1<- (double)(vAAh)
+    vcvt.f64.u32    d2, s0              @ d2<- (double)(vAAl)
+    vldr            d3, constvalop_long_to_double
+    vmla.f64        d2, d1, d3          @ d2<- vAAh*2^32 + vAAl
+
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    vstr.64 d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+    /* literal pool helper */
+constvalop_long_to_double:
+    .8byte          0x41f0000000000000
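+    /*
+     * 0x41f0000000000000 is 2^32 encoded as an IEEE-754 double, so the
+     * sequence above computes (illustrative arithmetic):
+     *   result = (double)(int32_t)vB.hi * 2^32 + (double)(uint32_t)vB.lo
+     * e.g. vB = 0x00000001_00000005 -> 1.0 * 4294967296.0 + 5.0
+     *                               = 4294967301.0
+     */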
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: arm/op_float_to_int.S */
+/* File: arm/funop.S */
+    /*
+     * Generic 32-bit unary floating-point operation.  Provide an "instr"
+     * line that specifies an instruction that performs "s1 = op s0".
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    ftosizs s1, s0                              @ s1<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s1, [r9]                    @ vA<- s1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: arm/op_float_to_long.S */
+@include "arm/unopWider.S" {"instr":"bl      __aeabi_f2lz"}
+/* File: arm/unopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0", where
+     * "result" is a 64-bit quantity in r0/r1.
+     *
+     * For: int-to-long, int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    bl      f2l_doconv                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: arm/op_float_to_double.S */
+/* File: arm/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    fcvtds  d0, s0                              @ d0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fstd    d0, [r9]                    @ vA<- d0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: arm/op_double_to_int.S */
+/* File: arm/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary floating point operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    fldd    d0, [r3]                    @ d0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    ftosizd  s0, d0                              @ s0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s0, [r9]                    @ vA<- s0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: arm/op_double_to_long.S */
+@include "arm/unopWide.S" {"instr":"bl      __aeabi_d2lz"}
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      d2l_doconv                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: arm/op_double_to_float.S */
+/* File: arm/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary floating point operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    fldd    d0, [r3]                    @ d0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    fcvtsd  s0, d0                              @ s0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s0, [r9]                    @ vA<- s0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: arm/op_int_to_byte.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    sxtb    r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: arm/op_int_to_char.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    uxth    r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: arm/op_int_to_short.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    sxth    r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: arm/op_add_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
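+    /*
+     * Pseudo-C sketch of the binop template (illustrative names):
+     *   uint16_t ccbb = inst[1];
+     *   uint32_t vBB = vregs[ccbb & 0xff], vCC = vregs[ccbb >> 8];
+     *   if (chkzero && vCC == 0) goto common_errDivideByZero;
+     *   vregs[AA] = vBB OP vCC;     // for add-int: OP is +, chkzero is 0
+     */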
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: arm/op_sub_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    sub     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: arm/op_mul_int.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: arm/op_div_int.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl    __aeabi_idiv                  @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
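+    /*
+     * Note on the #ifdef above: __ARM_ARCH_EXT_IDIV__ is defined when the
+     * target core has hardware SDIV/UDIV, so the division is done inline;
+     * otherwise the AEABI helper __aeabi_idiv does it in software.  In both
+     * cases INT_MIN / -1 yields INT_MIN, matching the dex semantics
+     * (illustrative note; see also the binop.S comment above).
+     */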
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: arm/op_rem_int.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls  r1, r1, r2, r0                 @ r1<- op, r0-r2 changed
+#else
+    bl   __aeabi_idivmod                @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
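+    /*
+     * The sdiv/mls pair computes the remainder without a dedicated
+     * instruction (illustrative arithmetic):
+     *   r2 = r0 / r1          (truncated toward zero)
+     *   r1 = r0 - r2 * r1     (mls: multiply-and-subtract)
+     * e.g. r0 = -7, r1 = 3 -> r2 = -2, remainder = -7 - (-6) = -1.
+     */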
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: arm/op_and_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: arm/op_or_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: arm/op_xor_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: arm/op_shl_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: arm/op_shr_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: arm/op_ushr_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: arm/op_add_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    adds    r0, r0, r2                           @ optional op; may set condition codes
+    adc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
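+    /*
+     * The adds/adc pair is the usual 64-bit add with carry propagation
+     * (illustrative):
+     *   lo = vBB.lo + vCC.lo          (adds, sets carry)
+     *   hi = vBB.hi + vCC.hi + carry  (adc)
+     * e.g. 0x00000000_FFFFFFFF + 1 = 0x00000001_00000000.
+     */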
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: arm/op_sub_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    subs    r0, r0, r2                           @ optional op; may set condition codes
+    sbc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: arm/op_mul_long.S */
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
+    /* mul-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r0, rFP, r0, lsl #2         @ r0<- &fp[AA]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    GOTO_OPCODE ip                      @ jump to next instruction
+
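Editorial note: the WX x YZ decomposition described in the comment above can be written out in C. This is only an illustration of the arithmetic the handler performs; the function name is invented and nothing below is part of the generated code.

    #include <stdint.h>

    /* mul-long: keep only the low 64 bits of the 64x64 product.
     * lo(result) = lo(Z*X); hi(result) = hi(Z*X) + Z*W + Y*X (mod 2^32). */
    static uint64_t mul_long_sketch(uint64_t wx, uint64_t yz) {
        uint32_t x = (uint32_t)wx, w = (uint32_t)(wx >> 32);   /* r0/r1: X, W */
        uint32_t z = (uint32_t)yz, y = (uint32_t)(yz >> 32);   /* r2/r3: Z, Y */
        uint64_t zx = (uint64_t)z * x;                         /* umull r9, r10, r2, r0 */
        uint32_t hi = (uint32_t)(zx >> 32) + z * w + y * x;    /* mla r2, r0, r3, ip; add r10, r2, r10 */
        return ((uint64_t)hi << 32) | (uint32_t)zx;
    }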
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: arm/op_div_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: arm/op_rem_long.S */
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2,r3}     @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
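Editorial note: div-long and rem-long funnel through the same __aeabi_ldivmod call; as noted above, the quotient comes back in r0/r1 and the remainder in r2/r3, and the only difference between the two handlers is which pair is stored to vAA. A minimal C sketch of the semantics (hypothetical names, zero divisor already diverted to common_errDivideByZero, overflow corner cases ignored):

    #include <stdint.h>

    static int64_t div_long_sketch(int64_t vBB, int64_t vCC) { return vBB / vCC; }  /* stmia r9, {r0,r1} */
    static int64_t rem_long_sketch(int64_t vBB, int64_t vCC) { return vBB % vCC; }  /* stmia r9, {r2,r3} */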
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: arm/op_and_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r0, r0, r2                           @ optional op; may set condition codes
+    and     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: arm/op_or_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    orr     r0, r0, r2                           @ optional op; may set condition codes
+    orr     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: arm/op_xor_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    eor     r0, r0, r2                           @ optional op; may set condition codes
+    eor     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: arm/op_shl_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shl-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
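Editorial note: the shl-long handler composes the 64-bit shift from two 32-bit shifts, with the movpl instruction covering distances of 32 or more. A C sketch of the same semantics (illustration only; names are made up, and the distance is masked to 6 bits as Dalvik requires):

    #include <stdint.h>

    static uint64_t shl_long_sketch(uint64_t vBB, uint32_t vCC) {
        uint32_t lo = (uint32_t)vBB, hi = (uint32_t)(vBB >> 32);
        uint32_t n = vCC & 63;                            /* and r2, r2, #63 */
        if (n == 0) return vBB;
        if (n < 32)                                       /* hi = (hi << n) | (lo >> (32-n)) */
            return ((uint64_t)((hi << n) | (lo >> (32 - n))) << 32) | (lo << n);
        return (uint64_t)(lo << (n - 32)) << 32;          /* movpl path: distance >= 32 */
    }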
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: arm/op_shr_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: arm/op_ushr_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* ushr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
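Editorial note: shr-long and ushr-long follow the same pattern in the opposite direction; the only difference is whether the bits that fall in from the top are sign bits (asr) or zeros (lsr). C sketch of the unsigned variant (illustration only, hypothetical name):

    #include <stdint.h>

    static uint64_t ushr_long_sketch(uint64_t vBB, uint32_t vCC) {
        uint32_t lo = (uint32_t)vBB, hi = (uint32_t)(vBB >> 32);
        uint32_t n = vCC & 63;
        if (n == 0) return vBB;
        if (n < 32)                                       /* lo = (lo >> n) | (hi << (32-n)) */
            return ((uint64_t)(hi >> n) << 32) | ((lo >> n) | (hi << (32 - n)));
        return hi >> (n - 32);                            /* movpl path: high word falls into the low word */
    }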
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: arm/op_add_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fadds   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: arm/op_sub_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fsubs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: arm/op_mul_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fmuls   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: arm/op_div_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fdivs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: arm/op_rem_float.S */
+/* EABI doesn't define a float remainder function, but libm does */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      fmodf                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
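Editorial note: since the EABI defines no float remainder helper, the handler reuses the generic 32-bit binop template and calls libm's fmodf under the softfp ABI (operands arrive in r0/r1, result returns in r0). The semantics, sketched in C for reference only:

    #include <math.h>

    static float rem_float_sketch(float vBB, float vCC) {
        return fmodf(vBB, vCC);   /* bl fmodf */
    }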
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: arm/op_add_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    faddd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: arm/op_sub_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fsubd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: arm/op_mul_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fmuld   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: arm/op_div_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fdivd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: arm/op_rem_double.S */
+/* EABI doesn't define a double remainder function, but libm does */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      fmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: arm/op_add_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
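Editorial note: the /2addr template differs from the three-operand binop only in operand decoding: vA is both a source and the destination, and the instruction is one code unit long, so FETCH_ADVANCE_INST advances by 1 instead of 2. Roughly, in C (illustration only; fp stands for the register frame):

    #include <stdint.h>

    static void add_int_2addr_sketch(int32_t* fp, uint32_t A, uint32_t B) {
        fp[A] = fp[A] + fp[B];    /* GET_VREG r1/r0, add, SET_VREG back into vA */
    }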
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: arm/op_sub_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    sub     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: arm/op_mul_int_2addr.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: arm/op_div_int_2addr.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
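Editorial note: whether the handler uses the sdiv instruction or falls back to the AEABI helper is decided when the interpreter itself is compiled, via __ARM_ARCH_EXT_IDIV__. A C sketch of that selection (illustration only; the divide-by-zero case has already branched to common_errDivideByZero):

    #ifndef __ARM_ARCH_EXT_IDIV__
    extern int __aeabi_idiv(int numerator, int denominator);
    #endif

    static int div_int_2addr_sketch(int vA, int vB) {
    #ifdef __ARM_ARCH_EXT_IDIV__
        return vA / vB;               /* sdiv r0, r0, r1 */
    #else
        return __aeabi_idiv(vA, vB);  /* bl __aeabi_idiv */
    #endif
    }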
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: arm/op_rem_int_2addr.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between the sdiv block and the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
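Editorial note: sdiv produces only the quotient, so the handler derives the remainder from it with an mls. In C terms (illustration only, hypothetical name):

    static int rem_int_2addr_sketch(int vA, int vB) {
        int q = vA / vB;          /* sdiv r2, r0, r1 */
        return vA - q * vB;       /* mls r1, r1, r2, r0  =>  r0 - (r1 * r2) */
    }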
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: arm/op_and_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: arm/op_or_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: arm/op_xor_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: arm/op_shl_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: arm/op_shr_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: arm/op_ushr_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: arm/op_add_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    adds    r0, r0, r2                           @ optional op; may set condition codes
+    adc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: arm/op_sub_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    subs    r0, r0, r2                           @ optional op; may set condition codes
+    sbc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: arm/op_mul_long_2addr.S */
+    /*
+     * Signed 64-bit integer multiply, "/2addr" version.
+     *
+     * See op_mul_long for an explanation.
+     *
+     * We get a little tight on registers, so to avoid looking up &fp[A]
+     * again we stuff it into rINST.
+     */
+    /* mul-long/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     rINST, rFP, r9, lsl #2      @ rINST<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: arm/op_div_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: arm/op_rem_long_2addr.S */
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2,r3}     @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: arm/op_and_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r0, r0, r2                           @ optional op; may set condition codes
+    and     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: arm/op_or_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    orr     r0, r0, r2                           @ optional op; may set condition codes
+    orr     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: arm/op_xor_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    eor     r0, r0, r2                           @ optional op; may set condition codes
+    eor     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: arm/op_shl_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shl-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: arm/op_shr_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm/op_ushr_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* ushr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
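+    /*
+     * The three 64-bit shift handlers above share one pattern: the shift
+     * distance is masked to 0..63, the result is composed from two 32-bit
+     * shifts (the "orr" folds in the bits crossing the word boundary), and
+     * the conditional "movpl" (taken when the distance is >= 32, per
+     * "subs ip, r2, #32") handles shifts that move entirely into the other
+     * word.
+     */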
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: arm/op_add_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    fadds   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: arm/op_sub_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    fsubs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: arm/op_mul_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    fmuls   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: arm/op_div_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    fdivs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: arm/op_rem_float_2addr.S */
+/* EABI doesn't define a float remainder function, but libm does */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      fmodf                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: arm/op_add_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    faddd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: arm/op_sub_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    fsubd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: arm/op_mul_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    fmuld   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: arm/op_div_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    fdivd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: arm/op_rem_double_2addr.S */
+/* EABI doesn't define a double remainder function, but libm does */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      fmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: arm/op_add_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: arm/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    rsb     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: arm/op_mul_int_lit16.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: arm/op_div_int_lit16.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: arm/op_rem_int_lit16.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl     __aeabi_idivmod              @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
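+    /*
+     * In the hardware-divide path above, the remainder is reconstructed from
+     * the quotient: "sdiv r2, r0, r1" gives r2 = r0 / r1, and
+     * "mls r1, r1, r2, r0" computes r1 = r0 - r1 * r2, i.e. r0 rem r1.
+     * Without __ARM_ARCH_EXT_IDIV__, __aeabi_idivmod yields the same result
+     * in r1.
+     */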
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: arm/op_and_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: arm/op_or_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: arm/op_xor_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: arm/op_add_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm/op_rsub_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    rsb     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: arm/op_mul_int_lit8.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: arm/op_div_int_lit8.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl   __aeabi_idiv                   @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: arm/op_rem_int_lit8.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl       __aeabi_idivmod            @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: arm/op_and_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: arm/op_or_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: arm/op_xor_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: arm/op_shl_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: arm/op_shr_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm/op_ushr_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldr   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
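+    /*
+     * The *-quick opcodes carry a raw field byte offset in the instruction
+     * (offset@CCCC) instead of a field index, so the handler only needs a
+     * null check on the object before loading or storing at that offset;
+     * these instructions are typically produced by the dex quickening pass.
+     */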
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: arm/op_iget_wide_quick.S */
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH ip, 1                         @ ip<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrd    r0, [r3, ip]                @ r0<- obj.field (64 bits, aligned)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r2, lsl #2         @ r3<- &fp[A]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: arm/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- object we're operating on
+    bl      artIGetObjectFromMterp      @ (obj, offset)
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    PREFETCH_INST 2
+    cmp     r3, #0
+    bne     MterpPossibleException      @ bail out
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    str     r0, [r3, r1]             @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: arm/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r3, 1                         @ r3<- field byte offset
+    GET_VREG r2, r2                     @ r2<- fp[B], the object pointer
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    cmp     r2, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    add     r0, rFP, r0, lsl #2         @ r0<- &fp[A]
+    ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strd    r0, [r2, r3]                @ obj.field<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: arm/op_iput_object_quick.S */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpIputObjectQuick
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm/op_invoke_virtual_quick.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtualQuick
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
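+    @ Calling convention (assumed): the MterpInvoke* helpers receive
+    @ (self, shadow_frame, dex_pc_ptr, inst_data) in r0-r3 and return non-zero
+    @ on success; a zero result means an exception is pending, so the stub
+    @ branches to MterpException instead of advancing past the 3-unit invoke.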
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm/op_invoke_virtual_range_quick.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtualQuickRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: arm/op_iput_boolean_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strb    r0, [r3, r1]                @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: arm/op_iput_byte_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strb    r0, [r3, r1]                @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: arm/op_iput_char_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strh    r0, [r3, r1]                @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: arm/op_iput_short_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strh    r0, [r3, r1]                @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: arm/op_iget_boolean_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrb   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: arm/op_iget_byte_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrsb   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: arm/op_iget_char_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrh   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: arm/op_iget_short_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrsh   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_lambda: /* 0xf3 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: arm/op_unused_f4.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_capture_variable: /* 0xf5 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_create_lambda: /* 0xf6 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_liberate_variable: /* 0xf7 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_box_lambda: /* 0xf8 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unbox_lambda: /* 0xf9 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: arm/op_unused_fa.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: arm/op_unused_fb.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: arm/op_unused_fc.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: arm/op_unused_fd.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: arm/op_unused_fe.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: arm/op_unused_ff.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
+
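+@ (The sister blocks below are assumed to be out-of-line continuations for
+@ handlers whose slow paths do not fit in the fixed 128-byte slots of the main
+@ handler table; the in-slot code branches here for the uncommon cases.)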
+/* continuation for op_cmp_long */
+
+.Lop_cmp_long_less:
+    mvn     r1, #0                      @ r1<- -1
+    @ Ideally we would predicate the next mov and avoid the branch, but there is
+    @ no clean way to encode that here; instead, we just replicate the tail end.
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.Lop_cmp_long_greater:
+    mov     r1, #1                      @ r1<- 1
+    @ fall through to _finish
+
+.Lop_cmp_long_finish:
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* continuation for op_float_to_long */
+/*
+ * Convert the float in r0 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to modest integer.  The EABI convert function isn't doing this for us.
+ */
+f2l_doconv:
+    stmfd   sp!, {r4, lr}
+    mov     r1, #0x5f000000             @ (float)maxlong
+    mov     r4, r0
+    bl      __aeabi_fcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    ldmnefd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, #0xdf000000             @ (float)minlong
+    bl      __aeabi_fcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    ldmnefd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r4
+    bl      __aeabi_fcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    ldmeqfd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    bl      __aeabi_f2lz                @ convert float to long
+    ldmfd   sp!, {r4, pc}
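+@ A rough C-level sketch of the clamping above (illustrative only; the helper
+@ name is made up and this block is not part of the generated handler):
+@     int64_t f2l_sketch(float f) {
+@       if (f >= 9223372036854775808.0f)  return INT64_MAX;   // arg >= (float)2^63
+@       if (f <= -9223372036854775808.0f) return INT64_MIN;   // arg <= -(float)2^63
+@       if (f != f)                       return 0;           // NaN converts to 0
+@       return (int64_t)f;                                    // __aeabi_f2lz
+@     }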
+
+/* continuation for op_double_to_long */
+/*
+ * Convert the double in r0/r1 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to modest integer.  The EABI convert function isn't doing this for us.
+ */
+d2l_doconv:
+    stmfd   sp!, {r4, r5, lr}           @ save regs
+    mov     r3, #0x43000000             @ maxlong, as a double (high word)
+    add     r3, #0x00e00000             @  0x43e00000
+    mov     r2, #0                      @ maxlong, as a double (low word)
+    sub     sp, sp, #4                  @ align for EABI
+    mov     r4, r0                      @ save a copy of r0
+    mov     r5, r1                      @  and r1
+    bl      __aeabi_dcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r3, #0xc3000000             @ minlong, as a double (high word)
+    add     r3, #0x00e00000             @  0xc3e00000
+    mov     r2, #0                      @ minlong, as a double (low word)
+    bl      __aeabi_dcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r2, r4                      @ compare against self
+    mov     r3, r5
+    bl      __aeabi_dcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    beq     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    bl      __aeabi_d2lz                @ convert double to long
+
+1:
+    add     sp, sp, #4
+    ldmfd   sp!, {r4, r5, pc}
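+@ The double path follows the same pattern with double-precision bounds
+@ (0x43e0000000000000 == (double)2^63, 0xc3e0000000000000 == -(double)2^63);
+@ sketched roughly, again only for illustration:
+@     int64_t d2l_sketch(double d) {
+@       if (d >= 9223372036854775808.0)  return INT64_MAX;
+@       if (d <= -9223372036854775808.0) return INT64_MIN;
+@       if (d != d)                      return 0;            // NaN converts to 0
+@       return (int64_t)d;                                    // __aeabi_d2lz
+@     }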
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (0 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
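+    @ Note (assumed): lr is preloaded with the primary handler's address, so when
+    @ MterpCheckBefore returns it falls straight into the real handler for this
+    @ opcode; every handler occupies a fixed 128-byte slot, hence the
+    @ (opcode * 128) offset from artMterpAsmInstructionStart.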
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (1 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (2 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (3 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (4 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (5 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (6 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (7 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (8 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (9 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (10 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (11 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (12 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (13 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (14 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (15 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (16 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (17 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (18 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (19 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (20 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (21 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (22 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (23 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (24 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (25 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (26 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (27 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (28 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (29 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (30 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (31 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (32 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (33 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (34 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (35 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (36 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (37 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (38 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (39 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (40 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (41 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (42 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (43 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (44 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (45 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (46 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (47 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (48 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (49 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (50 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (51 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (52 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (53 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (54 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (55 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (56 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (57 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (58 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (59 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (60 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (61 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (62 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (63 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (64 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (65 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (66 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (67 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (68 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (69 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (70 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (71 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (72 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (73 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (74 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (75 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (76 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (77 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (78 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (79 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (80 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (81 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (82 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (83 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (84 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (85 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (86 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (87 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (88 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (89 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (90 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (91 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (92 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (93 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (94 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (95 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (96 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (97 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (98 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (99 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (100 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (101 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (102 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (103 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (104 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (105 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (106 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (107 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (108 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (109 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (110 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (111 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (112 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (113 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (114 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (115 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (116 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (117 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (118 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (119 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (120 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (121 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (122 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (123 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (124 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (125 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (126 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (127 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (128 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (129 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (130 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (131 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (132 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (133 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (134 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (135 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (136 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (137 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (138 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (139 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (140 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (141 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (142 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (143 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (144 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (145 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (146 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (147 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (148 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (149 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (150 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (151 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (152 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (153 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (154 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (155 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (156 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (157 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (158 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (159 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (160 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (161 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (162 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (163 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (164 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (165 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (166 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (167 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (168 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (169 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (170 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (171 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (172 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (173 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (174 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (175 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (176 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (177 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (178 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (179 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (180 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (181 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (182 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (183 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (184 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (185 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (186 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (187 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (188 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (189 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (190 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (191 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (192 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (193 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (194 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (195 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (196 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (197 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (198 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (199 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (200 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (201 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (202 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (203 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (204 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (205 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (206 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (207 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (208 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (209 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (210 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (211 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (212 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (213 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (214 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (215 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (216 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (217 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (218 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (219 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (220 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (221 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (222 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (223 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (224 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (225 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (226 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (227 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (228 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (229 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (230 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (231 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (232 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (233 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (234 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (235 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (236 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (237 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (238 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (239 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (240 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (241 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (242 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_lambda: /* 0xf3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (243 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (244 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_capture_variable: /* 0xf5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (245 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_create_lambda: /* 0xf6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (246 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_liberate_variable: /* 0xf7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (247 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_box_lambda: /* 0xf8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (248 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unbox_lambda: /* 0xf9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (249 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (250 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (251 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (252 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (253 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (254 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (255 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
+/* File: arm/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    ldr  r2, [rSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     r0, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    cmp     r0, #0                                  @ Exception pending?
+    beq     MterpFallback                           @ If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ *
+ */
+MterpException:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    @ (self, shadow_frame)
+    cmp     r0, #0
+    beq     MterpExceptionReturn                    @ no local catch, back to caller.
+    ldr     r0, [rFP, #OFF_FP_CODE_ITEM]
+    ldr     r1, [rFP, #OFF_FP_DEX_PC]
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     rPC, r0, #CODEITEM_INSNS_OFFSET
+    add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
+    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* resume execution at catch block */
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
+ * still needs to get the opcode and branch to it, and flags are in lr.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    EXPORT_PC
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck           @ (self)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     r0, #0                                  @ signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     r0, #1                                  @ signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    str     r0, [r2]
+    str     r1, [r2, #4]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r0, #1                                  @ signal return to caller.
+MterpDone:
+    add     sp, sp, #4                              @ un-align 64
+    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
+
+
+    .fnend
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
+
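Every generated ALT stub above follows the same pattern: the stub is 128-byte aligned, and its "adrl lr, artMterpAsmInstructionStart + (N * 128)" loads the address of the primary handler for opcode N before tail-calling MterpCheckBefore. A minimal stand-alone C++ sketch of that address computation follows; the base address, function name, and main() harness are placeholders, and only the 128-byte stride and the opcode values (0xd3 == 211, 0xff == 255) come from the generated assembly above.

    #include <cassert>
    #include <cstdint>

    // Each mterp handler (and each ALT stub) occupies one 128-byte slot.
    constexpr std::uintptr_t kMterpHandlerSize = 128;

    // Address of the primary handler for 'opcode', given the table base
    // (a stand-in for artMterpAsmInstructionStart).
    constexpr std::uintptr_t PrimaryHandlerFor(std::uintptr_t instruction_start,
                                               std::uint8_t opcode) {
      return instruction_start + opcode * kMterpHandlerSize;
    }

    int main() {
      constexpr std::uintptr_t kFakeStart = 0x1000;  // placeholder base address
      static_assert(PrimaryHandlerFor(kFakeStart, 0xd3) == kFakeStart + 211 * 128,
                    ".L_ALT_op_div_int_lit16 targets handler 211");
      static_assert(PrimaryHandlerFor(kFakeStart, 0xff) == kFakeStart + 255 * 128,
                    ".L_ALT_op_unused_ff targets handler 255");
      return 0;
    }
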
diff --git a/runtime/interpreter/mterp/rebuild.sh b/runtime/interpreter/mterp/rebuild.sh
new file mode 100755
index 0000000..a325fff
--- /dev/null
+++ b/runtime/interpreter/mterp/rebuild.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Rebuild for all known targets.  Necessary until the stuff in "out" gets
+# generated as part of the build.
+#
+set -e
+
+# for arch in arm x86 mips arm64 x86_64 mips64; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
+for arch in arm; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index 3c2898b..f08a1a9 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -16,6 +16,13 @@
 
 #include "debugger_interface.h"
 
+#include "base/logging.h"
+#include "base/mutex.h"
+#include "thread-inl.h"
+#include "thread.h"
+
+#include <unordered_map>
+
 namespace art {
 
 // -------------------------------------------------------------------
@@ -57,13 +64,19 @@
   JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
 }
 
-JITCodeEntry* CreateJITCodeEntry(const uint8_t *symfile_addr, uintptr_t symfile_size) {
+static Mutex g_jit_debug_mutex("JIT debug interface lock", kJitDebugInterfaceLock);
+
+static JITCodeEntry* CreateJITCodeEntryInternal(
+    std::unique_ptr<const uint8_t[]> symfile_addr,
+    uintptr_t symfile_size)
+    REQUIRES(g_jit_debug_mutex) {
+  DCHECK(symfile_addr.get() != nullptr);
+
   JITCodeEntry* entry = new JITCodeEntry;
-  entry->symfile_addr_ = symfile_addr;
+  entry->symfile_addr_ = symfile_addr.release();
   entry->symfile_size_ = symfile_size;
   entry->prev_ = nullptr;
 
-  // TODO: Do we need a lock here?
   entry->next_ = __jit_debug_descriptor.first_entry_;
   if (entry->next_ != nullptr) {
     entry->next_->prev_ = entry;
@@ -76,8 +89,7 @@
   return entry;
 }
 
-void DeleteJITCodeEntry(JITCodeEntry* entry) {
-  // TODO: Do we need a lock here?
+static void DeleteJITCodeEntryInternal(JITCodeEntry* entry) REQUIRES(g_jit_debug_mutex) {
   if (entry->prev_ != nullptr) {
     entry->prev_->next_ = entry->next_;
   } else {
@@ -91,7 +103,48 @@
   __jit_debug_descriptor.relevant_entry_ = entry;
   __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
   __jit_debug_register_code();
+  delete[] entry->symfile_addr_;
   delete entry;
 }
 
+JITCodeEntry* CreateJITCodeEntry(std::unique_ptr<const uint8_t[]> symfile_addr,
+                                 uintptr_t symfile_size) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  return CreateJITCodeEntryInternal(std::move(symfile_addr), symfile_size);
+}
+
+void DeleteJITCodeEntry(JITCodeEntry* entry) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  DeleteJITCodeEntryInternal(entry);
+}
+
+// Mapping from address to entry.  It takes ownership of the entries
+// so that the user of the JIT interface does not have to store them.
+static std::unordered_map<uintptr_t, JITCodeEntry*> g_jit_code_entries;
+
+void CreateJITCodeEntryForAddress(uintptr_t address,
+                                  std::unique_ptr<const uint8_t[]> symfile_addr,
+                                  uintptr_t symfile_size) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  DCHECK_NE(address, 0u);
+  DCHECK(g_jit_code_entries.find(address) == g_jit_code_entries.end());
+  JITCodeEntry* entry = CreateJITCodeEntryInternal(std::move(symfile_addr), symfile_size);
+  g_jit_code_entries.emplace(address, entry);
+}
+
+bool DeleteJITCodeEntryForAddress(uintptr_t address) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  const auto& it = g_jit_code_entries.find(address);
+  if (it == g_jit_code_entries.end()) {
+    return false;
+  }
+  DeleteJITCodeEntryInternal(it->second);
+  g_jit_code_entries.erase(it);
+  return true;
+}
+
 }  // namespace art
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
index a784ef5..74469a9 100644
--- a/runtime/jit/debugger_interface.h
+++ b/runtime/jit/debugger_interface.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
 
 #include <inttypes.h>
+#include <memory>
 
 namespace art {
 
@@ -26,11 +27,25 @@
 }
 
 // Notify native debugger about new JITed code by passing in-memory ELF.
-JITCodeEntry* CreateJITCodeEntry(const uint8_t *symfile_addr, uintptr_t symfile_size);
+// It takes ownership of the in-memory ELF file.
+JITCodeEntry* CreateJITCodeEntry(std::unique_ptr<const uint8_t[]> symfile_addr,
+                                 uintptr_t symfile_size);
 
 // Notify native debugger that JITed code has been removed.
+// It also releases the associated in-memory ELF file.
 void DeleteJITCodeEntry(JITCodeEntry* entry);
 
+// Notify native debugger about new JITed code by passing in-memory ELF.
+// The address is used only to uniquely identify the entry.
+// It takes ownership of the in-memory ELF file.
+void CreateJITCodeEntryForAddress(uintptr_t address,
+                                  std::unique_ptr<const uint8_t[]> symfile_addr,
+                                  uintptr_t symfile_size);
+
+// Notify native debugger that JITed code has been removed.
+// Returns false if entry for the given address was not found.
+bool DeleteJITCodeEntryForAddress(uintptr_t address);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
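A hedged usage sketch of the two address-keyed entry points declared above; the zero-filled buffer and the wrapper function are purely illustrative stand-ins, and only the declarations and ownership rules come from the header.

    #include <cstdint>
    #include <memory>

    #include "jit/debugger_interface.h"

    // Illustrative only: registers a fake, zero-filled symfile for JITed code at
    // 'code_address', then removes it again, exercising the ownership transfer
    // documented in the header above.
    void RegisterAndUnregisterSymfile(uintptr_t code_address, uintptr_t symfile_size) {
      // Stand-in for an in-memory ELF file; ownership passes to the debug interface.
      std::unique_ptr<const uint8_t[]> symfile(new const uint8_t[symfile_size]());
      art::CreateJITCodeEntryForAddress(code_address, std::move(symfile), symfile_size);

      // Later, when the code at 'code_address' is freed (as jit_code_cache.cc now does),
      // the entry is deleted by address; false would mean nothing was registered for it.
      bool removed = art::DeleteJITCodeEntryForAddress(code_address);
      (void)removed;
    }
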
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index ab70f4c..8f4d24f 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -56,7 +56,8 @@
   os << "JIT code cache size=" << PrettySize(code_cache_->CodeCacheSize()) << "\n"
      << "JIT data cache size=" << PrettySize(code_cache_->DataCacheSize()) << "\n"
      << "JIT current capacity=" << PrettySize(code_cache_->GetCurrentCapacity()) << "\n"
-     << "JIT number of compiled code=" << code_cache_->NumberOfCompiledCode() << "\n";
+     << "JIT number of compiled code=" << code_cache_->NumberOfCompiledCode() << "\n"
+     << "JIT total number of compilations=" << code_cache_->NumberOfCompilations() << "\n";
   cumulative_timings_.Dump(os);
 }
 
@@ -64,10 +65,14 @@
   cumulative_timings_.AddLogger(logger);
 }
 
-Jit::Jit()
-    : jit_library_handle_(nullptr), jit_compiler_handle_(nullptr), jit_load_(nullptr),
-      jit_compile_method_(nullptr), dump_info_on_shutdown_(false),
-      cumulative_timings_("JIT timings"), save_profiling_info_(false) {
+Jit::Jit() : jit_library_handle_(nullptr),
+             jit_compiler_handle_(nullptr),
+             jit_load_(nullptr),
+             jit_compile_method_(nullptr),
+             dump_info_on_shutdown_(false),
+             cumulative_timings_("JIT timings"),
+             save_profiling_info_(false),
+             generate_debug_info_(false) {
 }
 
 Jit* Jit::Create(JitOptions* options, std::string* error_msg) {
@@ -77,7 +82,10 @@
     return nullptr;
   }
   jit->code_cache_.reset(JitCodeCache::Create(
-      options->GetCodeCacheInitialCapacity(), options->GetCodeCacheMaxCapacity(), error_msg));
+      options->GetCodeCacheInitialCapacity(),
+      options->GetCodeCacheMaxCapacity(),
+      jit->generate_debug_info_,
+      error_msg));
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
@@ -99,7 +107,7 @@
     *error_msg = oss.str();
     return false;
   }
-  jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**)>(
+  jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**, bool*)>(
       dlsym(jit_library_handle_, "jit_load"));
   if (jit_load_ == nullptr) {
     dlclose(jit_library_handle_);
@@ -120,10 +128,18 @@
     *error_msg = "JIT couldn't find jit_compile_method entry point";
     return false;
   }
+  jit_types_loaded_ = reinterpret_cast<void (*)(void*, mirror::Class**, size_t)>(
+      dlsym(jit_library_handle_, "jit_types_loaded"));
+  if (jit_types_loaded_ == nullptr) {
+    dlclose(jit_library_handle_);
+    *error_msg = "JIT couldn't find jit_types_loaded entry point";
+    return false;
+  }
   CompilerCallbacks* callbacks = nullptr;
+  bool will_generate_debug_symbols = false;
   VLOG(jit) << "Calling JitLoad interpreter_only="
       << Runtime::Current()->GetInstrumentation()->InterpretOnly();
-  jit_compiler_handle_ = (jit_load_)(&callbacks);
+  jit_compiler_handle_ = (jit_load_)(&callbacks, &will_generate_debug_symbols);
   if (jit_compiler_handle_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't load compiler";
@@ -136,6 +152,7 @@
     return false;
   }
   compiler_callbacks_ = callbacks;
+  generate_debug_info_ = will_generate_debug_symbols;
   return true;
 }
 
@@ -205,5 +222,31 @@
       new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
 }
 
+void Jit::NewTypeLoadedIfUsingJit(mirror::Class* type) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && jit->generate_debug_info_) {
+    DCHECK(jit->jit_types_loaded_ != nullptr);
+    jit->jit_types_loaded_(jit->jit_compiler_handle_, &type, 1);
+  }
+}
+
+void Jit::DumpTypeInfoForLoadedTypes(ClassLinker* linker) {
+  struct CollectClasses : public ClassVisitor {
+    bool Visit(mirror::Class* klass) override {
+      classes_.push_back(klass);
+      return true;
+    }
+    std::vector<mirror::Class*> classes_;
+  };
+
+  if (generate_debug_info_) {
+    ScopedObjectAccess so(Thread::Current());
+
+    CollectClasses visitor;
+    linker->VisitClasses(&visitor);
+    jit_types_loaded_(jit_compiler_handle_, visitor.classes_.data(), visitor.classes_.size());
+  }
+}
+
 }  // namespace jit
 }  // namespace art
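For reference, the reinterpret_casts in Jit::LoadCompiler above are the only statement of the signatures the compiler plugin now has to export. The sketch below uses placeholder bodies and forward declarations; it is not the real libart-compiler implementation, only the "jit_load" and "jit_types_loaded" signatures are taken from this change.

    #include <stddef.h>

    namespace art {
    class CompilerCallbacks;
    namespace mirror { class Class; }
    }  // namespace art

    // Placeholder body: a real compiler hands back its CompilerCallbacks, reports
    // whether it will emit debug symbols, and returns an opaque handle.
    extern "C" void* jit_load(art::CompilerCallbacks** callbacks, bool* generate_debug_info) {
      *callbacks = nullptr;
      *generate_debug_info = false;
      return nullptr;  // nullptr is treated as a load failure by Jit::LoadCompiler.
    }

    // Placeholder body: a real compiler would emit debug information for the
    // newly loaded types here.
    extern "C" void jit_types_loaded(void* handle, art::mirror::Class** types, size_t count) {
      (void)handle;
      (void)types;
      (void)count;
    }
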
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 0edce2f..429edf6 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -79,6 +79,13 @@
     DumpInfo(os);
   }
 
+  static void NewTypeLoadedIfUsingJit(mirror::Class* type)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // If debug info generation is turned on, then write the type information for types already loaded
+  // into the specified class linker to the jit debug interface.
+  void DumpTypeInfoForLoadedTypes(ClassLinker* linker);
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
@@ -86,9 +93,10 @@
   // JIT compiler
   void* jit_library_handle_;
   void* jit_compiler_handle_;
-  void* (*jit_load_)(CompilerCallbacks**);
+  void* (*jit_load_)(CompilerCallbacks**, bool*);
   void (*jit_unload_)(void*);
   bool (*jit_compile_method_)(void*, ArtMethod*, Thread*);
+  void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
 
   // Performance monitoring.
   bool dump_info_on_shutdown_;
@@ -99,6 +107,7 @@
   CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
   bool save_profiling_info_;
+  bool generate_debug_info_;
 
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index c260ca4..64b2c89 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -21,6 +21,7 @@
 #include "art_method-inl.h"
 #include "base/stl_util.h"
 #include "base/time_utils.h"
+#include "debugger_interface.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
 #include "jit/profiling_info.h"
@@ -48,8 +49,16 @@
 
 JitCodeCache* JitCodeCache::Create(size_t initial_capacity,
                                    size_t max_capacity,
+                                   bool generate_debug_info,
                                    std::string* error_msg) {
   CHECK_GE(max_capacity, initial_capacity);
+
+  // Generating debug information is mostly for using the 'perf' tool, which does
+  // not work with ashmem.
+  bool use_ashmem = !generate_debug_info;
+  // With 'perf', we want a 1-1 mapping between an address and a method.
+  bool garbage_collect_code = !generate_debug_info;
+
   // We need to have 32 bit offsets from method headers in code cache which point to things
   // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
   // Ensure we're below 1 GB to be safe.
@@ -64,7 +73,7 @@
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
   MemMap* data_map = MemMap::MapAnonymous(
-    "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str);
+      "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str, use_ashmem);
   if (data_map == nullptr) {
     std::ostringstream oss;
     oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
@@ -83,7 +92,8 @@
   DCHECK_EQ(code_size + data_size, max_capacity);
   uint8_t* divider = data_map->Begin() + data_size;
 
-  MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str);
+  MemMap* code_map =
+      data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str, use_ashmem);
   if (code_map == nullptr) {
     std::ostringstream oss;
     oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
@@ -94,14 +104,16 @@
   data_size = initial_capacity / 2;
   code_size = initial_capacity - data_size;
   DCHECK_EQ(code_size + data_size, initial_capacity);
-  return new JitCodeCache(code_map, data_map, code_size, data_size, max_capacity);
+  return new JitCodeCache(
+      code_map, data_map, code_size, data_size, max_capacity, garbage_collect_code);
 }
 
 JitCodeCache::JitCodeCache(MemMap* code_map,
                            MemMap* data_map,
                            size_t initial_code_capacity,
                            size_t initial_data_capacity,
-                           size_t max_capacity)
+                           size_t max_capacity,
+                           bool garbage_collect_code)
     : lock_("Jit code cache", kJitCodeCacheLock),
       lock_cond_("Jit code cache variable", lock_),
       collection_in_progress_(false),
@@ -112,8 +124,11 @@
       code_end_(initial_code_capacity),
       data_end_(initial_data_capacity),
       has_done_one_collection_(false),
-      last_update_time_ns_(0) {
+      last_update_time_ns_(0),
+      garbage_collect_code_(garbage_collect_code),
+      number_of_compilations_(0) {
 
+  DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
   code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
   data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
 
@@ -215,6 +230,9 @@
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
   const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
   const uint8_t* data = method_header->GetNativeGcMap();
+  // Notify native debugger that we are about to remove the code.
+  // It does nothing if we are not using a native debugger.
+  DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
   if (data != nullptr) {
     mspace_free(data_mspace_, const_cast<uint8_t*>(data));
   }
@@ -305,6 +323,7 @@
 
     __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
                             reinterpret_cast<char*>(code_ptr + code_size));
+    number_of_compilations_++;
   }
   // We need to update the entry point in the runnable state for the instrumentation.
   {
@@ -330,6 +349,11 @@
   return reinterpret_cast<uint8_t*>(method_header);
 }
 
+size_t JitCodeCache::NumberOfCompilations() {
+  MutexLock mu(Thread::Current(), lock_);
+  return number_of_compilations_;
+}
+
 size_t JitCodeCache::CodeCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
   return CodeCacheSizeLocked();
@@ -512,7 +536,11 @@
   // we hold the lock.
   {
     MutexLock mu(self, lock_);
-    if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) {
+    if (!garbage_collect_code_) {
+      IncreaseCodeCacheCapacity();
+      NotifyCollectionDone(self);
+      return;
+    } else if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) {
       has_done_one_collection_ = false;
       NotifyCollectionDone(self);
       return;
@@ -653,9 +681,7 @@
   size_t profile_info_size = RoundUp(
       sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size(),
       sizeof(void*));
-  ScopedThreadSuspension sts(self, kSuspended);
   MutexLock mu(self, lock_);
-  WaitForPotentialCollectionToComplete(self);
 
   // Check whether some other thread has concurrently created it.
   ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
@@ -726,5 +752,10 @@
   info->SetIsMethodBeingCompiled(false);
 }
 
+size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) {
+  MutexLock mu(Thread::Current(), lock_);
+  return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr)));
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 1c842e4..67fa928 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -53,7 +53,10 @@
 
   // Create the code cache with a code + data capacity equal to "capacity", error message is passed
   // in the out arg error_msg.
-  static JitCodeCache* Create(size_t initial_capacity, size_t max_capacity, std::string* error_msg);
+  static JitCodeCache* Create(size_t initial_capacity,
+                              size_t max_capacity,
+                              bool generate_debug_info,
+                              std::string* error_msg);
 
   // Number of bytes allocated in the code cache.
   size_t CodeCacheSize() REQUIRES(!lock_);
@@ -65,6 +68,9 @@
   // of methods that got JIT compiled, as we might have collected some.
   size_t NumberOfCompiledCode() REQUIRES(!lock_);
 
+  // Number of compilations done throughout the lifetime of the JIT.
+  size_t NumberOfCompilations() REQUIRES(!lock_);
+
   bool NotifyCompilationOf(ArtMethod* method, Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
@@ -159,13 +165,16 @@
     return current_capacity_;
   }
 
+  size_t GetMemorySizeOfCodePointer(const void* ptr) REQUIRES(!lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
                MemMap* data_map,
                size_t initial_code_capacity,
                size_t initial_data_capacity,
-               size_t max_capacity);
+               size_t max_capacity,
+               bool garbage_collect_code);
 
   // Internal version of 'CommitCode' that will not retry if the
   // allocation fails. Return null if the allocation fails.
@@ -252,6 +261,12 @@
   // It is atomic to avoid locking when reading it.
   Atomic<uint64_t> last_update_time_ns_;
 
+  // Whether we can do garbage collection.
+  const bool garbage_collect_code_;
+
+  // Number of compilations done throughout the lifetime of the JIT.
+  size_t number_of_compilations_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 6531325..4cbaf2c 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -177,9 +177,8 @@
                                                           ArtMethod* caller,
                                                           uint32_t dex_pc,
                                                           ArtMethod* callee ATTRIBUTE_UNUSED) {
-  instrumentation_cache_->AddSamples(thread, caller, 1);
   // We make sure we cannot be suspended, as the profiling info can be concurrently deleted.
-  thread->StartAssertNoThreadSuspension("Instrumenting invoke");
+  instrumentation_cache_->AddSamples(thread, caller, 1);
   DCHECK(this_object != nullptr);
   ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
   if (info != nullptr) {
@@ -188,7 +187,6 @@
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
     info->AddInvokeInfo(dex_pc, this_object->GetClass());
   }
-  thread->EndAssertNoThreadSuspension(nullptr);
 }
 
 void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 1f96d59..15969e4 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -78,7 +78,9 @@
                                 ArtMethod* caller,
                                 uint32_t dex_pc,
                                 ArtMethod* callee)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+      OVERRIDE
+      REQUIRES(Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   static constexpr uint32_t kJitEvents =
       instrumentation::Instrumentation::kMethodEntered |
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index aa25f67..1ee1611 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -59,6 +59,7 @@
       local_ref_cookie(IRT_FIRST_SEGMENT),
       locals(kLocalsInitial, kLocalsMax, kLocal, false),
       check_jni(false),
+      runtime_deleted(false),
       critical(0),
       monitors("monitors", kMonitorsInitial, kMonitorsMax) {
   functions = unchecked_functions = GetJniNativeInterface();
@@ -67,6 +68,11 @@
   }
 }
 
+void JNIEnvExt::SetFunctionsToRuntimeShutdownFunctions() {
+  functions = GetRuntimeShutdownNativeInterface();
+  runtime_deleted = true;
+}
+
 JNIEnvExt::~JNIEnvExt() {
 }
 
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 2f8decf..d4accc3 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -74,6 +74,9 @@
   // Frequently-accessed fields cached from JavaVM.
   bool check_jni;
 
+  // If we are a JNI env for a daemon thread with a deleted runtime.
+  bool runtime_deleted;
+
   // How many nested "critical" JNI calls are we in?
   int critical;
 
@@ -95,6 +98,9 @@
   // Check that no monitors are held that have been acquired in this JNI "segment."
   void CheckNoHeldMonitors() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Set the JNI function table to the runtime shutdown (sleep-forever) functions.
+  void SetFunctionsToRuntimeShutdownFunctions();
+
  private:
   // The constructor should not be called directly. It may leave the object in an erroneous state,
   // and the result needs to be checked.
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index cb67ee3..c893a0f 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2734,6 +2734,246 @@
   return &gJniNativeInterface;
 }
 
+void (*gJniSleepForeverStub[])()  = {
+  nullptr,  // reserved0.
+  nullptr,  // reserved1.
+  nullptr,  // reserved2.
+  nullptr,  // reserved3.
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+};
+
+const JNINativeInterface* GetRuntimeShutdownNativeInterface() {
+  return reinterpret_cast<JNINativeInterface*>(&gJniSleepForeverStub);
+}
+
 void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods,
                            jint method_count) {
   ScopedLocalRef<jclass> c(env, env->FindClass(jni_class_name));
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 48b10f5..3429962 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -30,6 +30,7 @@
 namespace art {
 
 const JNINativeInterface* GetJniNativeInterface();
+const JNINativeInterface* GetRuntimeShutdownNativeInterface();
 
 // Similar to RegisterNatives except it is passed a descriptor for a class name and failures are
 // fatal.
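Taken together, the jni_env_ext and jni_internal changes implement a simple shutdown strategy: every JNIEnv points at a table of JNI entry points, and when the runtime is torn down each daemon thread's table is swapped for one whose slots all sleep forever, so late JNI calls park instead of touching freed runtime state. A standalone sketch of the mechanism with illustrative types:

    #include <unistd.h>

    // Sketch only (not ART code).
    static void SleepForeverStub() {
      while (true) {
        usleep(1000000);
      }
    }

    struct FakeNativeInterface {
      void (*find_class)();
      void (*new_string)();
      // ... one slot per JNI entry point, as in gJniSleepForeverStub above ...
    };

    static const FakeNativeInterface kSleepForeverTable = { SleepForeverStub, SleepForeverStub };

    struct FakeJniEnv {
      const FakeNativeInterface* functions = nullptr;
      bool runtime_deleted = false;

      void SetFunctionsToRuntimeShutdownFunctions() {
        functions = &kSleepForeverTable;  // Mirrors JNIEnvExt::SetFunctionsToRuntimeShutdownFunctions.
        runtime_deleted = true;
      }
    };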
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index e133847..3571edb 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -34,14 +34,11 @@
 #include "thread-inl.h"
 #include "utils.h"
 
-#define USE_ASHMEM 1
-
-#ifdef USE_ASHMEM
 #include <cutils/ashmem.h>
+
 #ifndef ANDROID_OS
 #include <sys/resource.h>
 #endif
-#endif
 
 #ifndef MAP_ANONYMOUS
 #define MAP_ANONYMOUS MAP_ANON
@@ -282,7 +279,8 @@
                              int prot,
                              bool low_4gb,
                              bool reuse,
-                             std::string* error_msg) {
+                             std::string* error_msg,
+                             bool use_ashmem) {
 #ifndef __LP64__
   UNUSED(low_4gb);
 #endif
@@ -303,17 +301,17 @@
 
   ScopedFd fd(-1);
 
-#ifdef USE_ASHMEM
-#ifdef __ANDROID__
-  const bool use_ashmem = true;
-#else
-  // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't
-  // fail due to ulimit restrictions. If they will then use a regular mmap.
-  struct rlimit rlimit_fsize;
-  CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0);
-  const bool use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) ||
-      (page_aligned_byte_count < rlimit_fsize.rlim_cur);
-#endif
+  if (use_ashmem) {
+    if (!kIsTargetBuild) {
+      // When not on Android, ashmem is faked using files in /tmp. Ensure that such files won't
+      // fail due to ulimit restrictions. If they would, fall back to a regular mmap.
+      struct rlimit rlimit_fsize;
+      CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0);
+      use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) ||
+        (page_aligned_byte_count < rlimit_fsize.rlim_cur);
+    }
+  }
+
   if (use_ashmem) {
     // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
     // prefixed "dalvik-".
@@ -326,7 +324,6 @@
     }
     flags &= ~MAP_ANONYMOUS;
   }
-#endif
 
   // We need to store and potentially set an error number for pretty printing of errors
   int saved_errno = 0;
@@ -508,7 +505,7 @@
 }
 
 MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot,
-                           std::string* error_msg) {
+                           std::string* error_msg, bool use_ashmem) {
   DCHECK_GE(new_end, Begin());
   DCHECK_LE(new_end, End());
   DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
@@ -532,23 +529,22 @@
   DCHECK_EQ(tail_base_begin + tail_base_size, old_base_end);
   DCHECK_ALIGNED(tail_base_size, kPageSize);
 
-#ifdef USE_ASHMEM
-  // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
-  // prefixed "dalvik-".
-  std::string debug_friendly_name("dalvik-");
-  debug_friendly_name += tail_name;
-  ScopedFd fd(ashmem_create_region(debug_friendly_name.c_str(), tail_base_size));
-  int flags = MAP_PRIVATE | MAP_FIXED;
-  if (fd.get() == -1) {
-    *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s",
-                              tail_name, strerror(errno));
-    return nullptr;
-  }
-#else
-  ScopedFd fd(-1);
+  int int_fd = -1;
   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#endif
-
+  if (use_ashmem) {
+    // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
+    // prefixed "dalvik-".
+    std::string debug_friendly_name("dalvik-");
+    debug_friendly_name += tail_name;
+    int_fd = ashmem_create_region(debug_friendly_name.c_str(), tail_base_size);
+    flags = MAP_PRIVATE | MAP_FIXED;
+    if (int_fd == -1) {
+      *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s",
+                                tail_name, strerror(errno));
+      return nullptr;
+    }
+  }
+  ScopedFd fd(int_fd);
 
   MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size);
   // Unmap/map the tail region.
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index efce09a..ed21365 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -57,17 +57,18 @@
   // "reuse" allows re-mapping an address range from an existing mapping.
   //
   // The word "anonymous" in this context means "not backed by a file". The supplied
-  // 'ashmem_name' will be used -- on systems that support it -- to give the mapping
+  // 'name' will be used -- on systems that support it -- to give the mapping
   // a name.
   //
   // On success, returns a MemMap instance. On failure, returns null.
-  static MemMap* MapAnonymous(const char* ashmem_name,
+  static MemMap* MapAnonymous(const char* name,
                               uint8_t* addr,
                               size_t byte_count,
                               int prot,
                               bool low_4gb,
                               bool reuse,
-                              std::string* error_msg);
+                              std::string* error_msg,
+                              bool use_ashmem = true);
 
   // Create placeholder for a region allocated by direct call to mmap.
   // This is useful when we do not have control over the code calling mmap,
@@ -168,7 +169,8 @@
   MemMap* RemapAtEnd(uint8_t* new_end,
                      const char* tail_name,
                      int tail_prot,
-                     std::string* error_msg);
+                     std::string* error_msg,
+                     bool use_ashmem = true);
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       REQUIRES(!Locks::mem_maps_lock_);
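With the compile-time USE_ASHMEM switch gone, callers now choose per mapping whether to back it with ashmem. A hypothetical call-site sketch against the MapAnonymous signature above (the region name and size are illustrative):

    #include <sys/mman.h>
    #include <string>

    // Sketch only: an anonymous mapping that explicitly opts out of ashmem backing.
    MemMap* CreatePlainAnonymousMapping() {
      std::string error_msg;
      MemMap* map = MemMap::MapAnonymous("dalvik-illustrative-region",  // hypothetical name
                                         /* addr */ nullptr,
                                         /* byte_count */ 4096,
                                         PROT_READ | PROT_WRITE,
                                         /* low_4gb */ false,
                                         /* reuse */ false,
                                         &error_msg,
                                         /* use_ashmem */ false);
      // On failure, map is null and error_msg describes why, as before.
      return map;
    }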
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index edcbcf2..2f903c8 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -271,8 +271,8 @@
         break;
       }
     }
-    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(map.get() != nullptr) << error_msg;
+    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(error_msg.empty());
     ASSERT_EQ(BaseBegin(map.get()), reinterpret_cast<void*>(start_addr));
   }
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index ef4fe15..53118e0 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -616,6 +616,7 @@
       << " IsErroneous=" <<
           IsErroneous<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>()
       << " IsString=" << (this == String::GetJavaLangString())
+      << " status=" << GetStatus<kVerifyFlags>()
       << " descriptor=" << PrettyDescriptor(this);
   return GetField32<kVerifyFlags>(AccessFlagsOffset());
 }
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 66060f2..b49fc74 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -538,6 +538,71 @@
   return nullptr;
 }
 
+ArtMethod* Class::FindVirtualMethodForInterfaceSuper(ArtMethod* method, size_t pointer_size) {
+  DCHECK(method->GetDeclaringClass()->IsInterface());
+  DCHECK(IsInterface()) << "Should only be called on an interface class";
+  // Check if we have one defined on this interface first. This includes searching copied ones to
+  // get any conflict methods. Conflict methods are copied into each subtype from the supertype. We
+  // don't do any indirect method checks here.
+  for (ArtMethod& iface_method : GetVirtualMethods(pointer_size)) {
+    if (method->HasSameNameAndSignature(&iface_method)) {
+      return &iface_method;
+    }
+  }
+
+  std::vector<ArtMethod*> abstract_methods;
+  // Search through the IFTable for a working version. We don't need to check for conflicts
+  // because if there were one it would appear in this class's virtual_methods_ above.
+
+  Thread* self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  MutableHandle<mirror::IfTable> iftable(hs.NewHandle(GetIfTable()));
+  MutableHandle<mirror::Class> iface(hs.NewHandle<mirror::Class>(nullptr));
+  size_t iftable_count = GetIfTableCount();
+  // Find the method. We don't need to check for conflicts because they would have been in the
+  // copied virtuals of this interface. Order matters: traverse in reverse topological order so
+  // that the most derived interfaces are visited first.
+  for (size_t k = iftable_count; k != 0;) {
+    k--;
+    DCHECK_LT(k, iftable->Count());
+    iface.Assign(iftable->GetInterface(k));
+    // Iterate through every method declared on this interface. Each declared name/signature
+    // is unique so the order of the inner loop doesn't matter.
+    for (auto& method_iter : iface->GetDeclaredVirtualMethods(pointer_size)) {
+      ArtMethod* current_method = &method_iter;
+      if (current_method->HasSameNameAndSignature(method)) {
+        if (current_method->IsDefault()) {
+          // Handle JLS soft errors: a default method from one superinterface tree can
+          // "override" abstract methods from other superinterface trees. To do this, ignore any
+          // default method that is dominated by the abstract methods we've seen so far, i.e. any
+          // method overridden by one in abstract_methods. We do not need to check for
+          // default_conflicts because we would hit those before we get to this loop.
+          bool overridden = false;
+          for (ArtMethod* possible_override : abstract_methods) {
+            DCHECK(possible_override->HasSameNameAndSignature(current_method));
+            if (iface->IsAssignableFrom(possible_override->GetDeclaringClass())) {
+              overridden = true;
+              break;
+            }
+          }
+          if (!overridden) {
+            return current_method;
+          }
+        } else {
+          // Not a default method, so it is abstract. It might override a default method found
+          // later in the traversal; just stash it for now.
+          abstract_methods.push_back(current_method);
+        }
+      }
+    }
+  }
+  // If we reach here we either never found any declaration of the method (in which case
+  // 'abstract_methods' is empty), or we found no non-overridden default methods (in which case
+  // 'abstract_methods' contains a number of abstract declarations of the method). We choose one
+  // of these arbitrarily.
+  return abstract_methods.empty() ? nullptr : abstract_methods[0];
+}
+
 ArtMethod* Class::FindClassInitializer(size_t pointer_size) {
   for (ArtMethod& method : GetDirectMethods(pointer_size)) {
     if (method.IsClassInitializer()) {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 489c269..3a06b82 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -848,6 +848,11 @@
   ArtMethod* FindVirtualMethodForSuper(ArtMethod* method, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Given a method from some implementor of this interface, return the specific implementation
+  // method for this class.
+  ArtMethod* FindVirtualMethodForInterfaceSuper(ArtMethod* method, size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Given a method implemented by this class, but potentially from a
   // super class or interface, return the specific implementation
   // method for this class.
@@ -1058,7 +1063,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   pid_t GetClinitThreadId() SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsIdxLoaded() || IsErroneous());
+    DCHECK(IsIdxLoaded() || IsErroneous()) << PrettyClass(this);
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_));
   }
 
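A hypothetical call-site sketch for the new lookup (not part of this patch): resolving an invoke-super whose target method is declared by an interface. The helper and its names are illustrative; only FindVirtualMethodForInterfaceSuper comes from the declarations above.

    // Sketch only. 'referring_interface' is the interface performing the invoke-super and
    // 'interface_method' is the resolved target.
    ArtMethod* ResolveInterfaceSuperTarget(mirror::Class* referring_interface,
                                           ArtMethod* interface_method)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      // Returns the closest default implementation, an abstract declaration if no default
      // exists, or nullptr if no superinterface declares the method at all.
      return referring_interface->FindVirtualMethodForInterfaceSuper(interface_method,
                                                                     sizeof(void*));
    }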
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 1977481..0ddd4a2 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -16,6 +16,8 @@
 
 #include "java_lang_Class.h"
 
+#include <iostream>
+
 #include "art_field-inl.h"
 #include "class_linker.h"
 #include "common_throws.h"
@@ -288,13 +290,6 @@
       GetPublicFieldRecursive(soa.Self(), DecodeClass(soa, javaThis), name_string));
 }
 
-static jobject Class_getDeclaredFieldInternal(JNIEnv* env, jobject javaThis, jstring name) {
-  ScopedFastNativeObjectAccess soa(env);
-  auto* name_string = soa.Decode<mirror::String*>(name);
-  return soa.AddLocalReference<jobject>(
-      GetDeclaredField(soa.Self(), DecodeClass(soa, javaThis), name_string));
-}
-
 static jobject Class_getDeclaredField(JNIEnv* env, jobject javaThis, jstring name) {
   ScopedFastNativeObjectAccess soa(env);
   auto* name_string = soa.Decode<mirror::String*>(name);
@@ -306,6 +301,15 @@
   mirror::Field* result = GetDeclaredField(soa.Self(), klass, name_string);
   if (result == nullptr) {
     std::string name_str = name_string->ToModifiedUtf8();
+    if (name_str == "value" && klass->IsStringClass()) {
+      // We log the error for this specific case, as the user might just swallow the exception.
+      // This helps diagnose crashes when applications rely on the String#value field being
+      // there.
+      // Also print to the error stream so it can be tested through run-test.
+      std::string message("The String#value field is not present on Android versions >= 6.0");
+      LOG(ERROR) << message;
+      std::cerr << message << std::endl;
+    }
     // We may have a pending exception if we failed to resolve.
     if (!soa.Self()->IsExceptionPending()) {
       ThrowNoSuchFieldException(DecodeClass(soa, javaThis), name_str.c_str());
@@ -723,7 +727,6 @@
   NATIVE_METHOD(Class, getDeclaredConstructorsInternal, "!(Z)[Ljava/lang/reflect/Constructor;"),
   NATIVE_METHOD(Class, getDeclaredField, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getPublicFieldRecursive, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
-  NATIVE_METHOD(Class, getDeclaredFieldInternal, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getDeclaredFieldsUnchecked, "!(Z)[Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getDeclaredMethodInternal,
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 83e594b..d2f563b 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -627,7 +627,7 @@
 
   if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
+    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
   }
 #endif
 }
diff --git a/runtime/prebuilt_tools_test.cc b/runtime/prebuilt_tools_test.cc
index a7f7bcd..eb226d4 100644
--- a/runtime/prebuilt_tools_test.cc
+++ b/runtime/prebuilt_tools_test.cc
@@ -34,7 +34,7 @@
     struct stat exec_st;
     std::string exec_path = tools_dir + tool;
     if (stat(exec_path.c_str(), &exec_st) != 0) {
-      ADD_FAILURE() << "Can not find " << tool << " in " << tools_dir;
+      ADD_FAILURE() << "Cannot find " << tool << " in " << tools_dir;
     }
   }
 }
@@ -42,7 +42,7 @@
 TEST_F(PrebuiltToolsTest, CheckHostTools) {
   std::string tools_dir = GetAndroidHostToolsDir();
   if (tools_dir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory for host";
+    ADD_FAILURE() << "Cannot find Android tools directory for host";
   } else {
     CheckToolsExist(tools_dir);
   }
@@ -54,7 +54,7 @@
   for (InstructionSet isa : isas) {
     std::string tools_dir = GetAndroidTargetToolsDir(isa);
     if (tools_dir.empty()) {
-      ADD_FAILURE() << "Can not find Android tools directory for " << isa;
+      ADD_FAILURE() << "Cannot find Android tools directory for " << isa;
     } else {
       CheckToolsExist(tools_dir);
     }
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 9cb37ee..786cf06 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -221,18 +221,22 @@
   CodeInfo code_info = handler_method_header_->GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
 
+  // Find stack map of the catch block.
+  StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding);
+  DCHECK(catch_stack_map.IsValid());
+  DexRegisterMap catch_vreg_map =
+      code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs);
+  if (!catch_vreg_map.IsValid()) {
+    return;
+  }
+
   // Find stack map of the throwing instruction.
   StackMap throw_stack_map =
       code_info.GetStackMapForNativePcOffset(stack_visitor->GetNativePcOffset(), encoding);
   DCHECK(throw_stack_map.IsValid());
   DexRegisterMap throw_vreg_map =
       code_info.GetDexRegisterMapOf(throw_stack_map, encoding, number_of_vregs);
-
-  // Find stack map of the catch block.
-  StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding);
-  DCHECK(catch_stack_map.IsValid());
-  DexRegisterMap catch_vreg_map =
-      code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs);
+  DCHECK(throw_vreg_map.IsValid());
 
   // Copy values between them.
   for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
@@ -387,6 +391,10 @@
                                              number_of_vregs)
         : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
 
+    if (!vreg_map.IsValid()) {
+      return;
+    }
+
     for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
       if (updated_vregs != nullptr && updated_vregs[vreg]) {
         // Keep the value set by debugger.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 6b8f17d..1101acd 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -211,6 +211,7 @@
       safe_mode_(false) {
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
+  interpreter::CheckInterpreterAsmConstants();
 }
 
 Runtime::~Runtime() {
@@ -1272,11 +1273,14 @@
     }
   }
   {
+    constexpr const char* kOpenJdkLibrary = kIsDebugBuild
+                                                ? "libopenjdkd.so"
+                                                : "libopenjdk.so";
     std::string error_msg;
-    if (!java_vm_->LoadNativeLibrary(env, "libopenjdk.so", nullptr,
+    if (!java_vm_->LoadNativeLibrary(env, kOpenJdkLibrary, nullptr,
                                      /* is_shared_namespace */ false,
                                      nullptr, nullptr, &error_msg)) {
-      LOG(FATAL) << "LoadNativeLibrary failed for \"libopenjdk.so\": " << error_msg;
+      LOG(FATAL) << "LoadNativeLibrary failed for \"" << kOpenJdkLibrary << "\": " << error_msg;
     }
   }
 
@@ -1877,6 +1881,9 @@
     jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
                                      jit_options_->GetWarmupThreshold());
     jit_->CreateThreadPool();
+
+    // Notify native debugger about the classes already loaded before the creation of the JIT.
+    jit_->DumpTypeInfoForLoadedTypes(GetClassLinker());
   } else {
     LOG(WARNING) << "Failed to create JIT " << error_msg;
   }
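The kOpenJdkLibrary selection above keeps a debug runtime paired with the debug libopenjdkd.so. A standalone sketch of the same compile-time selection pattern (not ART code; defining kIsDebugBuild from NDEBUG is an assumption made for the sketch):

    #include <string>

    static constexpr bool kIsDebugBuild =
    #ifdef NDEBUG
        false;
    #else
        true;
    #endif

    // Chosen entirely at compile time; no runtime branching needed at the load site.
    constexpr const char* kOpenJdkLibrary = kIsDebugBuild ? "libopenjdkd.so" : "libopenjdk.so";

    std::string OpenJdkLoadMessage() {
      return std::string("Loading ") + kOpenJdkLibrary;
    }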
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index 4e62dda..a8b48ee 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -146,7 +146,7 @@
 
 template<class Key, class T, AllocatorTag kTag, class Compare = std::less<Key>>
 class AllocationTrackingSafeMap : public SafeMap<
-    Key, T, Compare, TrackingAllocator<std::pair<Key, T>, kTag>> {
+    Key, T, Compare, TrackingAllocator<std::pair<const Key, T>, kTag>> {
 };
 
 }  // namespace art
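The one-word change above matters because std::map does not store std::pair<Key, T>; its value_type is std::pair<const Key, T>, and the allocator handed to SafeMap has to name that exact type. A standalone illustration:

    #include <map>
    #include <memory>
    #include <string>

    using Key = int;
    using Value = std::string;

    // Matches std::map's value_type, which is what the TrackingAllocator fix achieves.
    using MatchingMap = std::map<Key, Value, std::less<Key>,
                                 std::allocator<std::pair<const Key, Value>>>;

    // std::allocator<std::pair<Key, Value>> (non-const key) does not match value_type; depending
    // on the standard library it is silently rebound or rejected with a static_assert.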
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 9098d38..5faff93 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -322,6 +322,9 @@
                                            number_of_dex_registers)
       : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
 
+  if (!dex_register_map.IsValid()) {
+    return false;
+  }
   DexRegisterLocation::Kind location_kind =
       dex_register_map.GetLocationKind(vreg, number_of_dex_registers, code_info, encoding);
   switch (location_kind) {
diff --git a/runtime/stack.h b/runtime/stack.h
index a0c44cb..4fa1a4f 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -184,11 +184,12 @@
   }
 
   uint32_t GetDexPC() const {
-    return dex_pc_;
+    return (dex_pc_ptr_ == nullptr) ? dex_pc_ : dex_pc_ptr_ - code_item_->insns_;
   }
 
   void SetDexPC(uint32_t dex_pc) {
     dex_pc_ = dex_pc;
+    dex_pc_ptr_ = nullptr;
   }
 
   ShadowFrame* GetLink() const {
@@ -206,6 +207,20 @@
     return *reinterpret_cast<const int32_t*>(vreg);
   }
 
+  uint32_t* GetVRegAddr(size_t i) {
+    return &vregs_[i];
+  }
+
+  uint32_t* GetShadowRefAddr(size_t i) {
+    DCHECK(HasReferenceArray());
+    DCHECK_LT(i, NumberOfVRegs());
+    return &vregs_[i + NumberOfVRegs()];
+  }
+
+  void SetCodeItem(const DexFile::CodeItem* code_item) {
+    code_item_ = code_item;
+  }
+
   float GetVRegFloat(size_t i) const {
     DCHECK_LT(i, NumberOfVRegs());
     // NOTE: Strict-aliasing?
@@ -346,6 +361,10 @@
     return lock_count_data_;
   }
 
+  static size_t LockCountDataOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, lock_count_data_);
+  }
+
   static size_t LinkOffset() {
     return OFFSETOF_MEMBER(ShadowFrame, link_);
   }
@@ -366,6 +385,18 @@
     return OFFSETOF_MEMBER(ShadowFrame, vregs_);
   }
 
+  static size_t ResultRegisterOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, result_register_);
+  }
+
+  static size_t DexPCPtrOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, dex_pc_ptr_);
+  }
+
+  static size_t CodeItemOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, code_item_);
+  }
+
   // Create ShadowFrame for interpreter using provided memory.
   static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs,
                                             ShadowFrame* link,
@@ -375,10 +406,19 @@
     return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
   }
 
+  uint16_t* GetDexPCPtr() {
+    return dex_pc_ptr_;
+  }
+
+  JValue* GetResultRegister() {
+    return result_register_;
+  }
+
  private:
   ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method,
               uint32_t dex_pc, bool has_reference_array)
-      : number_of_vregs_(num_vregs), link_(link), method_(method), dex_pc_(dex_pc) {
+      : link_(link), method_(method), result_register_(nullptr), dex_pc_ptr_(nullptr),
+        code_item_(nullptr), number_of_vregs_(num_vregs), dex_pc_(dex_pc) {
     // TODO(iam): Remove this parameter, it's an artifact of portable removal
     DCHECK(has_reference_array);
     if (has_reference_array) {
@@ -399,12 +439,15 @@
         const_cast<const ShadowFrame*>(this)->References());
   }
 
-  const uint32_t number_of_vregs_;
   // Link to previous shadow frame or null.
   ShadowFrame* link_;
   ArtMethod* method_;
-  uint32_t dex_pc_;
+  JValue* result_register_;
+  uint16_t* dex_pc_ptr_;
+  const DexFile::CodeItem* code_item_;
   LockCountData lock_count_data_;  // This may contain GC roots when lock counting is active.
+  const uint32_t number_of_vregs_;
+  uint32_t dex_pc_;
 
   // This is a two-part array:
   //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index a15a081..84185ce 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -473,6 +473,9 @@
 class DexRegisterMap {
  public:
   explicit DexRegisterMap(MemoryRegion region) : region_(region) {}
+  DexRegisterMap() {}
+
+  bool IsValid() const { return region_.pointer() != nullptr; }
 
   // Get the surface kind of Dex register `dex_register_number`.
   DexRegisterLocation::Kind GetLocationKind(uint16_t dex_register_number,
@@ -1136,11 +1139,14 @@
   DexRegisterMap GetDexRegisterMapOf(StackMap stack_map,
                                      const StackMapEncoding& encoding,
                                      uint32_t number_of_dex_registers) const {
-    DCHECK(stack_map.HasDexRegisterMap(encoding));
-    uint32_t offset = GetDexRegisterMapsOffset(encoding)
-                      + stack_map.GetDexRegisterMapOffset(encoding);
-    size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
-    return DexRegisterMap(region_.Subregion(offset, size));
+    if (!stack_map.HasDexRegisterMap(encoding)) {
+      return DexRegisterMap();
+    } else {
+      uint32_t offset = GetDexRegisterMapsOffset(encoding)
+                        + stack_map.GetDexRegisterMapOffset(encoding);
+      size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
+      return DexRegisterMap(region_.Subregion(offset, size));
+    }
   }
 
   // Return the `DexRegisterMap` pointed by `inline_info` at depth `depth`.
@@ -1148,11 +1154,14 @@
                                           InlineInfo inline_info,
                                           const StackMapEncoding& encoding,
                                           uint32_t number_of_dex_registers) const {
-    DCHECK(inline_info.HasDexRegisterMapAtDepth(depth));
-    uint32_t offset = GetDexRegisterMapsOffset(encoding)
-                      + inline_info.GetDexRegisterMapOffsetAtDepth(depth);
-    size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
-    return DexRegisterMap(region_.Subregion(offset, size));
+    if (!inline_info.HasDexRegisterMapAtDepth(depth)) {
+      return DexRegisterMap();
+    } else {
+      uint32_t offset = GetDexRegisterMapsOffset(encoding)
+                        + inline_info.GetDexRegisterMapOffsetAtDepth(depth);
+      size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
+      return DexRegisterMap(region_.Subregion(offset, size));
+    }
   }
 
   InlineInfo GetInlineInfoOf(StackMap stack_map, const StackMapEncoding& encoding) const {
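With GetDexRegisterMapOf and GetDexRegisterMapAtDepth now returning a default-constructed map instead of asserting, callers are expected to test IsValid() before reading locations, as the stack.cc and quick_exception_handler.cc hunks do. A condensed sketch of that caller pattern (the parameters stand in for the locals of the surrounding functions):

    // Sketch only: mirrors the validity check added to the vreg readers above.
    void ReadVRegLocation(const CodeInfo& code_info,
                          StackMap stack_map,
                          const StackMapEncoding& encoding,
                          uint32_t number_of_vregs,
                          uint16_t vreg) {
      DexRegisterMap vreg_map =
          code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
      if (!vreg_map.IsValid()) {
        return;  // No dex register info recorded for this stack map; nothing to read.
      }
      DexRegisterLocation::Kind kind =
          vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
      (void)kind;
    }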
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 13e3774..21241d2 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -76,6 +76,7 @@
 #include "verify_object-inl.h"
 #include "vmap_table.h"
 #include "well_known_classes.h"
+#include "interpreter/interpreter.h"
 
 #if ART_USE_FUTEXES
 #include "linux/futex.h"
@@ -686,6 +687,7 @@
   RemoveSuspendTrigger();
   InitCardTable();
   InitTid();
+  interpreter::InitInterpreterTls(this);
 
 #ifdef __ANDROID__
   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = this;
diff --git a/runtime/thread.h b/runtime/thread.h
index 6cb895c..b25bcb2 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -601,6 +601,24 @@
   }
 
   template<size_t pointer_size>
+  static ThreadOffset<pointer_size> MterpCurrentIBaseOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(
+        OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_current_ibase));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> MterpDefaultIBaseOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(
+        OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_default_ibase));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> MterpAltIBaseOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(
+        OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_alt_ibase));
+  }
+
+  template<size_t pointer_size>
   static ThreadOffset<pointer_size> ExceptionOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, exception));
   }
@@ -1001,6 +1019,30 @@
   void ProtectStack();
   bool UnprotectStack();
 
+  void SetMterpDefaultIBase(void* ibase) {
+    tlsPtr_.mterp_default_ibase = ibase;
+  }
+
+  void SetMterpCurrentIBase(void* ibase) {
+    tlsPtr_.mterp_current_ibase = ibase;
+  }
+
+  void SetMterpAltIBase(void* ibase) {
+    tlsPtr_.mterp_alt_ibase = ibase;
+  }
+
+  const void* GetMterpDefaultIBase() const {
+    return tlsPtr_.mterp_default_ibase;
+  }
+
+  const void* GetMterpCurrentIBase() const {
+    return tlsPtr_.mterp_current_ibase;
+  }
+
+  const void* GetMterpAltIBase() const {
+    return tlsPtr_.mterp_alt_ibase;
+  }
+
   void NoteSignalBeingHandled() {
     if (tls32_.handling_signal_) {
       LOG(FATAL) << "Detected signal while processing a signal";
@@ -1246,6 +1288,7 @@
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
       thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
+      mterp_current_ibase(nullptr), mterp_default_ibase(nullptr), mterp_alt_ibase(nullptr),
       thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
       nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr),
       thread_local_mark_stack(nullptr) {
@@ -1364,6 +1407,11 @@
     uint8_t* thread_local_end;
     size_t thread_local_objects;
 
+    // Mterp jump table bases.
+    void* mterp_current_ibase;
+    void* mterp_default_ibase;
+    void* mterp_alt_ibase;
+
     // There are RosAlloc::kNumThreadLocalSizeBrackets thread-local size brackets per thread.
     void* rosalloc_runs[kNumRosAllocThreadLocalSizeBrackets];
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 77f780f..fc1a445 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -97,8 +97,8 @@
   ATRACE_END();
   // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
   //       Thread::Init.
-  ATRACE_BEGIN("SuspendAllDaemonThreads");
-  SuspendAllDaemonThreads();
+  ATRACE_BEGIN("SuspendAllDaemonThreadsForShutdown");
+  SuspendAllDaemonThreadsForShutdown();
   ATRACE_END();
   ATRACE_END();
 }
@@ -1142,9 +1142,10 @@
   }
 }
 
-void ThreadList::SuspendAllDaemonThreads() {
+void ThreadList::SuspendAllDaemonThreadsForShutdown() {
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::thread_list_lock_);
+  size_t daemons_left = 0;
   {  // Tell all the daemons it's time to suspend.
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : list_) {
@@ -1153,13 +1154,26 @@
       CHECK(thread->IsDaemon()) << *thread;
       if (thread != self) {
         thread->ModifySuspendCount(self, +1, nullptr, false);
+        ++daemons_left;
       }
+      // We are shutting down the runtime; set the JNI function table of every JNIEnv to the
+      // sleep-forever stubs.
+      thread->GetJniEnv()->SetFunctionsToRuntimeShutdownFunctions();
     }
   }
+  // If we have any daemons left, wait 200ms to give threads that are not runnable but are about
+  // to access runtime state (for example, in monitor code or waking up from a condition
+  // variable) a chance to get past that point before the runtime goes away. TODO: See if there
+  // is a better way to wait for daemon threads to reach a blocked state.
+  if (daemons_left > 0) {
+    static constexpr size_t kDaemonSleepTime = 200 * 1000;
+    usleep(kDaemonSleepTime);
+  }
   // Give the threads a chance to suspend, complaining if they're slow.
   bool have_complained = false;
-  for (int i = 0; i < 10; ++i) {
-    usleep(200 * 1000);
+  static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
+  static constexpr size_t kSleepMicroseconds = 1000;
+  for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
     bool all_suspended = true;
     for (const auto& thread : list_) {
       if (thread != self && thread->GetState() == kRunnable) {
@@ -1173,8 +1187,9 @@
     if (all_suspended) {
       return;
     }
+    usleep(kSleepMicroseconds);
   }
-  LOG(ERROR) << "suspend all daemons failed";
+  LOG(WARNING) << "timed out suspending all daemon threads";
 }
 void ThreadList::Register(Thread* self) {
   DCHECK_EQ(self, Thread::Current());
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 07ea10d..2e73f6a 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -164,7 +164,7 @@
   void DumpUnattachedThreads(std::ostream& os)
       REQUIRES(!Locks::thread_list_lock_);
 
-  void SuspendAllDaemonThreads()
+  void SuspendAllDaemonThreadsForShutdown()
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
   void WaitForOtherNonDaemonThreadsToExit()
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
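The reworked wait in SuspendAllDaemonThreadsForShutdown keeps the overall budget at roughly two seconds but polls every millisecond, so shutdown proceeds almost as soon as the last daemon leaves the runnable state instead of always sleeping in 200ms steps. A standalone sketch of that bounded polling loop (not ART code):

    #include <unistd.h>
    #include <cstddef>

    // Sketch only: poll a cheap predicate at fine granularity under a fixed total timeout.
    bool WaitForAllSuspended(bool (*all_suspended)()) {
      static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
      static constexpr size_t kSleepMicroseconds = 1000;
      for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
        if (all_suspended()) {
          return true;
        }
        usleep(kSleepMicroseconds);
      }
      return false;  // Caller logs the "timed out suspending all daemon threads" warning.
    }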
diff --git a/runtime/type_lookup_table_test.cc b/runtime/type_lookup_table_test.cc
index 7f500cc..ea4d8b5 100644
--- a/runtime/type_lookup_table_test.cc
+++ b/runtime/type_lookup_table_test.cc
@@ -25,10 +25,10 @@
 
 namespace art {
 
-class TypeLookupTableTest : public CommonRuntimeTest {
- public:
-  size_t kDexNoIndex = DexFile::kDexNoIndex;  // Make copy to prevent linking errors.
-};
+static const size_t kDexNoIndex = DexFile::kDexNoIndex;  // Make copy to prevent linking errors.
+
+using DescriptorClassDefIdxPair = std::pair<const char*, uint32_t>;
+class TypeLookupTableTest : public CommonRuntimeTestWithParam<DescriptorClassDefIdxPair> {};
 
 TEST_F(TypeLookupTableTest, CreateLookupTable) {
   ScopedObjectAccess soa(Thread::Current());
@@ -39,48 +39,28 @@
   ASSERT_EQ(32U, table->RawDataLength());
 }
 
-TEST_F(TypeLookupTableTest, FindNonExistingClassWithoutCollisions) {
+TEST_P(TypeLookupTableTest, Find) {
   ScopedObjectAccess soa(Thread::Current());
   std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
   std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
   ASSERT_NE(nullptr, table.get());
-  const char* descriptor = "LBA;";
+  auto pair = GetParam();
+  const char* descriptor = pair.first;
   size_t hash = ComputeModifiedUtf8Hash(descriptor);
   uint32_t class_def_idx = table->Lookup(descriptor, hash);
-  ASSERT_EQ(kDexNoIndex, class_def_idx);
+  ASSERT_EQ(pair.second, class_def_idx);
 }
 
-TEST_F(TypeLookupTableTest, FindNonExistingClassWithCollisions) {
-  ScopedObjectAccess soa(Thread::Current());
-  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
-  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
-  ASSERT_NE(nullptr, table.get());
-  const char* descriptor = "LDA;";
-  size_t hash = ComputeModifiedUtf8Hash(descriptor);
-  uint32_t class_def_idx = table->Lookup(descriptor, hash);
-  ASSERT_EQ(kDexNoIndex, class_def_idx);
-}
-
-TEST_F(TypeLookupTableTest, FindClassNoCollisions) {
-  ScopedObjectAccess soa(Thread::Current());
-  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
-  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
-  ASSERT_NE(nullptr, table.get());
-  const char* descriptor = "LC;";
-  size_t hash = ComputeModifiedUtf8Hash(descriptor);
-  uint32_t class_def_idx = table->Lookup(descriptor, hash);
-  ASSERT_EQ(2U, class_def_idx);
-}
-
-TEST_F(TypeLookupTableTest, FindClassWithCollisions) {
-  ScopedObjectAccess soa(Thread::Current());
-  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
-  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
-  ASSERT_NE(nullptr, table.get());
-  const char* descriptor = "LAB;";
-  size_t hash = ComputeModifiedUtf8Hash(descriptor);
-  uint32_t class_def_idx = table->Lookup(descriptor, hash);
-  ASSERT_EQ(1U, class_def_idx);
-}
-
+INSTANTIATE_TEST_CASE_P(FindNonExistingClassWithoutCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LBA;", kDexNoIndex)));
+INSTANTIATE_TEST_CASE_P(FindNonExistingClassWithCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LDA;", kDexNoIndex)));
+INSTANTIATE_TEST_CASE_P(FindClassNoCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LC;", 2U)));
+INSTANTIATE_TEST_CASE_P(FindClassWithCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LAB;", 1U)));
 }  // namespace art
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 5239e40..c67879b 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -353,7 +353,7 @@
     if (codePoint <= 0xffff) {
       if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
         // According to the Unicode standard, no character will ever
-        // be assigned to these code points, and they can not be encoded
+        // be assigned to these code points, and they cannot be encoded
         // into either utf-16 or utf-8.
         continue;
       }
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 1e1c7e7..8e9f12b 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1871,4 +1871,10 @@
   return rc == 0 ? stat_buf.st_size : -1;
 }
 
+void SleepForever() {
+  while (true) {
+    usleep(1000000);
+  }
+}
+
 }  // namespace art
diff --git a/runtime/utils.h b/runtime/utils.h
index 5ceb3b5..153749e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -99,6 +99,18 @@
   return (ch < ' ' || ch > '~');
 }
 
+template <typename T> T SafeAbs(T value) {
+  // std::abs has undefined behavior on min limits.
+  DCHECK_NE(value, std::numeric_limits<T>::min());
+  return std::abs(value);
+}
+
+template <typename T> T AbsOrMin(T value) {
+  return (value == std::numeric_limits<T>::min())
+      ? value
+      : std::abs(value);
+}
+
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
@@ -373,6 +385,9 @@
 // Return the file size in bytes or -1 if the file does not exist.
 int64_t GetFileSizeBytes(const std::string& filename);
 
+// Sleep forever and never come back.
+NO_RETURN void SleepForever();
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
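A short usage sketch for the two helpers declared above, assuming they are visible at the call site: std::abs on the minimum value of a signed type is undefined behavior because the magnitude is not representable, so SafeAbs makes that precondition explicit with a DCHECK while AbsOrMin passes the minimum through unchanged.

    #include <cstdint>
    #include <limits>

    void AbsExamples() {
      int32_t worst_case = std::numeric_limits<int32_t>::min();
      int32_t a = AbsOrMin(worst_case);    // == INT32_MIN: no undefined behavior, value unchanged.
      int32_t b = SafeAbs<int32_t>(-42);   // == 42; DCHECKs that the input is not INT32_MIN.
      (void)a;
      (void)b;
    }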
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index d75587b..1c95648 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3639,8 +3639,9 @@
   return *common_super;
 }
 
+// TODO: Maybe just add a METHOD_SUPER to MethodType?
 ArtMethod* MethodVerifier::ResolveMethodAndCheckAccess(
-    uint32_t dex_method_idx, MethodType method_type) {
+    uint32_t dex_method_idx, MethodType method_type, bool is_super) {
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx);
   const RegType& klass_type = ResolveClassAndCheckAccess(method_id.class_idx_);
   if (klass_type.IsConflict()) {
@@ -3667,6 +3668,8 @@
       res_method = klass->FindDirectMethod(name, signature, pointer_size);
     } else if (method_type == METHOD_INTERFACE) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
+    } else if (is_super && klass->IsInterface()) {
+      res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
     } else {
       res_method = klass->FindVirtualMethod(name, signature, pointer_size);
     }
@@ -3939,7 +3942,7 @@
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
 
-  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
+  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type, is_super);
   if (res_method == nullptr) {  // error or class is unresolved
     // Check what we can statically.
     if (!have_pending_hard_failure_) {
@@ -3949,24 +3952,32 @@
   }
 
   // If we're using invoke-super(method), make sure that the executing method's class' superclass
-  // has a vtable entry for the target method.
+  // has a vtable entry for the target method, or that the target method is on an interface.
   if (is_super) {
     DCHECK(method_type == METHOD_VIRTUAL);
-    const RegType& super = GetDeclaringClass().GetSuperClass(&reg_types_);
-    if (super.IsUnresolvedTypes()) {
-      Fail(VERIFY_ERROR_NO_METHOD) << "unknown super class in invoke-super from "
-                                   << PrettyMethod(dex_method_idx_, *dex_file_)
-                                   << " to super " << PrettyMethod(res_method);
-      return nullptr;
-    }
-    mirror::Class* super_klass = super.GetClass();
-    if (res_method->GetMethodIndex() >= super_klass->GetVTableLength()) {
-      Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from "
-                                   << PrettyMethod(dex_method_idx_, *dex_file_)
-                                   << " to super " << super
-                                   << "." << res_method->GetName()
-                                   << res_method->GetSignature();
-      return nullptr;
+    if (res_method->GetDeclaringClass()->IsInterface()) {
+      // TODO Fill in this part. Verify what we can...
+      if (Runtime::Current()->IsAotCompiler()) {
+        Fail(VERIFY_ERROR_FORCE_INTERPRETER) << "Currently we only allow invoke-super in "
+                                             << "the interpreter when using interface methods";
+      }
+    } else {
+      const RegType& super = GetDeclaringClass().GetSuperClass(&reg_types_);
+      if (super.IsUnresolvedTypes()) {
+        Fail(VERIFY_ERROR_NO_METHOD) << "unknown super class in invoke-super from "
+                                    << PrettyMethod(dex_method_idx_, *dex_file_)
+                                    << " to super " << PrettyMethod(res_method);
+        return nullptr;
+      }
+      mirror::Class* super_klass = super.GetClass();
+      if (res_method->GetMethodIndex() >= super_klass->GetVTableLength()) {
+        Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from "
+                                    << PrettyMethod(dex_method_idx_, *dex_file_)
+                                    << " to super " << super
+                                    << "." << res_method->GetName()
+                                    << res_method->GetSignature();
+        return nullptr;
+      }
     }
   }
 
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 79db576..ec0a8f9 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -654,7 +654,7 @@
    * the referrer can access the resolved method.
    * Does not throw exceptions.
    */
-  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type)
+  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type, bool is_super)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 16cab03..0894f5d 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -667,13 +667,13 @@
     // float/long/double MERGE float/long/double_constant => float/long/double
     return SelectNonConstant(*this, incoming_type);
   } else if (IsReferenceTypes() && incoming_type.IsReferenceTypes()) {
-    if (IsZero() || incoming_type.IsZero()) {
-      return SelectNonConstant(*this, incoming_type);  // 0 MERGE ref => ref
-    } else if (IsUninitializedTypes() || incoming_type.IsUninitializedTypes()) {
+    if (IsUninitializedTypes() || incoming_type.IsUninitializedTypes()) {
       // Something that is uninitialized hasn't had its constructor called. Uninitialized types are
       // special. They may only ever be merged with themselves (must be taken care of by the
       // caller of Merge(), see the DCHECK on entry). So mark any other merge as conflicting here.
       return conflict;
+    } else if (IsZero() || incoming_type.IsZero()) {
+      return SelectNonConstant(*this, incoming_type);  // 0 MERGE ref => ref
     } else if (IsJavaLangObject() || incoming_type.IsJavaLangObject()) {
       return reg_types->JavaLangObject(false);  // Object MERGE ref => Object
     } else if (IsUnresolvedTypes() || incoming_type.IsUnresolvedTypes()) {
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 80b751c..7c7981e 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -246,28 +246,18 @@
   }
 
   /*
-   * A basic Join operation on classes. For a pair of types S and T the Join,
-   *written S v T = J, is
-   * S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is
-   *J is the parent of
-   * S and T such that there isn't a parent of both S and T that isn't also the
-   *parent of J (ie J
+   * A basic Join operation on classes. For a pair of types S and T the Join, written S v T = J, is
+   * S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is J is the parent of
+   * S and T such that there isn't a parent of both S and T that isn't also the parent of J (ie J
    * is the deepest (lowest upper bound) parent of S and T).
    *
-   * This operation applies for regular classes and arrays, however, for
-   *interface types there
-   * needn't be a partial ordering on the types. We could solve the problem of a
-   *lack of a partial
-   * order by introducing sets of types, however, the only operation permissible
-   *on an interface is
-   * invoke-interface. In the tradition of Java verifiers [1] we defer the
-   *verification of interface
-   * types until an invoke-interface call on the interface typed reference at
-   *runtime and allow
-   * the perversion of Object being assignable to an interface type (note,
-   *however, that we don't
-   * allow assignment of Object or Interface to any concrete class and are
-   *therefore type safe).
+   * This operation applies for regular classes and arrays, however, for interface types there
+   * needn't be a partial ordering on the types. We could solve the problem of a lack of a partial
+   * order by introducing sets of types, however, the only operation permissible on an interface is
+   * invoke-interface. In the tradition of Java verifiers [1] we defer the verification of interface
+   * types until an invoke-interface call on the interface typed reference at runtime and allow
+   * the perversion of Object being assignable to an interface type (note, however, that we don't
+   * allow assignment of Object or Interface to any concrete class and are therefore type safe).
    *
    * [1] Java bytecode verification: algorithms and formalizations, Xavier Leroy
    */
diff --git a/test/091-override-package-private-method/build b/test/091-override-package-private-method/build
new file mode 100755
index 0000000..5a340dc
--- /dev/null
+++ b/test/091-override-package-private-method/build
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+
+mkdir classes-ex
+mv classes/OverridePackagePrivateMethodSuper.class classes-ex
+
+if [ ${USE_JACK} = "true" ]; then
+  # Create .jack files from classes generated with javac.
+  ${JILL} classes --output classes.jack
+  ${JILL} classes-ex --output classes-ex.jack
+
+  # Create DEX files from .jack files.
+  ${JACK} --import classes.jack --output-dex .
+  zip $TEST_NAME.jar classes.dex
+  ${JACK} --import classes-ex.jack --output-dex .
+  zip ${TEST_NAME}-ex.jar classes.dex
+else
+  if [ ${NEED_DEX} = "true" ]; then
+    ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+    zip $TEST_NAME.jar classes.dex
+    ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+    zip ${TEST_NAME}-ex.jar classes.dex
+  fi
+fi
diff --git a/test/091-override-package-private-method/expected.txt b/test/091-override-package-private-method/expected.txt
new file mode 100644
index 0000000..286cfcd
--- /dev/null
+++ b/test/091-override-package-private-method/expected.txt
@@ -0,0 +1 @@
+OverridePackagePrivateMethodTest
diff --git a/test/091-override-package-private-method/info.txt b/test/091-override-package-private-method/info.txt
new file mode 100644
index 0000000..8e183bf
--- /dev/null
+++ b/test/091-override-package-private-method/info.txt
@@ -0,0 +1,3 @@
+Test features with a secondary dex file.
+
+- Regression test to ensure the AOT compiler correctly handles an overridden package-private method.
diff --git a/test/127-secondarydex/run b/test/091-override-package-private-method/run
similarity index 100%
copy from test/127-secondarydex/run
copy to test/091-override-package-private-method/run
diff --git a/test/127-secondarydex/src/Test.java b/test/091-override-package-private-method/src/Main.java
similarity index 61%
copy from test/127-secondarydex/src/Test.java
copy to test/091-override-package-private-method/src/Main.java
index 8547e79..6543c98 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/091-override-package-private-method/src/Main.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
-    }
-
-    private void print() {
-        System.out.println("Test");
-    }
-
-    public String toString() {
-        return new String("Test");
+/**
+ *  Override package-private method test.
+ */
+public class Main {
+    public static void main(String[] args) {
+        try {
+            new OverridePackagePrivateMethodTest().test(new Object());
+        } catch (Exception e) {
+            System.out.println("Got unexpected exception " + e);
+        }
     }
 }
diff --git a/test/127-secondarydex/src/Test.java b/test/091-override-package-private-method/src/OverridePackagePrivateMethodSuper.java
similarity index 66%
copy from test/127-secondarydex/src/Test.java
copy to test/091-override-package-private-method/src/OverridePackagePrivateMethodSuper.java
index 8547e79..4ad051e 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/091-override-package-private-method/src/OverridePackagePrivateMethodSuper.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,8 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
-    }
-
-    private void print() {
-        System.out.println("Test");
-    }
-
-    public String toString() {
-        return new String("Test");
+public class OverridePackagePrivateMethodSuper {
+    void print() {
+        System.out.println("OverridePackagePrivateMethodSuper");
     }
 }
diff --git a/test/127-secondarydex/src/Test.java b/test/091-override-package-private-method/src/OverridePackagePrivateMethodTest.java
similarity index 60%
copy from test/127-secondarydex/src/Test.java
copy to test/091-override-package-private-method/src/OverridePackagePrivateMethodTest.java
index 8547e79..2f2b7ca 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/091-override-package-private-method/src/OverridePackagePrivateMethodTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
+public class OverridePackagePrivateMethodTest extends OverridePackagePrivateMethodSuper {
+    public void test(Object obj) {
+        if (obj == null) {
+            throw new NullPointerException("Got null");
+        }
+        print();
     }
 
-    private void print() {
-        System.out.println("Test");
-    }
-
-    public String toString() {
-        return new String("Test");
+    void print() {
+        System.out.println("OverridePackagePrivateMethodTest");
     }
 }
diff --git a/test/127-secondarydex/build b/test/127-checker-secondarydex/build
similarity index 100%
rename from test/127-secondarydex/build
rename to test/127-checker-secondarydex/build
diff --git a/test/127-secondarydex/expected.txt b/test/127-checker-secondarydex/expected.txt
similarity index 100%
rename from test/127-secondarydex/expected.txt
rename to test/127-checker-secondarydex/expected.txt
diff --git a/test/127-secondarydex/info.txt b/test/127-checker-secondarydex/info.txt
similarity index 100%
rename from test/127-secondarydex/info.txt
rename to test/127-checker-secondarydex/info.txt
diff --git a/test/127-secondarydex/run b/test/127-checker-secondarydex/run
similarity index 100%
rename from test/127-secondarydex/run
rename to test/127-checker-secondarydex/run
diff --git a/test/127-secondarydex/src/Main.java b/test/127-checker-secondarydex/src/Main.java
similarity index 100%
rename from test/127-secondarydex/src/Main.java
rename to test/127-checker-secondarydex/src/Main.java
diff --git a/test/127-secondarydex/src/Super.java b/test/127-checker-secondarydex/src/Super.java
similarity index 100%
rename from test/127-secondarydex/src/Super.java
rename to test/127-checker-secondarydex/src/Super.java
diff --git a/test/127-secondarydex/src/Test.java b/test/127-checker-secondarydex/src/Test.java
similarity index 78%
rename from test/127-secondarydex/src/Test.java
rename to test/127-checker-secondarydex/src/Test.java
index 8547e79..266ed19 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/127-checker-secondarydex/src/Test.java
@@ -23,6 +23,13 @@
         System.out.println("Test");
     }
 
+    /// CHECK-START: java.lang.Integer Test.toInteger() ssa_builder (after)
+    /// CHECK:         LoadClass needs_access_check:false klass:java.lang.Integer
+
+    public Integer toInteger() {
+        return new Integer(42);
+    }
+
     public String toString() {
         return new String("Test");
     }
diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
new file mode 100644
index 0000000..54879fb
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "base/casts.h"
+#include "base/macros.h"
+#include "java_vm_ext.h"
+#include "jni_env_ext.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace {
+
+static volatile std::atomic<bool> vm_was_shutdown(false);
+
+extern "C" JNIEXPORT void JNICALL Java_Main_waitAndCallIntoJniEnv(JNIEnv* env, jclass) {
+  // Wait until the runtime is shutdown.
+  while (!vm_was_shutdown.load()) {
+    usleep(1000);
+  }
+  std::cout << "About to call exception check\n";
+  env->ExceptionCheck();
+  LOG(ERROR) << "Should not be reached!";
+}
+
+// NO_RETURN does not work with extern "C" for target builds.
+extern "C" JNIEXPORT void JNICALL Java_Main_destroyJavaVMAndExit(JNIEnv* env, jclass) {
+  // Fake up the managed stack so we can detach.
+  Thread* const self = Thread::Current();
+  self->SetTopOfStack(nullptr);
+  self->SetTopOfShadowStack(nullptr);
+  JavaVM* vm = down_cast<JNIEnvExt*>(env)->vm;
+  vm->DetachCurrentThread();
+  vm->DestroyJavaVM();
+  vm_was_shutdown.store(true);
+  // Give threads some time to get stuck in ExceptionCheck.
+  usleep(1000000);
+  if (env != nullptr) {
+    // Use env != nullptr to trick noreturn.
+    exit(0);
+  }
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/136-daemon-jni-shutdown/expected.txt b/test/136-daemon-jni-shutdown/expected.txt
new file mode 100644
index 0000000..f0b6353
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/expected.txt
@@ -0,0 +1,5 @@
+JNI_OnLoad called
+About to call exception check
+About to call exception check
+About to call exception check
+About to call exception check
diff --git a/test/136-daemon-jni-shutdown/info.txt b/test/136-daemon-jni-shutdown/info.txt
new file mode 100644
index 0000000..06a12df
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/info.txt
@@ -0,0 +1 @@
+Test that daemon threads that call into a JNI env after the runtime is shut down do not crash.
\ No newline at end of file
diff --git a/test/136-daemon-jni-shutdown/src/Main.java b/test/136-daemon-jni-shutdown/src/Main.java
new file mode 100644
index 0000000..6eceb75
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test that daemon threads that call into a JNI env after the runtime is shut down do not crash.
+ */
+public class Main {
+
+    public final static int THREAD_COUNT = 4;
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+
+        for (int i = 0; i < THREAD_COUNT; i++) {
+            Thread t = new Thread(new DaemonRunnable());
+            t.setDaemon(true);
+            t.start();
+        }
+        // Give threads time to start and become stuck in waitAndCallIntoJniEnv.
+        Thread.sleep(1000);
+        destroyJavaVMAndExit();
+    }
+
+    static native void waitAndCallIntoJniEnv();
+    static native void destroyJavaVMAndExit();
+
+    private static class DaemonRunnable implements Runnable {
+        public void run() {
+            for (;;) {
+                waitAndCallIntoJniEnv();
+            }
+        }
+    }
+}
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 9bfe429..77301d2 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -76,7 +76,7 @@
     }
   }
 
-  printf("Can not find %s in backtrace:\n", seq[cur_search_index].c_str());
+  printf("Cannot find %s in backtrace:\n", seq[cur_search_index].c_str());
   for (Backtrace::const_iterator it = bt->begin(); it != bt->end(); ++it) {
     if (BacktraceMap::IsValid(it->map)) {
       printf("  %s\n", it->func_name.c_str());
@@ -112,7 +112,7 @@
 
   std::unique_ptr<Backtrace> bt(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, GetTid()));
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind in process.\n");
+    printf("Cannot unwind in process.\n");
     return JNI_FALSE;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind in process.\n");
@@ -205,7 +205,7 @@
   std::unique_ptr<Backtrace> bt(Backtrace::Create(pid, BACKTRACE_CURRENT_THREAD));
   bool result = true;
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind other process.\n");
+    printf("Cannot unwind other process.\n");
     result = false;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind of other process.\n");
diff --git a/test/137-cfi/run b/test/137-cfi/run
index ecbbbc7..9c567b6 100755
--- a/test/137-cfi/run
+++ b/test/137-cfi/run
@@ -14,4 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} "$@"
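+# Native debug info is needed so the backtrace can symbolize compiled Java frames.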
+exec ${RUN} "$@" -Xcompiler-option --generate-debug-info
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 0640b36..bcb697a 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -79,7 +79,7 @@
 
     private static void testUnloadClass(Constructor constructor) throws Exception {
         WeakReference<Class> klass = setUpUnloadClass(constructor);
-        // No strong refernces to class loader, should get unloaded.
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
         WeakReference<Class> klass2 = setUpUnloadClass(constructor);
         Runtime.getRuntime().gc();
@@ -91,7 +91,7 @@
     private static void testUnloadLoader(Constructor constructor)
         throws Exception {
       WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
-      // No strong refernces to class loader, should get unloaded.
+      // No strong references to class loader, should get unloaded.
       Runtime.getRuntime().gc();
       // If the weak reference is cleared, then it was unloaded.
       System.out.println(loader.get());
@@ -109,7 +109,7 @@
 
     private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception {
         WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
-        // No strong refernces to class loader, should get unloaded.
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(loader.get());
diff --git a/test/143-string-value/check b/test/143-string-value/check
new file mode 100755
index 0000000..92f6e90
--- /dev/null
+++ b/test/143-string-value/check
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Strip error log messages.
+sed -e '/^art E.*\] /d' "$2" > "$2.tmp"
+
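+# The exit status of this diff is the test verdict: 0 (filtered output matches
+# the expected file) means the test passes.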
+diff --strip-trailing-cr -q "$1" "$2.tmp" >/dev/null
diff --git a/test/143-string-value/expected.txt b/test/143-string-value/expected.txt
new file mode 100644
index 0000000..06cdb89
--- /dev/null
+++ b/test/143-string-value/expected.txt
@@ -0,0 +1 @@
+The String#value field is not present on Android versions >= 6.0
diff --git a/test/143-string-value/info.txt b/test/143-string-value/info.txt
new file mode 100644
index 0000000..61ec816
--- /dev/null
+++ b/test/143-string-value/info.txt
@@ -0,0 +1,2 @@
+Test to ensure we emit an error message when being asked
+for String#value.
diff --git a/test/127-secondarydex/src/Test.java b/test/143-string-value/src/Main.java
similarity index 66%
copy from test/127-secondarydex/src/Test.java
copy to test/143-string-value/src/Main.java
index 8547e79..e970692 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/143-string-value/src/Main.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,13 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
+public class Main {
+  public static void main(String[] args) {
+    try {
+      String.class.getDeclaredField("value");
+      throw new Error("Expected to fail");
+    } catch (ReflectiveOperationException e) {
+      // Ignore...
     }
-
-    private void print() {
-        System.out.println("Test");
-    }
-
-    public String toString() {
-        return new String("Test");
-    }
+  }
 }
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index 43bc9d0..0e07f47 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -120,9 +120,10 @@
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
   /// CHECK-DAG:     <<Const6:i\d+>>  IntConstant 6
+  /// CHECK-DAG:     <<Const11:i\d+>> IntConstant 11
   /// CHECK-DAG:     <<Add1:i\d+>>    Add [<<Const1>>,<<Const2>>]
-  /// CHECK-DAG:     <<Add2:i\d+>>    Add [<<Const5>>,<<Const6>>]
-  /// CHECK-DAG:     <<Add3:i\d+>>    Add [<<Add1>>,<<Add2>>]
+  /// CHECK-DAG:                      Add [<<Const5>>,<<Const6>>]
+  /// CHECK-DAG:     <<Add3:i\d+>>    Add [<<Add1>>,<<Const11>>]
   /// CHECK-DAG:                      Return [<<Add3>>]
 
   /// CHECK-START: int Main.IntAddition2() constant_folding (after)
@@ -522,7 +523,7 @@
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
-  /// CHECK-DAG:     <<And:j\d+>>      And [<<Const10L>>,<<TypeConv>>]
+  /// CHECK-DAG:     <<And:j\d+>>      And [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
   /// CHECK-START: long Main.AndLongInt() constant_folding (after)
@@ -567,7 +568,7 @@
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
-  /// CHECK-DAG:     <<Or:j\d+>>       Or [<<Const10L>>,<<TypeConv>>]
+  /// CHECK-DAG:     <<Or:j\d+>>       Or [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
   /// CHECK-START: long Main.OrLongInt() constant_folding (after)
@@ -612,7 +613,7 @@
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
-  /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<Const10L>>,<<TypeConv>>]
+  /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
   /// CHECK-START: long Main.XorLongInt() constant_folding (after)
@@ -749,7 +750,7 @@
   /// CHECK-START: long Main.Mul0(long) constant_folding (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
-  /// CHECK-DAG:     <<Mul:j\d+>>      Mul [<<Arg>>,<<Const0>>]
+  /// CHECK-DAG:     <<Mul:j\d+>>      Mul [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Mul>>]
 
   /// CHECK-START: long Main.Mul0(long) constant_folding (after)
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 6151fc1..0fd7801 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -288,7 +288,7 @@
   /// CHECK-START: long Main.Mul1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
-  /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Arg>>,<<Const1>>]
+  /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Const1>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
   /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
@@ -323,7 +323,7 @@
   /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const128:j\d+>>  LongConstant 128
-  /// CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Arg>>,<<Const128>>]
+  /// CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Const128>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
   /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
@@ -705,7 +705,7 @@
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Neg1>>]
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg2>>,<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
   /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
@@ -841,13 +841,13 @@
   /// CHECK-DAG:     <<ConstF1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Xor1:i\d+>>     Xor [<<Arg>>,<<ConstF1>>]
   /// CHECK-DAG:     <<Xor2:i\d+>>     Xor [<<Xor1>>,<<ConstF1>>]
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Xor1>>,<<Xor2>>]
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Xor2>>,<<Xor1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
   /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Not>>,<<Arg>>]
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Not>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
   /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
@@ -1005,7 +1005,7 @@
   /// CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       If [<<Cond>>]
 
   /// CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier (after)
@@ -1064,7 +1064,7 @@
   /// CHECK-START: int Main.NotEqualFalseLhs(boolean) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       If [<<Cond>>]
 
   /// CHECK-START: int Main.NotEqualFalseLhs(boolean) instruction_simplifier (after)
@@ -1234,7 +1234,7 @@
   /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const31:j\d+>>     LongConstant 31
-  /// CHECK:                            Mul [<<Arg>>,<<Const31>>]
+  /// CHECK:                            Mul [<<Const31>>,<<Arg>>]
 
   /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
index 6b4da9d..d0b33b9 100644
--- a/test/482-checker-loop-back-edge-use/src/Main.java
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -163,8 +163,8 @@
   /// CHECK:         <<Arg:z\d+>>  StaticFieldGet  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>]
   /// CHECK:                       If [<<Arg>>]    liveness:<<IfLiv:\d+>>
   /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
-  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
   /// CHECK:                       Exit
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
   /// CHECK-EVAL:    <<IfLiv>> + 1 == <<ArgUse>>
   /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
   /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse>>
diff --git a/test/495-checker-checkcast-tests/src/Main.java b/test/495-checker-checkcast-tests/src/Main.java
index 4b2bf09..6011c7c 100644
--- a/test/495-checker-checkcast-tests/src/Main.java
+++ b/test/495-checker-checkcast-tests/src/Main.java
@@ -113,13 +113,13 @@
   }
 
   /// CHECK-START: java.lang.String Main.knownTestWithLoadedClass() register (after)
-  /// CHECK-NOT: LoadClass
+  /// CHECK-NOT: CheckCast
   public static String knownTestWithLoadedClass() {
     return (String)$inline$getString();
   }
 
   /// CHECK-START: Itf Main.knownTestWithUnloadedClass() register (after)
-  /// CHECK: LoadClass
+  /// CHECK: CheckCast
   public static Itf knownTestWithUnloadedClass() {
     return (Itf)$inline$getString();
   }
diff --git a/test/559-checker-irreducible-loop/expected.txt b/test/559-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..b64be7a
--- /dev/null
+++ b/test/559-checker-irreducible-loop/expected.txt
@@ -0,0 +1,7 @@
+84
+30
+168
+126
+class Main
+42
+-42
diff --git a/test/559-checker-irreducible-loop/info.txt b/test/559-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..e0ace18
--- /dev/null
+++ b/test/559-checker-irreducible-loop/info.txt
@@ -0,0 +1 @@
+Tests for irreducible loop support in the compiler.
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..971ad84
--- /dev/null
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,549 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+# Back edges in the ASCII-art graphs are represented with a dash '-'.
+
+# Test that we support a simple irreducible loop.
+#
+#        entry
+#       /    \
+#      /      \
+# loop_entry   \
+#    /    \-    \
+#  exit    \-    \
+#           other_loop_entry
+#
+## CHECK-START: int IrreducibleLoop.simpleLoop(int) dead_code_elimination (before)
+## CHECK: irreducible:true
+.method public static simpleLoop(I)I
+   .registers 2
+   const/16 v0, 42
+   if-eq v1, v0, :other_loop_entry
+   :loop_entry
+   if-ne v1, v0, :exit
+   add-int v0, v0, v0
+   :other_loop_entry
+   add-int v0, v0, v0
+   goto :loop_entry
+   :exit
+   return v0
+.end method
+
+# Test that lse does not wrongly optimize loads in irreducible loops. At the
+# SSA level, since we create redundant phis for irreducible loop headers, lse
+# does not see the relation between the dex register and the phi.
+#
+#               entry
+#                p1
+#             /     \
+#            /       \
+#           /         \
+#          /           \
+#   loop_pre_entry      \
+# set 42 in p1:myField   \
+#        /                \
+#   loop_entry             \
+#  get p1.myField           \
+#    /         \-            \
+#  exit         \-            \
+#                \-            \
+#                other_loop_entry
+#              set 30 in p1:myField
+#
+## CHECK-START: int IrreducibleLoop.lse(int, Main) dead_code_elimination (after)
+## CHECK: irreducible:true
+#
+## CHECK-START: int IrreducibleLoop.lse(int, Main) load_store_elimination (after)
+## CHECK: InstanceFieldGet
+.method public static lse(ILMain;)I
+   .registers 4
+   const/16 v0, 42
+   const/16 v1, 30
+   if-eq p0, v0, :other_loop_pre_entry
+   goto :loop_pre_entry
+   :loop_pre_entry
+   iput v0, p1, LMain;->myField:I
+   :loop_entry
+   if-ne v1, v0, :exit
+   :other_loop_entry
+   iget v0, p1, LMain;->myField:I
+   if-eq v1, v0, :exit
+   goto :loop_entry
+   :exit
+   return v0
+   :other_loop_pre_entry
+   iput v1, p1, LMain;->myField:I
+   goto :other_loop_entry
+.end method
+
+# Check that dce does not apply for irreducible loops.
+#
+#        entry
+#       /    \
+#      /      \
+# loop_entry   \
+#    /    \-    \
+#  exit    \-    \
+#           other_loop_entry
+#
+## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (before)
+## CHECK: irreducible:true
+
+## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (after)
+## CHECK: irreducible:true
+.method public static dce(I)I
+   .registers 3
+   const/16 v0, 42
+   const/16 v1, 168
+   if-ne v0, v0, :other_loop_pre_entry
+   :loop_entry
+   if-ne v0, v0, :exit
+   add-int v0, v0, v0
+   :other_loop_entry
+   add-int v0, v0, v0
+   if-eq v0, v1, :exit
+   goto :loop_entry
+   :exit
+   return v0
+   :other_loop_pre_entry
+   add-int v0, v0, v0
+   goto :other_loop_entry
+.end method
+
+# Check that a dex register only used in the loop header remains live thanks
+# to the (redundant) Phi created at the loop header for it.
+#
+#           entry
+#            p0
+#          /   \
+#         /     \
+#        /       \
+#   loop_entry    \
+# i0 = phi(p0,i1)  \
+#    /    \-        \
+#  exit    \-        \
+#        other_loop_entry
+#        i1 = phi(p0, i0)
+#
+## CHECK-START: int IrreducibleLoop.liveness(int) liveness (after)
+## CHECK-DAG: <<Arg:i\d+>>      ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopPhiUse:\d+>>)}
+## CHECK-DAG: <<LoopPhi:i\d+>>  Phi [<<Arg>>,<<PhiInLoop:i\d+>>] liveness:<<ArgLoopPhiUse>> ranges:{[<<ArgLoopPhiUse>>,<<PhiInLoopUse:\d+>>)}
+## CHECK-DAG: <<PhiInLoop>>     Phi [<<Arg>>,<<LoopPhi>>] liveness:<<PhiInLoopUse>> ranges:{[<<PhiInLoopUse>>,<<BackEdgeLifetimeEnd:\d+>>)}
+## CHECK:                       Return liveness:<<ReturnLiveness:\d+>>
+## CHECK-EVAL:    <<ReturnLiveness>> == <<BackEdgeLifetimeEnd>> + 2
+.method public static liveness(I)I
+   .registers 2
+   const/16 v0, 42
+   if-eq p0, v0, :other_loop_entry
+   :loop_entry
+   add-int v0, v0, p0
+   if-ne v1, v0, :exit
+   :other_loop_entry
+   add-int v0, v0, v0
+   goto :loop_entry
+   :exit
+   return v0
+.end method
+
+# Check that we don't GVN across irreducible loops:
+# "const-class 1" in loop_entry should not be GVN with
+# "const-class 1" in entry.
+#
+#        entry
+#     const-class 1
+#       /    \
+#      /      \
+# loop_entry   \
+# const-class 1 \
+#    /    \-     \
+#  exit    \-     \
+#           other_loop_entry
+#             const-class 2
+#
+## CHECK-START: java.lang.Class IrreducibleLoop.gvn() GVN (before)
+## CHECK: LoadClass
+## CHECK: LoadClass
+## CHECK: LoadClass
+## CHECK-NOT: LoadClass
+
+## CHECK-START: java.lang.Class IrreducibleLoop.gvn() GVN (after)
+## CHECK: LoadClass
+## CHECK: LoadClass
+## CHECK: LoadClass
+## CHECK-NOT: LoadClass
+
+.method public static gvn()Ljava/lang/Class;
+  .registers 3
+  const/4 v2, 0
+  const-class v0, LMain;
+  if-ne v0, v2, :other_loop_entry
+  :loop_entry
+  const-class v0, LMain;
+  if-ne v0, v2, :exit
+  :other_loop_entry
+  const-class v1, LIrreducibleLoop;
+  goto :loop_entry
+  :exit
+  return-object v0
+.end method
+
+# Check that we don't LICM across irreducible loops:
+# "add" in loop_entry should not be LICMed.
+#
+#        entry
+#        /   \
+#       /     \
+#  loop_entry  \
+#      add      \
+#    /    \-     \
+#  exit    \-     \
+#           other_loop_entry
+#
+## CHECK-START: int IrreducibleLoop.licm1(int) licm (after)
+## CHECK: Add irreducible:true
+.method public static licm1(I)I
+  .registers 3
+  const/4 v0, 0
+  if-ne p0, v0, :other_loop_entry
+  :loop_entry
+  add-int v0, p0, p0
+  if-ne v0, p0, :exit
+  :other_loop_entry
+  sub-int v1, p0, p0
+  goto :loop_entry
+  :exit
+  sub-int v0, v0, p0
+  return v0
+.end method
+
+# Check that we don't LICM across irreducible loops:
+# "const-class" in loop_entry should not be LICMed.
+#
+#        entry
+#        /   \
+#       /     \
+#  loop_entry  \
+#  const-class  \
+#    /    \-     \
+#  exit    \-     \
+#           other_loop_entry
+#
+## CHECK-START: int IrreducibleLoop.licm2(int) licm (after)
+## CHECK: LoadClass irreducible:true
+.method public static licm2(I)I
+  .registers 3
+  const/4 v0, 0
+  if-ne p0, v0, :other_loop_entry
+  :loop_entry
+  const-class v1, LIrreducibleLoop;
+  if-ne v0, p0, :exit
+  :other_loop_entry
+  sub-int v1, p0, p0
+  goto :loop_entry
+  :exit
+  sub-int v0, v0, p0
+  return v0
+.end method
+
+# Check that we don't LICM in a natural loop that contains an irreducible loop:
+# "const-class" should not be LICMed.
+#
+#        entry
+#          |
+#       loop_entry
+#       const-class -------------------
+#        /        \                   -
+#       /          \                  -
+#     exit         loop_body          -
+#                  /       \          -
+#                 /         \         -
+#   irreducible_loop_entry   \        -
+#        -      \             \       -
+#        -       \             \      -
+#        -      irreducible_loop_other_entry
+#        -                  |
+#        -                  |
+#        ------ irreducible_loop_back_edge
+#
+## CHECK-START: int IrreducibleLoop.licm3(int, int, int) licm (after)
+## CHECK: LoadClass loop:<<OuterLoop:B\d+>>  irreducible:false
+## CHECK: Goto outer_loop:<<OuterLoop>>  irreducible:true
+.method public static licm3(III)I
+  .registers 4
+  :loop_entry
+  const-class v0, LIrreducibleLoop;
+  if-ne p1, p2, :exit
+  goto :loop_body
+
+  :loop_body
+  if-eq p0, p1, :irreducible_loop_entry
+  goto :irreducible_loop_other_entry
+
+  :irreducible_loop_entry
+  goto :irreducible_loop_other_entry
+
+  :irreducible_loop_other_entry
+  if-eq p0, p2, :loop_entry
+  goto :irreducible_loop_back_edge
+
+  :irreducible_loop_back_edge
+  goto :irreducible_loop_entry
+  :exit
+  return p0
+.end method
+
+# Check a loop within an irreducible loop
+#
+#                      entry
+#                    /       \
+#                   /         \
+# irreducible_loop_entry       \
+#    / -       \         irreducible_loop_pre_other_entry
+# exit -        \              /
+#      -    irreducible_loop_body
+#      -              |
+#      -              |
+#      -      loop_within_header
+#      -        /               \-
+#      -       /                 \-
+# irreducible_loop_back_edge    loop_within_back_edge
+#
+## CHECK-START: void IrreducibleLoop.analyze1(int) ssa_builder (after)
+## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
+## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:false
+.method public static analyze1(I)V
+  .registers 1
+  if-eq p0, p0, :irreducible_loop_entry
+  goto :irreducible_loop_pre_other_entry
+
+  :irreducible_loop_entry
+  if-eq p0, p0, :exit
+  goto :irreducible_loop_body
+
+  :irreducible_loop_body
+  :loop_within_header
+  if-eq p0, p0, :irreducible_loop_back_edge
+  goto :loop_within_back_edge
+
+  :loop_within_back_edge
+  goto :loop_within_header
+
+  :irreducible_loop_back_edge
+  goto :irreducible_loop_entry
+
+  :irreducible_loop_pre_other_entry
+  goto :irreducible_loop_body
+
+  :exit
+  return-void
+.end method
+
+# Check that a loop before an irreducible loop is not part of the
+# irreducible loop.
+#
+#                      entry
+#                        |
+#                        |
+#                   loop_header
+#                    /        \-
+#                   /          \-
+# irreducible_loop_pre_entry  loop_body
+#           /             \
+#          /               \
+#  irreducible_loop_entry   \
+#    /        \-       irreducible_loop_other_pre_entry
+#   /          \-           /
+# exit          \-         /
+#          irreducible_loop_body
+#
+## CHECK-START: void IrreducibleLoop.analyze2(int) ssa_builder (after)
+## CHECK-DAG: Goto outer_loop:none irreducible:false
+## CHECK-DAG: Goto outer_loop:none irreducible:true
+.method public static analyze2(I)V
+  .registers 1
+  :loop_header
+  if-eq p0, p0, :irreducible_loop_pre_entry
+  goto :loop_body
+  :loop_body
+  goto :loop_header
+
+  :irreducible_loop_pre_entry
+  if-eq p0, p0, :irreducible_loop_other_pre_entry
+  goto :irreducible_loop_entry
+
+  :irreducible_loop_entry
+  if-eq p0, p0, :exit
+  goto :irreducible_loop_body
+
+  :irreducible_loop_body
+  goto :irreducible_loop_entry
+
+  :irreducible_loop_other_pre_entry
+  goto :irreducible_loop_body
+
+  :exit
+  return-void
+.end method
+
+# Check two irreducible loops, one within another.
+#
+#                      entry
+#                    /       \
+#                   /         \
+#           loop1_header   loop2_header
+#           -   |          /       -
+#           -   |         /        -
+#           -   |        /         -
+#           -   |       /          -
+#           -  loop2_body          -
+#           -    /     \           -
+#           -   /       \          -
+#         loop1_body   loop2_back_edge
+#             |
+#             |
+#           exit
+#
+## CHECK-START: void IrreducibleLoop.analyze3(int) ssa_builder (after)
+## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
+## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:true
+.method public static analyze3(I)V
+  .registers 1
+  if-eq p0, p0, :loop2_header
+  goto :loop1_header
+
+  :loop1_header
+  goto :loop2_body
+
+  :loop2_header
+  goto :loop2_body
+
+  :loop2_body
+  if-eq p0, p0, :loop2_back_edge
+  goto :loop1_body
+
+  :loop2_back_edge
+  goto :loop2_header
+
+  :loop1_body
+  if-eq p0, p0, :exit
+  goto :loop1_header
+
+  :exit
+  return-void
+.end method
+
+# Check two irreducible loops, one within another. Almost identical
+# to analyze3 except the branches of the first 'if' are swapped, to
+# ensure the order at which we find the back edges does not matter.
+#
+#                      entry
+#                    /       \
+#                   /         \
+#           loop1_header   loop2_header
+#           -   |          /       -
+#           -   |         /        -
+#           -   |        /         -
+#           -   |       /          -
+#           -  loop2_body          -
+#           -    /     \           -
+#           -   /       \          -
+#         loop1_body   loop2_back_edge
+#             |
+#             |
+#           exit
+#
+## CHECK-START: void IrreducibleLoop.analyze4(int) ssa_builder (after)
+## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
+## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:true
+.method public static analyze4(I)V
+  .registers 1
+  if-eq p0, p0, :loop1_header
+  goto :loop2_header
+
+  :loop1_header
+  goto :loop2_body
+
+  :loop2_header
+  goto :loop2_body
+
+  :loop2_body
+  if-eq p0, p0, :loop2_back_edge
+  goto :loop1_body
+
+  :loop2_back_edge
+  goto :loop2_header
+
+  :loop1_body
+  if-eq p0, p0, :exit
+  goto :loop1_header
+
+  :exit
+  return-void
+.end method
+
+# Check two irreducible loops, one within another. Almost identical
+# to analyze3 and analyze4, except that the inner loop exits from the
+# back edge, and not the body.
+#
+#                      entry
+#                    /       \
+#                   /         \
+#           loop1_header   loop2_header
+#           -   \            /       -
+#           -    \          /        -
+#           -     \        /         -
+#           -      \      /          -
+#           -     loop2_body         -
+#           -        |               -
+#           -        |               -
+#           -   loop2_back_edge ------
+#           -        |
+#           -        |
+#           ----- loop1_body
+#                    |
+#                    |
+#                   exit
+#
+## CHECK-START: void IrreducibleLoop.analyze5(int) ssa_builder (after)
+## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
+## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:true
+.method public static analyze5(I)V
+  .registers 1
+  if-eq p0, p0, :loop1_header
+  goto :loop2_header
+
+  :loop1_header
+  goto :loop2_body
+
+  :loop2_header
+  goto :loop2_body
+
+  :loop2_body
+  goto :loop2_back_edge
+
+  :loop2_back_edge
+  if-eq p0, p0, :loop2_header
+  goto :loop1_body
+
+  :loop1_body
+  if-eq p0, p0, :exit
+  goto :loop1_header
+
+  :exit
+  return-void
+.end method
diff --git a/test/559-checker-irreducible-loop/src/Main.java b/test/559-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..ab84f81
--- /dev/null
+++ b/test/559-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    {
+      Method m = c.getMethod("simpleLoop", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("lse", int.class, Main.class);
+      Object[] arguments = { 42, new Main() };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("dce", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("liveness", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("gvn");
+      Object[] arguments = { };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("licm1", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("licm2", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+  }
+
+  int myField;
+}
diff --git a/test/561-divrem/expected.txt b/test/561-divrem/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/561-divrem/expected.txt
diff --git a/test/561-divrem/info.txt b/test/561-divrem/info.txt
new file mode 100644
index 0000000..71c9601
--- /dev/null
+++ b/test/561-divrem/info.txt
@@ -0,0 +1,2 @@
+Regression test for div/rem taking Integer.MIN_VALUE and
+Long.MIN_VALUE.
diff --git a/test/561-divrem/src/Main.java b/test/561-divrem/src/Main.java
new file mode 100644
index 0000000..082783d
--- /dev/null
+++ b/test/561-divrem/src/Main.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void assertEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertEquals(0, $noinline$divInt(1));
+    assertEquals(1, $noinline$remInt(1));
+
+    assertEquals(0, $noinline$divInt(-1));
+    assertEquals(-1, $noinline$remInt(-1));
+
+    assertEquals(0, $noinline$divInt(0));
+    assertEquals(0, $noinline$remInt(0));
+
+    assertEquals(1, $noinline$divInt(Integer.MIN_VALUE));
+    assertEquals(0, $noinline$remInt(Integer.MIN_VALUE));
+
+    assertEquals(0, $noinline$divInt(Integer.MAX_VALUE));
+    assertEquals(Integer.MAX_VALUE, $noinline$remInt(Integer.MAX_VALUE));
+
+    assertEquals(0, $noinline$divInt(Integer.MAX_VALUE - 1));
+    assertEquals(Integer.MAX_VALUE - 1, $noinline$remInt(Integer.MAX_VALUE - 1));
+
+    assertEquals(0, $noinline$divInt(Integer.MIN_VALUE + 1));
+    assertEquals(Integer.MIN_VALUE + 1, $noinline$remInt(Integer.MIN_VALUE + 1));
+
+    assertEquals(0L, $noinline$divLong(1L));
+    assertEquals(1L, $noinline$remLong(1L));
+
+    assertEquals(0L, $noinline$divLong(-1L));
+    assertEquals(-1L, $noinline$remLong(-1L));
+
+    assertEquals(0L, $noinline$divLong(0L));
+    assertEquals(0L, $noinline$remLong(0L));
+
+    assertEquals(1L, $noinline$divLong(Long.MIN_VALUE));
+    assertEquals(0L, $noinline$remLong(Long.MIN_VALUE));
+
+    assertEquals(0L, $noinline$divLong(Long.MAX_VALUE));
+    assertEquals(Long.MAX_VALUE, $noinline$remLong(Long.MAX_VALUE));
+
+    assertEquals(0L, $noinline$divLong(Long.MAX_VALUE - 1));
+    assertEquals(Long.MAX_VALUE - 1, $noinline$remLong(Long.MAX_VALUE - 1));
+
+    assertEquals(0L, $noinline$divLong(Long.MIN_VALUE + 1));
+    assertEquals(Long.MIN_VALUE + 1, $noinline$remLong(Long.MIN_VALUE + 1));
+  }
+
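+  // The $noinline$ prefix and the always-false doThrow guard are the test
+  // convention for keeping these helpers out of the inliner, so each
+  // division/remainder by MIN_VALUE is compiled as a standalone method.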
+  public static int $noinline$divInt(int value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value / Integer.MIN_VALUE;
+  }
+
+  public static int $noinline$remInt(int value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value % Integer.MIN_VALUE;
+  }
+
+  public static long $noinline$divLong(long value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value / Long.MIN_VALUE;
+  }
+
+  public static long $noinline$remLong(long value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value % Long.MIN_VALUE;
+  }
+
+  static boolean doThrow = false;
+}
diff --git a/test/561-shared-slowpaths/expected.txt b/test/561-shared-slowpaths/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/561-shared-slowpaths/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/561-shared-slowpaths/info.txt b/test/561-shared-slowpaths/info.txt
new file mode 100644
index 0000000..c51e70b
--- /dev/null
+++ b/test/561-shared-slowpaths/info.txt
@@ -0,0 +1 @@
+Test correctness when slow paths may be shared.
diff --git a/test/561-shared-slowpaths/src/Main.java b/test/561-shared-slowpaths/src/Main.java
new file mode 100644
index 0000000..718b875
--- /dev/null
+++ b/test/561-shared-slowpaths/src/Main.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test correctness in situations where slow paths may be shared
+// (actual sharing may vary between different code generators).
+//
+//
+public class Main {
+
+  // A method with two loops that can be optimized with dynamic BCE,
+  // resulting, for each loop, in a deopt on null, a deopt on lower OOB,
+  // and a deopt on upper OOB.
+  private static void init(int[] x, int[] y, int l1, int h1, int l2, int h2) {
+    for (int i = l1; i < h1; i++) {
+      x[i] = i;
+    }
+    for (int i = l2; i < h2; i++) {
+      y[i] = i;
+    }
+  }
+
+  // Test that each of the six possible exception situations for init()
+  // is correctly handled by the deopt instructions.
+  public static void main(String[] args) {
+    int[] x = new int[100];
+    int[] y = new int[100];
+    int z;
+
+    // All is well.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 100, 0, 100);
+    } catch (Exception e) {
+      z = 1;
+    }
+    expectEquals(z, 0);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], i);
+    }
+
+    // Null deopt on x.
+    z = 0;
+    reset(x, y);
+    try {
+      init(null, y, 0, 100, 0, 100);
+    } catch (NullPointerException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], 0);
+      expectEquals(y[i], 0);
+    }
+
+    // Lower out-of-bounds on x.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, -1, 100, 0, 100);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], 0);
+      expectEquals(y[i], 0);
+    }
+
+    // Upper out-of-bounds on x.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 101, 0, 100);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], 0);
+    }
+
+    // Null deopt on y.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, null, 0, 100, 0, 100);
+    } catch (NullPointerException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], 0);
+    }
+
+    // Lower out-of-bounds on y.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 100, -1, 100);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], 0);
+    }
+
+    // Upper out-of-bounds on y.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 100, 0, 101);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], i);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void reset(int[] x, int[] y) {
+    for (int i = 0; i < x.length; i++) x[i] = 0;
+    for (int i = 0; i < y.length; i++) y[i] = 0;
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/562-bce-preheader/expected.txt b/test/562-bce-preheader/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/562-bce-preheader/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/562-bce-preheader/info.txt b/test/562-bce-preheader/info.txt
new file mode 100644
index 0000000..ae006ac
--- /dev/null
+++ b/test/562-bce-preheader/info.txt
@@ -0,0 +1 @@
+Regression test for correct placement of hoisting/deopting code.
diff --git a/test/562-bce-preheader/src/Main.java b/test/562-bce-preheader/src/Main.java
new file mode 100644
index 0000000..8de0533
--- /dev/null
+++ b/test/562-bce-preheader/src/Main.java
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /**
+   * Method with an outer countable loop and an inner do-while loop.
+   * Since all work is done in the header of the inner loop, any invariant hoisting
+   * and deopting should be done in its proper loop preheader, not the true-block
+   * of the newly generated taken-test after dynamic BCE.
+   */
+  public static int doit(int[][] x, int j) {
+    float f = 0;
+    int acc = 0;
+    for (int i = 0; i < 2; i++) {
+      // The full body of a do-while loop is the loop header.
+      do {
+        // Some "noise" to avoid hoisting the array reference
+        // before the dynamic BCE phase runs.
+        f++;
+        // The invariant array reference with corresponding bounds check
+        // is a candidate for hoisting when dynamic BCE runs. If it is
+        // not moved to the proper loop preheader, the wrong values
+        // cause the test to fail.
+        acc += x[i][i];
+      } while (++j < i);
+    }
+    return acc;
+  }
+
+  /**
+   * Single countable loop with a clear header and a loop body. In this case,
+   * after dynamic bce, some invariant hoisting and deopting must go to the
+   * proper loop preheader and some must go to the true-block.
+   */
+  public static int foo(int[] x, int[] y, int n) {
+    float f = 0;
+    int acc = 0;
+    int i = 0;
+    while (true) {
+      // This part is the loop header.
+      // Some "noise" to avoid hoisting the array reference
+      // before the dynamic BCE phase runs.
+      f++;
+      // The invariant array reference with corresponding bounds check
+      // is a candidate for hoisting when dynamic BCE runs. If it is
+      // not moved to the proper loop preheader, the wrong values
+      // cause the test to fail.
+      acc += y[0];
+      if (++i > n)
+        break;
+      // From here on, this part is the loop body.
+      // The unit-stride array reference is a candidate for dynamic BCE.
+      // The deopting appears in the true-block.
+      acc += x[i];
+    }
+    return acc;
+  }
+
+  public static void main(String args[]) {
+    int[][] x = new int[2][2];
+    int y;
+
+    x[0][0] = 1;
+    x[1][1] = 2;
+
+    expectEquals(8, doit(x, -6));
+    expectEquals(7, doit(x, -5));
+    expectEquals(6, doit(x, -4));
+    expectEquals(5, doit(x, -3));
+    expectEquals(4, doit(x, -2));
+    expectEquals(3, doit(x, -1));
+    expectEquals(3, doit(x,  0));
+    expectEquals(3, doit(x,  1));
+    expectEquals(3, doit(x, 22));
+
+    int a[] = { 1, 2, 3, 5 };
+    int b[] = { 7 };
+
+    expectEquals(7,  foo(a, b, -1));
+    expectEquals(7,  foo(a, b,  0));
+    expectEquals(16, foo(a, b,  1));
+    expectEquals(26, foo(a, b,  2));
+    expectEquals(38, foo(a, b,  3));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/562-no-intermediate/expected.txt b/test/562-no-intermediate/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/562-no-intermediate/expected.txt
diff --git a/test/562-no-intermediate/info.txt b/test/562-no-intermediate/info.txt
new file mode 100644
index 0000000..4f21aeb
--- /dev/null
+++ b/test/562-no-intermediate/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing, checking that no intermediate
+address is kept live across a Java call.
diff --git a/test/127-secondarydex/src/Test.java b/test/562-no-intermediate/src/Main.java
similarity index 62%
copy from test/127-secondarydex/src/Test.java
copy to test/562-no-intermediate/src/Main.java
index 8547e79..3b74d6f 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/562-no-intermediate/src/Main.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,14 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
-    }
+public class Main {
 
-    private void print() {
-        System.out.println("Test");
-    }
+  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
+  /// CHECK-NOT: IntermediateAddress
+  public static void main(String[] args) {
+    array[index] += Math.cos(42);
+  }
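+
+  // The read-modify-write of array[index] straddles the call to Math.cos; on ARM64
+  // an IntermediateAddress must not be kept live across that call, since the array
+  // may move while the call runs.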
 
-    public String toString() {
-        return new String("Test");
-    }
+  static int index = 0;
+  static double[] array = new double[2];
 }
diff --git a/test/563-checker-fakestring/expected.txt b/test/563-checker-fakestring/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/563-checker-fakestring/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/563-checker-fakestring/info.txt b/test/563-checker-fakestring/info.txt
new file mode 100644
index 0000000..ef09d8c
--- /dev/null
+++ b/test/563-checker-fakestring/info.txt
@@ -0,0 +1,2 @@
+Regression test for the FakeString simplification, which incorrectly assumed
+that the uninitialized String cannot be used before the call to StringFactory.
\ No newline at end of file
diff --git a/test/563-checker-fakestring/smali/TestCase.smali b/test/563-checker-fakestring/smali/TestCase.smali
new file mode 100644
index 0000000..4bd804d
--- /dev/null
+++ b/test/563-checker-fakestring/smali/TestCase.smali
@@ -0,0 +1,126 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# Test that all vregs holding the new-instance are updated after the
+# StringFactory call.
+
+## CHECK-START: java.lang.String TestCase.vregAliasing(byte[]) register (after)
+## CHECK-DAG:                Return [<<String:l\d+>>]
+## CHECK-DAG:     <<String>> InvokeStaticOrDirect  method_name:java.lang.String.<init>
+
+.method public static vregAliasing([B)Ljava/lang/String;
+   .registers 5
+
+   # Create new instance of String and store it to v0, v1, v2.
+   new-instance v0, Ljava/lang/String;
+   move-object v1, v0
+   move-object v2, v0
+
+   # Call String.<init> on v1.
+   const-string v3, "UTF8"
+   invoke-direct {v1, p0, v3}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+
+   # Return the object from v2.
+   return-object v2
+
+.end method
+
+# Test usage of String new-instance before it is initialized.
+
+## CHECK-START: void TestCase.compareNewInstance() register (after)
+## CHECK-DAG:     <<Null:l\d+>>   NullConstant
+## CHECK-DAG:     <<String:l\d+>> NewInstance
+## CHECK-DAG:     <<Cond:z\d+>>   NotEqual [<<String>>,<<Null>>]
+## CHECK-DAG:                     If [<<Cond>>]
+
+.method public static compareNewInstance()V
+   .registers 3
+
+   new-instance v0, Ljava/lang/String;
+   if-nez v0, :return
+
+   # Will throw NullPointerException if this branch is taken.
+   const v1, 0x0
+   const-string v2, "UTF8"
+   invoke-direct {v0, v1, v2}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-void
+
+   :return
+   return-void
+
+.end method
+
+# Test deoptimization between String's allocation and initialization. When not
+# compiling --debuggable, the NewInstance will be optimized out.
+
+## CHECK-START: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
+## CHECK:         <<Null:l\d+>>   NullConstant
+## CHECK:                         Deoptimize env:[[<<Null>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+
+## CHECK-START-DEBUGGABLE: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
+## CHECK:         <<String:l\d+>> NewInstance
+## CHECK:                         Deoptimize env:[[<<String>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+
+.method public static deoptimizeNewInstance([I[B)I
+   .registers 6
+
+   const v2, 0x0
+   const v1, 0x1
+
+   new-instance v0, Ljava/lang/String;
+
+   # Deoptimize here if the array is too short.
+   aget v1, p0, v1
+   add-int/2addr v2, v1
+
+   # Check that we're being executed by the interpreter.
+   invoke-static {}, LMain;->assertIsInterpreted()V
+
+   # String allocation should succeed.
+   const-string v3, "UTF8"
+   invoke-direct {v0, p1, v3}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+
+   # This ArrayGet will throw ArrayIndexOutOfBoundsException.
+   const v1, 0x4
+   aget v1, p0, v1
+   add-int/2addr v2, v1
+
+   return v2
+
+.end method
+
+# Test that a redundant NewInstance is removed if not used and not compiling
+# --debuggable.
+
+## CHECK-START: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK-NOT:     NewInstance
+## CHECK-NOT:     LoadClass
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK:         NewInstance
+
+.method public static removeNewInstance([B)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
diff --git a/test/563-checker-fakestring/src/Main.java b/test/563-checker-fakestring/src/Main.java
new file mode 100644
index 0000000..04df0f6
--- /dev/null
+++ b/test/563-checker-fakestring/src/Main.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+  // Workaround for b/18051191.
+  class Inner {}
+
+  public static native void assertIsInterpreted();
+
+  private static void assertEqual(String expected, String actual) {
+    if (!expected.equals(actual)) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    System.loadLibrary(args[0]);
+    Class<?> c = Class.forName("TestCase");
+    String testString = "Hello world";
+    byte[] testData = testString.getBytes("UTF8");
+
+    {
+      Method m = c.getMethod("vregAliasing", byte[].class);
+      String result = (String) m.invoke(null, new Object[] { testData });
+      assertEqual(testString, result);
+    }
+
+    {
+      c.getMethod("compareNewInstance").invoke(null, (Object[]) null);
+    }
+
+    {
+      Method m = c.getMethod("deoptimizeNewInstance", int[].class, byte[].class);
+      try {
+        m.invoke(null, new Object[] { new int[] { 1, 2, 3 }, testData });
+      } catch (InvocationTargetException ex) {
+        if (ex.getCause() instanceof ArrayIndexOutOfBoundsException) {
+          // Expected.
+        } else {
+          throw ex.getCause();
+        }
+      }
+    }
+
+    {
+      Method m = c.getMethod("removeNewInstance", byte[].class);
+      String result = (String) m.invoke(null, new Object[] { testData });
+      assertEqual(testString, result);
+    }
+  }
+}
diff --git a/test/701-easy-div-rem/genMain.py b/test/701-easy-div-rem/genMain.py
index 75eee17..b6c769f 100644
--- a/test/701-easy-div-rem/genMain.py
+++ b/test/701-easy-div-rem/genMain.py
@@ -13,25 +13,27 @@
 # limitations under the License.
 
 upper_bound_int_pow2 = 31
+upper_bound_int_pow2_neg = 32
 upper_bound_long_pow2 = 63
+upper_bound_long_pow2_neg = 64
 upper_bound_constant = 100
 all_tests = [
     ({'@INT@': 'int', '@SUFFIX@':''},
      [('CheckDiv', 'idiv_by_pow2_', [2**i for i in range(upper_bound_int_pow2)]),
-      ('CheckDiv', 'idiv_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckDiv', 'idiv_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2_neg)]),
       ('CheckDiv', 'idiv_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckDiv', 'idiv_by_constant_neg_', [-i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'irem_by_pow2_', [2**i for i in range(upper_bound_int_pow2)]),
-      ('CheckRem', 'irem_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckRem', 'irem_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2_neg)]),
       ('CheckRem', 'irem_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'irem_by_constant_neg_', [-i for i in range(1, upper_bound_constant)])]),
     ({'@INT@': 'long', '@SUFFIX@': 'l'},
      [('CheckDiv', 'ldiv_by_pow2_', [2**i for i in range(upper_bound_long_pow2)]),
-      ('CheckDiv', 'ldiv_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckDiv', 'ldiv_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2_neg)]),
       ('CheckDiv', 'ldiv_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckDiv', 'ldiv_by_constant_neg_', [-i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'lrem_by_pow2_', [2**i for i in range(upper_bound_long_pow2)]),
-      ('CheckRem', 'lrem_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckRem', 'lrem_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2_neg)]),
       ('CheckRem', 'lrem_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'lrem_by_constant_neg_', [-i for i in range(1, upper_bound_constant)])])
 ]
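The bump from 31 to 32 (and 63 to 64) only for the negative power-of-two lists is presumably because the most negative value is itself a power of two in magnitude: -2^31 fits in an int and -2^63 fits in a long, while +2^31 and +2^63 do not. A small Java illustration of that boundary (illustration only, not part of the generated tests):

    public class PowerOfTwoBounds {
      public static void main(String[] args) {
        // +2^31 does not fit in an int, but -2^31 does: it is Integer.MIN_VALUE.
        System.out.println(Integer.MIN_VALUE);           // -2147483648 == -2^31
        System.out.println(Long.MIN_VALUE);              // -9223372036854775808 == -2^63

        // Division by these extreme divisors is the extra case the negative
        // lists can now cover.
        System.out.println(12345 / Integer.MIN_VALUE);   // 0
        System.out.println((1L << 62) / Long.MIN_VALUE); // 0
      }
    }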
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 27f5b5d..2e66af5 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -49,4 +49,5 @@
 b/25494456
 b/21869691
 b/26143249
+b/26579108
 Done!
diff --git a/test/800-smali/smali/b_26579108.smali b/test/800-smali/smali/b_26579108.smali
new file mode 100644
index 0000000..dde3825
--- /dev/null
+++ b/test/800-smali/smali/b_26579108.smali
@@ -0,0 +1,34 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26579108;
+.super Ljava/lang/Object;
+
+# Ensure that merging an uninitialized type with null does not pass verification.
+
+.field public static field:I
+
+.method public static run()Ljava/lang/String;
+    .registers 2
+    new-instance v0, Ljava/lang/String;
+
+    sget v1, LB26579108;->field:I
+    if-eqz v1, :cond_5
+
+    const/4 v0, 0x0
+    :cond_5
+
+    invoke-direct {v0}, Ljava/lang/String;-><init>()V
+    return-object v0
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index cc3b0b4..38aa58d 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -143,6 +143,8 @@
                 new IncompatibleClassChangeError(), null));
         testCases.add(new TestCase("b/26143249", "B26143249", "run", null,
                 new AbstractMethodError(), null));
+        testCases.add(new TestCase("b/26579108", "B26579108", "run", null, new VerifyError(),
+                null));
     }
 
     public void runTests() {
@@ -188,8 +190,7 @@
                 if (tc.expectedException != null) {
                     errorReturn = new IllegalStateException("Expected an exception in test " +
                                                             tc.testName);
-                }
-                if (tc.expectedReturn == null && retValue != null) {
+                } else if (tc.expectedReturn == null && retValue != null) {
                     errorReturn = new IllegalStateException("Expected a null result in test " +
                                                             tc.testName);
                 } else if (tc.expectedReturn != null &&
diff --git a/test/969-iface-super/build b/test/969-iface-super/build
new file mode 100755
index 0000000..b1ef320
--- /dev/null
+++ b/test/969-iface-super/build
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# Should we compile with Java source code? By default we will use Smali.
+USES_JAVA_SOURCE="false"
+if [[ $@ == *"--jvm"* ]]; then
+  USES_JAVA_SOURCE="true"
+elif [[ "$USE_JACK" == "true" ]]; then
+  if $JACK -D jack.java.source.version=1.8 2>/dev/null; then
+    USES_JAVA_SOURCE="true"
+  else
+    echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
+  fi
+fi
+
+# Generate the Main.smali file or fail.
+${ANDROID_BUILD_TOP}/art/test/utils/python/generate_smali_main.py ./smali
+
+if [[ "$USES_JAVA_SOURCE" == "true" ]]; then
+  # We are compiling Java code; extract it from the embedded comments in the smali.
+  mkdir -p src
+  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
+  # Ignore the smali directory.
+  EXTRA_ARGS="--no-smali"
+fi
+
+./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
diff --git a/test/969-iface-super/expected.txt b/test/969-iface-super/expected.txt
new file mode 100644
index 0000000..f310d10
--- /dev/null
+++ b/test/969-iface-super/expected.txt
@@ -0,0 +1,47 @@
+Testing for type A
+A-virtual           A.SayHi()='Hello '
+A-interface     iface.SayHi()='Hello '
+End testing for type A
+Testing for type B
+B-virtual           B.SayHi()='Hello Hello '
+B-interface     iface.SayHi()='Hello Hello '
+B-interface    iface2.SayHi()='Hello Hello '
+End testing for type B
+Testing for type C
+C-virtual           C.SayHi()='Hello  and welcome '
+C-interface     iface.SayHi()='Hello  and welcome '
+End testing for type C
+Testing for type D
+D-virtual           D.SayHi()='Hello Hello  and welcome '
+D-interface     iface.SayHi()='Hello Hello  and welcome '
+D-interface    iface2.SayHi()='Hello Hello  and welcome '
+End testing for type D
+Testing for type E
+E-virtual           E.SayHi()='Hello  there!'
+E-interface     iface.SayHi()='Hello  there!'
+E-interface    iface3.SayHi()='Hello  there!'
+End testing for type E
+Testing for type F
+F-virtual           E.SayHi()='Hello  there!'
+F-virtual           F.SayHi()='Hello  there!'
+F-interface     iface.SayHi()='Hello  there!'
+F-interface    iface3.SayHi()='Hello  there!'
+F-virtual           F.SaySurprisedHi()='Hello  there!!'
+End testing for type F
+Testing for type G
+G-virtual           E.SayHi()='Hello  there!?'
+G-virtual           F.SayHi()='Hello  there!?'
+G-virtual           G.SayHi()='Hello  there!?'
+G-interface     iface.SayHi()='Hello  there!?'
+G-interface    iface3.SayHi()='Hello  there!?'
+G-virtual           F.SaySurprisedHi()='Hello  there!!'
+G-virtual           G.SaySurprisedHi()='Hello  there!!'
+G-virtual           G.SayVerySurprisedHi()='Hello  there!!!'
+End testing for type G
+Testing for type H
+H-virtual           H.SayConfusedHi()='Hello ?!'
+H-virtual           A.SayHi()='Hello ?'
+H-virtual           H.SayHi()='Hello ?'
+H-interface     iface.SayHi()='Hello ?'
+H-virtual           H.SaySurprisedHi()='Hello !'
+End testing for type H
diff --git a/test/969-iface-super/info.txt b/test/969-iface-super/info.txt
new file mode 100644
index 0000000..c0555d2
--- /dev/null
+++ b/test/969-iface-super/info.txt
@@ -0,0 +1,6 @@
+Smali-based tests for experimental interface default methods.
+
+This tests invoke-super with default methods.
+
+To run with --jvm you must export JAVA_HOME to a Java 8 installation and pass
+the --use-java-home flag to run-test.
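For reference, the Java-source shape of the feature under test is an interface default method invoked through Iface.super, roughly as below (a sketch assuming a Java 8 compiler; the interface and class names are illustrative, and the actual test classes are generated from the smali files that follow):

    interface Greeter {
      default String sayHi() {
        return "Hello ";
      }
    }

    // Roughly what classes like C in the smali do: call the interface's
    // default implementation via invoke-super, then append to the result.
    class WelcomingGreeter implements Greeter {
      @Override
      public String sayHi() {
        return Greeter.super.sayHi() + " and welcome ";
      }
    }

    public class IfaceSuperSketch {
      public static void main(String[] args) {
        System.out.println(new WelcomingGreeter().sayHi());
      }
    }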
diff --git a/test/969-iface-super/run b/test/969-iface-super/run
new file mode 100755
index 0000000..8944ea9
--- /dev/null
+++ b/test/969-iface-super/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} "$@" --experimental default-methods
diff --git a/test/969-iface-super/smali/A.smali b/test/969-iface-super/smali/A.smali
new file mode 100644
index 0000000..e7760a1
--- /dev/null
+++ b/test/969-iface-super/smali/A.smali
@@ -0,0 +1,28 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class A implements iface {
+# }
+
+.class public LA;
+.super Ljava/lang/Object;
+.implements Liface;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/969-iface-super/smali/B.smali b/test/969-iface-super/smali/B.smali
new file mode 100644
index 0000000..e529d05
--- /dev/null
+++ b/test/969-iface-super/smali/B.smali
@@ -0,0 +1,28 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class B implements iface2 {
+# }
+
+.class public LB;
+.super Ljava/lang/Object;
+.implements Liface2;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/969-iface-super/smali/C.smali b/test/969-iface-super/smali/C.smali
new file mode 100644
index 0000000..6fbb0c4
--- /dev/null
+++ b/test/969-iface-super/smali/C.smali
@@ -0,0 +1,41 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class C implements iface {
+#   public String SayHi() {
+#       return iface.super.SayHi() + " and welcome ";
+#   }
+# }
+
+.class public LC;
+.super Ljava/lang/Object;
+.implements Liface;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public SayHi()Ljava/lang/String;
+    .locals 2
+    invoke-super {p0}, Liface;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    const-string v1, " and welcome "
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/969-iface-super/smali/D.smali b/test/969-iface-super/smali/D.smali
new file mode 100644
index 0000000..ecd4629
--- /dev/null
+++ b/test/969-iface-super/smali/D.smali
@@ -0,0 +1,41 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class D implements iface2 {
+#   public String SayHi() {
+#       return iface2.super.SayHi() + " and welcome ";
+#   }
+# }
+
+.class public LD;
+.super Ljava/lang/Object;
+.implements Liface2;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public SayHi()Ljava/lang/String;
+    .locals 2
+    invoke-super {p0}, Liface2;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    const-string v1, " and welcome "
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/969-iface-super/smali/E.smali b/test/969-iface-super/smali/E.smali
new file mode 100644
index 0000000..558aaea
--- /dev/null
+++ b/test/969-iface-super/smali/E.smali
@@ -0,0 +1,41 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class E implements iface3 {
+#   public String SayHi() {
+#       return iface3.super.SayHi() + " there!";
+#   }
+# }
+
+.class public LE;
+.super Ljava/lang/Object;
+.implements Liface3;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public SayHi()Ljava/lang/String;
+    .locals 2
+    invoke-super {p0}, Liface3;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    const-string v1, " there!"
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/969-iface-super/smali/F.smali b/test/969-iface-super/smali/F.smali
new file mode 100644
index 0000000..c402d5c
--- /dev/null
+++ b/test/969-iface-super/smali/F.smali
@@ -0,0 +1,40 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class F extends E {
+#   public String SaySurprisedHi() {
+#       return super.SayHi() + "!";
+#   }
+# }
+
+.class public LF;
+.super LE;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, LE;-><init>()V
+    return-void
+.end method
+
+.method public SaySurprisedHi()Ljava/lang/String;
+    .registers 2
+    invoke-super {p0}, LE;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    const-string v1, "!"
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/969-iface-super/smali/G.smali b/test/969-iface-super/smali/G.smali
new file mode 100644
index 0000000..45705e6
--- /dev/null
+++ b/test/969-iface-super/smali/G.smali
@@ -0,0 +1,53 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class G extends F {
+#   public String SayHi() {
+#       return super.SayHi() + "?";
+#   }
+#   public String SayVerySurprisedHi() {
+#       return super.SaySurprisedHi() + "!";
+#   }
+# }
+
+.class public LG;
+.super LF;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, LF;-><init>()V
+    return-void
+.end method
+
+.method public SayHi()Ljava/lang/String;
+    .locals 2
+    invoke-super {p0}, LF;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    const-string v1, "?"
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
+
+.method public SayVerySurprisedHi()Ljava/lang/String;
+    .locals 2
+    invoke-super {p0}, LF;->SaySurprisedHi()Ljava/lang/String;
+    move-result-object v0
+    const-string v1, "!"
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/969-iface-super/smali/H.smali b/test/969-iface-super/smali/H.smali
new file mode 100644
index 0000000..12f246b
--- /dev/null
+++ b/test/969-iface-super/smali/H.smali
@@ -0,0 +1,66 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class H extends A {
+#   public String SayHi() {
+#       return super.SayHi() + "?";
+#   }
+#   public String SaySurprisedHi() {
+#       return super.SayHi() + "!";
+#   }
+#   public String SayConfusedHi() {
+#       return SayHi() + "!";
+#   }
+# }
+
+.class public LH;
+.super LA;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, LA;-><init>()V
+    return-void
+.end method
+
+.method public SayHi()Ljava/lang/String;
+    .locals 2
+    invoke-super {p0}, LA;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    const-string v1, "?"
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
+
+.method public SaySurprisedHi()Ljava/lang/String;
+    .locals 2
+    invoke-super {p0}, LA;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    const-string v1, "!"
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
+
+.method public SayConfusedHi()Ljava/lang/String;
+    .locals 2
+    invoke-virtual {p0}, LH;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    const-string v1, "!"
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/969-iface-super/smali/classes.xml b/test/969-iface-super/smali/classes.xml
new file mode 100644
index 0000000..4d205bd
--- /dev/null
+++ b/test/969-iface-super/smali/classes.xml
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright 2015 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+
+<data>
+  <classes>
+    <class name="A" super="java/lang/Object">
+      <implements>
+        <item>iface</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="B" super="java/lang/Object">
+      <implements>
+        <item>iface2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="C" super="java/lang/Object">
+      <implements>
+        <item>iface</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="D" super="java/lang/Object">
+      <implements>
+        <item>iface2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="E" super="java/lang/Object">
+      <implements>
+        <item>iface3</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="F" super="E">
+      <implements> </implements>
+      <methods>
+        <item>SaySurprisedHi</item>
+      </methods>
+    </class>
+
+    <class name="G" super="F">
+      <implements> </implements>
+      <methods>
+        <item>SayVerySurprisedHi</item>
+      </methods>
+    </class>
+
+    <class name="H" super="A">
+      <implements> </implements>
+      <methods>
+        <item>SaySurprisedHi</item>
+        <item>SayConfusedHi</item>
+      </methods>
+    </class>
+  </classes>
+
+  <interfaces>
+    <interface name="iface" super="java/lang/Object">
+      <implements> </implements>
+      <methods>
+        <item>SayHi</item>
+      </methods>
+    </interface>
+
+    <interface name="iface2" super="java/lang/Object">
+      <implements>
+        <item>iface</item>
+      </implements>
+      <methods> </methods>
+    </interface>
+
+    <interface name="iface3" super="java/lang/Object">
+      <implements>
+        <item>iface</item>
+      </implements>
+      <methods> </methods>
+    </interface>
+  </interfaces>
+</data>
diff --git a/test/969-iface-super/smali/iface.smali b/test/969-iface-super/smali/iface.smali
new file mode 100644
index 0000000..08bb93d
--- /dev/null
+++ b/test/969-iface-super/smali/iface.smali
@@ -0,0 +1,30 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface iface {
+#   public default String SayHi() {
+#       return "Hello ";
+#   }
+# }
+
+.class public abstract interface Liface;
+.super Ljava/lang/Object;
+
+.method public SayHi()Ljava/lang/String;
+    .locals 1
+    const-string v0, "Hello "
+    return-object v0
+.end method
diff --git a/test/969-iface-super/smali/iface2.smali b/test/969-iface-super/smali/iface2.smali
new file mode 100644
index 0000000..ce6f864
--- /dev/null
+++ b/test/969-iface-super/smali/iface2.smali
@@ -0,0 +1,36 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface iface2 extends iface {
+#   public default String SayHi() {
+#       return iface.super.SayHi() + iface.super.SayHi();
+#   }
+# }
+
+.class public abstract interface Liface2;
+.super Ljava/lang/Object;
+.implements Liface;
+
+.method public SayHi()Ljava/lang/String;
+    .locals 2
+    invoke-super {p0}, Liface;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    invoke-super {p0}, Liface;->SayHi()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/969-iface-super/smali/iface3.smali b/test/969-iface-super/smali/iface3.smali
new file mode 100644
index 0000000..bf20036
--- /dev/null
+++ b/test/969-iface-super/smali/iface3.smali
@@ -0,0 +1,22 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface iface3 extends iface {
+# }
+
+.class public abstract interface Liface3;
+.super Ljava/lang/Object;
+.implements Liface;
diff --git a/test/970-iface-super-resolution-generated/build b/test/970-iface-super-resolution-generated/build
new file mode 100755
index 0000000..2d9830b
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/build
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# We will be making more files than the ulimit is set to allow. Remove it temporarily.
+OLD_ULIMIT=`ulimit -S`
+ulimit -S unlimited
+
+restore_ulimit() {
+  ulimit -S "$OLD_ULIMIT"
+}
+trap 'restore_ulimit' ERR
+
+# Should we compile with Java source code? By default we will use Smali.
+USES_JAVA_SOURCE="false"
+if [[ $@ == *"--jvm"* ]]; then
+  USES_JAVA_SOURCE="true"
+elif [[ "$USE_JACK" == "true" ]]; then
+  if $JACK -D jack.java.source.version=1.8 2>/dev/null; then
+    USES_JAVA_SOURCE="true"
+  else
+    echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
+  fi
+fi
+
+if [[ "$USES_JAVA_SOURCE" == "true" ]]; then
+  # Build the Java files
+  mkdir -p src
+  mkdir -p src2
+  ./util-src/generate_java.py ./src2 ./src ./expected.txt
+else
+  # Generate the smali files and expected.txt or fail
+  mkdir -p smali
+  ./util-src/generate_smali.py ./smali ./expected.txt
+fi
+
+./default-build "$@" --experimental default-methods
+
+# Reset the ulimit back to its initial value
+restore_ulimit
diff --git a/test/970-iface-super-resolution-generated/expected.txt b/test/970-iface-super-resolution-generated/expected.txt
new file mode 100644
index 0000000..1ddd65d
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/expected.txt
@@ -0,0 +1 @@
+This file is generated by util-src/generate_smali.py; do not modify it directly!
diff --git a/test/970-iface-super-resolution-generated/info.txt b/test/970-iface-super-resolution-generated/info.txt
new file mode 100644
index 0000000..2cd2cc7
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/info.txt
@@ -0,0 +1,17 @@
+Smali-based tests for experimental interface default methods.
+
+This tests that interface method resolution order is correct.
+
+Obviously needs to run under ART or a Java 8 Language runtime and compiler.
+
+When run, the smali test files are generated by the util-src/generate_smali.py
+script. If we run with --jvm we will use the
+$(ANDROID_BUILD_TOP)/art/tools/extract-embedded-java script to turn the smali
+into equivalent Java using the embedded Java code.
+
+Care should be taken when updating the generate_smali.py script. It should
+always produce the same output when run multiple times, and the expected
+output it writes should remain valid.
+
+Do not modify the expected.txt file. It is generated on each run by
+util-src/generate_smali.py.
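As a reminder of the rule the generated classes probe (a sketch, not generated output; names are illustrative): when a default method is inherited through more than one path, the most specific interface provides the implementation, and hierarchies with no unique most-specific default are modelled by the ICCE/NSME results in generate_smali.py below.

    interface Base {
      default String getCalledInterface() {
        return "Base";
      }
    }

    interface Refined extends Base {
      @Override
      default String getCalledInterface() {
        return "Refined";
      }
    }

    // The default is inherited through both Base and Refined; Refined is the
    // more specific interface, so its implementation is selected.
    class Impl implements Base, Refined {
    }

    public class ResolutionOrderSketch {
      public static void main(String[] args) {
        System.out.println(new Impl().getCalledInterface()); // prints "Refined"
      }
    }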
diff --git a/test/970-iface-super-resolution-generated/run b/test/970-iface-super-resolution-generated/run
new file mode 100755
index 0000000..6d2930d
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} "$@" --experimental default-methods
diff --git a/test/970-iface-super-resolution-generated/util-src/generate_java.py b/test/970-iface-super-resolution-generated/util-src/generate_java.py
new file mode 100755
index 0000000..c12f10d
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/util-src/generate_java.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Java test files for test 970.
+"""
+
+import generate_smali as base
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+import testgen.mixins as mixins
+
+class JavaConverter(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  A class that can convert a SmaliFile to a JavaFile.
+  """
+  def __init__(self, inner):
+    self.inner = inner
+
+  def get_name(self):
+    return self.inner.get_name()
+
+  def __str__(self):
+    out = ""
+    for line in str(self.inner).splitlines(keepends = True):
+      if line.startswith("#"):
+        out += line[1:]
+    return out
+
+def main(argv):
+  final_java_dir = Path(argv[1])
+  if not final_java_dir.exists() or not final_java_dir.is_dir():
+    print("{} is not a valid java dir".format(final_java_dir), file=sys.stderr)
+    sys.exit(1)
+  initial_java_dir = Path(argv[2])
+  if not initial_java_dir.exists() or not initial_java_dir.is_dir():
+    print("{} is not a valid java dir".format(initial_java_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[3])
+  mainclass, all_files = base.create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    if f.initial_build_different():
+      JavaConverter(f).dump(final_java_dir)
+      JavaConverter(f.get_initial_build_version()).dump(initial_java_dir)
+    else:
+      JavaConverter(f).dump(initial_java_dir)
+      if isinstance(f, base.TestInterface):
+        JavaConverter(f).dump(final_java_dir)
+
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/970-iface-super-resolution-generated/util-src/generate_smali.py b/test/970-iface-super-resolution-generated/util-src/generate_smali.py
new file mode 100755
index 0000000..cb7b0fa
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/util-src/generate_smali.py
@@ -0,0 +1,614 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Smali test files for test 970.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the tree can have.
+MAX_IFACE_DEPTH = 3
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
+  """
+  A Main.smali file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+# class Main {{
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+{test_groups}
+
+{main_func}
+
+# }}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+#   public static void main(String[] args) {{
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+
+    {test_group_invoke}
+
+    return-void
+.end method
+#   }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+#     {test_name}();
+    invoke-static {{}}, {test_name}()V
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def initial_build_different(self):
+    return False
+
+  def get_name(self):
+    """
+    Gets the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the smali code for this test.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_groups = ""
+    for t in all_tests:
+      test_groups += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           test_groups = test_groups,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+#   public static void {fname}() {{
+#     try {{
+#       {farg} v = new {farg}();
+#       System.out.println("Testing {tree}");
+#       v.testAll();
+#       System.out.println("Success: testing {tree}");
+#       return;
+#     }} catch (Exception e) {{
+#       System.out.println("Failure: testing {tree}");
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#   }}
+.method public static {fname}()V
+    .locals 7
+    :call_{fname}_try_start
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+      new-instance v6, L{farg};
+      invoke-direct {{v6}}, L{farg};-><init>()V
+
+      const-string v3, "Testing {tree}"
+      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+      invoke-virtual {{v6}}, L{farg};->testAll()V
+
+      const-string v3, "Success: testing {tree}"
+      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+      return-void
+    :call_{fname}_try_end
+    .catch Ljava/lang/Exception; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
+    :error_{fname}_start
+      move-exception v3
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+      const-string v4, "Failure: testing {tree}"
+      invoke-virtual {{v2, v4}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      invoke-virtual {{v3,v2}}, Ljava/lang/Error;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+.end method
+"""
+
+  OUTPUT_FORMAT = """
+Testing {tree}
+{test_output}
+Success: testing {tree}
+""".strip()
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def __str__(self):
+    """
+    Print the smali code for this test function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname = self.get_name(),
+                                              farg  = self.farg.get_name(),
+                                              tree  = self.farg.get_tree())
+
+  def get_name(self):
+    """
+    Gets the name of this test function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def get_expected(self):
+    """
+    Get the expected output of this function.
+    """
+    return self.OUTPUT_FORMAT.format(
+        tree = self.farg.get_tree(),
+        test_output = self.farg.get_test_output().strip())
+
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  A class that will be instantiated to test interface initialization order.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+
+.class public L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public class {class_name} implements {ifaces} {{
+#
+#   public {class_name}() {{
+#   }}
+.method public constructor <init>()V
+  .locals 2
+  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+#   public String getCalledInterface() {{
+#     throw new Error("Should not be called");
+#   }}
+.method public getCalledInterface()V
+  .locals 2
+  const-string v0, "Should not be called"
+  new-instance v1, Ljava/lang/Error;
+  invoke-direct {{v1, v0}}, Ljava/lang/Error;-><init>(Ljava/lang/String;)V
+  throw v1
+.end method
+
+#   public void testAll() {{
+#     boolean failed = false;
+#     Error exceptions = new Error("Test failures");
+.method public testAll()V
+  .locals 5
+  const/4 v0, 0
+  const-string v1, "Test failures"
+  new-instance v2, Ljava/lang/Error;
+  invoke-direct {{v2, v1}}, Ljava/lang/Error;-><init>(Ljava/lang/String;)V
+
+  {test_calls}
+
+#     if (failed) {{
+  if-eqz v0, :end
+#       throw exceptions;
+    throw v2
+  :end
+#     }}
+  return-void
+#   }}
+.end method
+
+{test_funcs}
+
+# }}
+"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  TEST_CALL_TEMPLATE = """
+#     try {{
+#       test_{iface}_super();
+#     }} catch (Throwable t) {{
+#       exceptions.addSuppressed(t);
+#       failed = true;
+#     }}
+  :try_{iface}_start
+    invoke-virtual {{p0}}, L{class_name};->test_{iface}_super()V
+    goto :error_{iface}_end
+  :try_{iface}_end
+  .catch Ljava/lang/Throwable; {{:try_{iface}_start .. :try_{iface}_end}} :error_{iface}_start
+  :error_{iface}_start
+    move-exception v3
+    invoke-virtual {{v2, v3}}, Ljava/lang/Throwable;->addSuppressed(Ljava/lang/Throwable;)V
+    const/4 v0, 1
+  :error_{iface}_end
+"""
+
+  TEST_FUNC_TEMPLATE = """
+#   public void test_{iface}_super() {{
+#     try {{
+#       System.out.println("{class_name} -> {iface}.super.getCalledInterface(): " +
+#                          {iface}.super.getCalledInterface());
+#     }} catch (NoSuchMethodError e) {{
+#       System.out.println("{class_name} -> {iface}.super.getCalledInterface(): NoSuchMethodError");
+#     }} catch (IncompatibleClassChangeError e) {{
+#       System.out.println("{class_name} -> {iface}.super.getCalledInterface(): IncompatibleClassChangeError");
+#     }} catch (Throwable t) {{
+#       System.out.println("{class_name} -> {iface}.super.getCalledInterface(): Unknown error occurred");
+#       throw t;
+#     }}
+#   }}
+.method public test_{iface}_super()V
+  .locals 3
+  sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+  :try_start
+    const-string v1, "{class_name} -> {iface}.super.getCalledInterface(): "
+    invoke-super {{p0}}, L{iface};->getCalledInterface()Ljava/lang/String;
+    move-result-object v2
+
+    invoke-virtual {{v1, v2}}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v1
+
+    invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+  :try_end
+  .catch Ljava/lang/NoSuchMethodError; {{:try_start .. :try_end}} :AME_catch
+  .catch Ljava/lang/IncompatibleClassChangeError; {{:try_start .. :try_end}} :ICCE_catch
+  .catch Ljava/lang/Throwable; {{:try_start .. :try_end}} :throwable_catch
+  :AME_catch
+    const-string v1, "{class_name} -> {iface}.super.getCalledInterface(): NoSuchMethodError"
+    invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+  :ICCE_catch
+    const-string v1, "{class_name} -> {iface}.super.getCalledInterface(): IncompatibleClassChangeError"
+    invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+  :throwable_catch
+    move-exception v2
+    const-string v1, "{class_name} -> {iface}.super.getCalledInterface(): Unknown error occurred"
+    invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    throw v2
+.end method
+""".strip()
+
+  OUTPUT_TEMPLATE = "{class_name} -> {iface}.super.getCalledInterface(): {result}"
+
+  def __init__(self, ifaces, name = None):
+    """
+    Initialize this test class which implements the given interfaces
+    """
+    self.ifaces = ifaces
+    if name is None:
+      self.class_name = "CLASS_"+gensym()
+    else:
+      self.class_name = name
+
+  def get_initial_build_version(self):
+    """
+    Returns a version of this class that can be used for the initial build (meaning no compiler
+    checks will be triggered).
+    """
+    return TestClass([i.get_initial_build_version() for i in self.ifaces], self.class_name)
+
+  def initial_build_different(self):
+    return False
+
+  def get_name(self):
+    """
+    Gets the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{fname} {iftree}]".format(fname = self.get_name(), iftree = print_tree(self.ifaces))
+
+  def get_test_output(self):
+    return '\n'.join(map(lambda a: self.OUTPUT_TEMPLATE.format(class_name = self.get_name(),
+                                                               iface = a.get_name(),
+                                                               result = a.get_output()),
+                         self.ifaces))
+
+  def __str__(self):
+    """
+    Print the smali code for this class.
+    """
+    funcs = '\n'.join(map(lambda a: self.TEST_FUNC_TEMPLATE.format(iface = a.get_name(),
+                                                                   class_name = self.get_name()),
+                          self.ifaces))
+    calls = '\n'.join(map(lambda a: self.TEST_CALL_TEMPLATE.format(iface = a.get_name(),
+                                                                   class_name = self.get_name()),
+                          self.ifaces))
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           implements_spec = s_ifaces,
+                                           ifaces = j_ifaces,
+                                           class_name = self.class_name,
+                                           test_funcs = funcs,
+                                           test_calls = calls)
+
+class IncompatibleClassChangeErrorResult(mixins.Named):
+  def get_name(self):
+    return "IncompatibleClassChangeError"
+
+ICCE = IncompatibleClassChangeErrorResult()
+
+class NoSuchMethodErrorResult(mixins.Named):
+  def get_name(self):
+    return "NoSuchMethodError"
+
+NSME = NoSuchMethodErrorResult()
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  An interface that will be used to test default method resolution order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+.class public abstract interface L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public interface {class_name} {extends} {ifaces} {{
+
+{funcs}
+
+# }}
+"""
+
+  ABSTRACT_FUNC_TEMPLATE = """
+"""
+
+  DEFAULT_FUNC_TEMPLATE = """
+#   public default String getCalledInterface() {{
+#     return "{class_name}";
+#   }}
+.method public getCalledInterface()Ljava/lang/String;
+  .locals 1
+  const-string v0, "{class_name}"
+  return-object v0
+.end method
+"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  def __init__(self, ifaces, default, name = None):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = ifaces
+    self.default = default
+    if name is None:
+      end = "_DEFAULT" if default else ""
+      self.class_name = "INTERFACE_"+gensym()+end
+    else:
+      self.class_name = name
+
+  def get_initial_build_version(self):
+    """
+    Returns a version of this class that can be used for the initial build (meaning no compiler
+    checks will be triggered).
+    """
+    return TestInterface([i.get_initial_build_version() for i in self.ifaces],
+                         True,
+                         self.class_name)
+
+  def initial_build_different(self):
+    return not self.default
+
+  def get_name(self):
+    """
+    Gets the name of this interface
+    """
+    return self.class_name
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def get_called(self):
+    """
+    Get the interface whose default method would be called when calling the
+    getCalledInterface() method.
+    """
+    all_ifaces = set(iface for iface in self if iface.default)
+    for i in all_ifaces:
+      if all(map(lambda j: i not in j.get_super_types(), all_ifaces)):
+        return i
+    return ICCE if any(map(lambda i: i.default, all_ifaces)) else NSME
+
+  def get_super_types(self):
+    """
+    Returns a set of all the supertypes of this interface
+    """
+    return set(i2 for i2 in self)
+
+  def get_output(self):
+    if self.default:
+      return self.get_name()
+    else:
+      return self.get_called().get_name()
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+  def __str__(self):
+    """
+    Print the smali code for this interface.
+    """
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    if self.default:
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(class_name = self.class_name)
+    else:
+      funcs = self.ABSTRACT_FUNC_TEMPLATE
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
+                                               implements_spec = s_ifaces,
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               funcs = funcs,
+                                               class_name = self.class_name)
+
+def dump_tree(ifaces):
+  """
+  Yields all the interfaces transitively implemented by the set in
+  reverse-depth-first order
+  """
+  for i in ifaces:
+    yield from dump_tree(i.ifaces)
+    yield i
+
+def print_tree(ifaces):
+  """
+  Prints the tree for the given ifaces.
+  """
+  return " ".join(i.get_tree() for i in  ifaces)
+
+# Cached output of subtree_sizes for speed of access.
+SUBTREES = [set(tuple(sorted(l)) for l in subtree_sizes(i)) for i in range(MAX_IFACE_DEPTH + 1)]
+
+def create_test_classes():
+  """
+  Yield all the test classes with the different interface trees
+  """
+  for num in range(1, MAX_IFACE_DEPTH + 1):
+    for split in SUBTREES[num]:
+      ifaces = []
+      for sub in split:
+        ifaces.append(list(create_interface_trees(sub)))
+      for supers in itertools.product(*ifaces):
+        yield TestClass(supers)
+
+def create_interface_trees(num):
+  """
+  Yield all the interface trees up to 'num' depth.
+  """
+  if num == 0:
+    yield TestInterface(tuple(), False)
+    yield TestInterface(tuple(), True)
+    return
+  for split in SUBTREES[num]:
+    ifaces = []
+    for sub in split:
+      ifaces.append(list(create_interface_trees(sub)))
+    for supers in itertools.product(*ifaces):
+      yield TestInterface(supers, False)
+      yield TestInterface(supers, True)
+      for selected in (set(dump_tree(supers)) - set(supers)):
+        yield TestInterface(tuple([selected] + list(supers)), True)
+        yield TestInterface(tuple([selected] + list(supers)), False)
+      # TODO Should add on some from higher up the tree.
+
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for clazz in create_test_classes():
+    classes.add(clazz)
+    for i in dump_tree(clazz.ifaces):
+      classes.add(i)
+    mc.add_test(clazz)
+  return mc, classes
+
+def main(argv):
+  smali_dir = Path(argv[1])
+  if not smali_dir.exists() or not smali_dir.is_dir():
+    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(smali_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/971-iface-super/build b/test/971-iface-super/build
new file mode 100755
index 0000000..1e9f8aa
--- /dev/null
+++ b/test/971-iface-super/build
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# We will be making more files than the ulimit is set to allow. Remove it temporarily.
+OLD_ULIMIT=`ulimit -S`
+ulimit -S unlimited
+
+restore_ulimit() {
+  ulimit -S "$OLD_ULIMIT"
+}
+trap 'restore_ulimit' ERR
+
+# TODO: Support running with jack.
+
+if [[ $@ == *"--jvm"* ]]; then
+  # Build the Java files if we are running a --jvm test
+  mkdir -p classes
+  mkdir -p src
+  echo "${JAVAC} \$@" >> ./javac_exec.sh
+  # This will use javac_exec.sh to execute the javac compiler. It will place the
+  # compiled class files in ./classes and the expected values in expected.txt
+  #
+  # After this the src directory will contain the final versions of all files.
+  ./util-src/generate_java.py ./javac_exec.sh ./src ./classes ./expected.txt ./build_log
+else
+  mkdir -p ./smali
+  # Generate the smali files and expected.txt or fail
+  ./util-src/generate_smali.py ./smali ./expected.txt
+  # Use the default build script
+  ./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
+fi
+
+# Reset the ulimit back to its initial value
+restore_ulimit
diff --git a/test/971-iface-super/expected.txt b/test/971-iface-super/expected.txt
new file mode 100644
index 0000000..1ddd65d
--- /dev/null
+++ b/test/971-iface-super/expected.txt
@@ -0,0 +1 @@
+This file is generated by util-src/generate_smali.py do not directly modify!
diff --git a/test/971-iface-super/info.txt b/test/971-iface-super/info.txt
new file mode 100644
index 0000000..bc1c428
--- /dev/null
+++ b/test/971-iface-super/info.txt
@@ -0,0 +1,17 @@
+Smali-based tests for experimental interface default methods.
+
+This tests that interface method resolution order is correct in the presence of
+partial compilation/illegal invokes.
+
+This test needs to run under ART or a Java 8 language runtime and compiler.
+
+When run, the smali test files are generated by the util-src/generate_smali.py
+script. If the test is run with --jvm, the util-src/generate_java.py script is
+used instead to generate equivalent Java code based on the smali code.
+
+Care should be taken when updating the generate_smali.py script: it should
+produce equivalent output every time it is run, and the expected output must
+remain valid.
+
+Do not modify the expected.txt file. It is generated on each run by
+util-src/generate_smali.py.
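+
+For reference, the build script invokes the generator roughly as:
+
+  ./util-src/generate_smali.py ./smali ./expected.txt
+
+where the first argument is the smali output directory and the second is the
+expected.txt file to (re)generate.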
diff --git a/test/971-iface-super/run b/test/971-iface-super/run
new file mode 100755
index 0000000..6d2930d
--- /dev/null
+++ b/test/971-iface-super/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} "$@" --experimental default-methods
diff --git a/test/971-iface-super/util-src/generate_java.py b/test/971-iface-super/util-src/generate_java.py
new file mode 100755
index 0000000..99b0479
--- /dev/null
+++ b/test/971-iface-super/util-src/generate_java.py
@@ -0,0 +1,138 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Java test files for test 971.
+"""
+
+import generate_smali as base
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+import testgen.mixins as mixins
+import functools
+import operator
+import subprocess
+
+class JavaConverter(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  A class that can convert a SmaliFile to a JavaFile.
+  """
+  def __init__(self, inner):
+    self.inner = inner
+
+  def get_name(self):
+    """Gets the name of this file."""
+    return self.inner.get_name()
+
+  def __str__(self):
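+    # The smali templates carry the equivalent Java source in '#' comments; the
+    # conversion keeps only those lines and strips the leading '#'. For example,
+    # a generated smali line such as "#   public void call() {" becomes
+    # "   public void call() {" in the emitted .java file.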
+    out = ""
+    for line in str(self.inner).splitlines(keepends = True):
+      if line.startswith("#"):
+        out += line[1:]
+    return out
+
+class Compiler:
+  def __init__(self, sources, javac, temp_dir, classes_dir):
+    self.javac = javac
+    self.temp_dir = temp_dir
+    self.classes_dir = classes_dir
+    self.sources = sources
+
+  def compile_files(self, args, files):
+    """
+    Compile the files given with the arguments given.
+    """
+    args = args.split()
+    files = list(map(str, files))
+    cmd = ['sh', '-a', '-e', '--', str(self.javac)] + args + files
+    print("Running compile command: {}".format(cmd))
+    subprocess.check_call(cmd)
+    print("Compiled {} files".format(len(files)))
+
+  def execute(self):
+    """
+    Compiles this test, doing partial compilation as necessary.
+    """
+    # Compile Main and all classes first. Force all interfaces to be default so that there will be
+    # no compiler problems (works since classes only implement 1 interface).
+    for f in self.sources:
+      if isinstance(f, base.TestInterface):
+        JavaConverter(f.get_specific_version(base.InterfaceType.default)).dump(self.temp_dir)
+      else:
+        JavaConverter(f).dump(self.temp_dir)
+    self.compile_files("-d {}".format(self.classes_dir), self.temp_dir.glob("*.java"))
+
+    # Now we compile the interfaces
+    ifaces = set(i for i in self.sources if isinstance(i, base.TestInterface))
+    filters = (lambda a: a.is_default(), lambda a: not a.is_default())
+    converters = (lambda a: JavaConverter(a.get_specific_version(base.InterfaceType.default)),
+                  lambda a: JavaConverter(a.get_specific_version(base.InterfaceType.empty)))
+    while len(ifaces) != 0:
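+      # Rough compile order: each round picks the most-derived interfaces still
+      # uncompiled (handling default and non-default ones separately) and
+      # compiles them against stub versions of their super-types, so javac never
+      # has to resolve a real default-method conflict.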
+      for iface_filter, iface_converter in zip(filters, converters):
+        # Find those ifaces where there are no (uncompiled) interfaces that are subtypes.
+        tops = set(filter(lambda a: iface_filter(a) and not any(map(lambda i: a in i.get_super_types(), ifaces)), ifaces))
+        files = []
+        # Dump these ones, they are getting compiled.
+        for f in tops:
+          out = JavaConverter(f)
+          out.dump(self.temp_dir)
+          files.append(self.temp_dir / out.get_file_name())
+        # Force all superinterfaces of these to the uniform stub version chosen
+        # by iface_converter (default or empty) so there will be no conflicts.
+        overrides = functools.reduce(operator.or_, map(lambda i: i.get_super_types(), tops), set())
+        for overridden in overrides:
+          out = iface_converter(overridden)
+          out.dump(self.temp_dir)
+          files.append(self.temp_dir / out.get_file_name())
+        self.compile_files("-d {outdir} -cp {outdir}".format(outdir = self.classes_dir), files)
+        # Remove these from the set of interfaces to be compiled.
+        ifaces -= tops
+    print("Finished compiling all files.")
+    return
+
+def main(argv):
+  javac_exec = Path(argv[1])
+  if not javac_exec.exists() or not javac_exec.is_file():
+    print("{} is not a shell script".format(javac_exec), file=sys.stderr)
+    sys.exit(1)
+  temp_dir = Path(argv[2])
+  if not temp_dir.exists() or not temp_dir.is_dir():
+    print("{} is not a valid source dir".format(temp_dir), file=sys.stderr)
+    sys.exit(1)
+  classes_dir = Path(argv[3])
+  if not classes_dir.exists() or not classes_dir.is_dir():
+    print("{} is not a valid classes directory".format(classes_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[4])
+  mainclass, all_files = base.create_all_test_files()
+
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  print("Wrote expected output")
+
+  Compiler(all_files, javac_exec, temp_dir, classes_dir).execute()
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/971-iface-super/util-src/generate_smali.py b/test/971-iface-super/util-src/generate_smali.py
new file mode 100755
index 0000000..f01c904
--- /dev/null
+++ b/test/971-iface-super/util-src/generate_smali.py
@@ -0,0 +1,689 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Smali test files for test 971.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from enum import Enum
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the type tree can have.
+MAX_IFACE_DEPTH = 3
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
+  """
+  A Main.smali file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+# class Main {{
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+{test_funcs}
+
+{main_func}
+
+# }}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+#   public static void main(String[] args) {{
+.method public static main([Ljava/lang/String;)V
+    .locals 0
+
+    {test_group_invoke}
+
+    return-void
+.end method
+#   }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+#     {test_name}();
+    invoke-static {{}}, {test_name}()V
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the MainClass smali code.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_funcs = ""
+    for t in all_tests:
+      test_funcs += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright("smali"),
+                                           test_funcs = test_funcs,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+#   public static void {fname}() {{
+#     {farg} v = null;
+#     try {{
+#       v = new {farg}();
+#     }} catch (Throwable e) {{
+#       System.out.println("Unexpected error occurred which creating {farg} instance");
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#     try {{
+#       v.callSupers();
+#       return;
+#     }} catch (Throwable e) {{
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#   }}
+.method public static {fname}()V
+    .locals 7
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    :new_{fname}_try_start
+      new-instance v0, L{farg};
+      invoke-direct {{v0}}, L{farg};-><init>()V
+      goto :call_{fname}_try_start
+    :new_{fname}_try_end
+    .catch Ljava/lang/Throwable; {{:new_{fname}_try_start .. :new_{fname}_try_end}} :new_error_{fname}_start
+    :new_error_{fname}_start
+      move-exception v6
+      const-string v5, "Unexpected error occurred while creating {farg} instance"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      invoke-virtual {{v6,v4}}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+    :call_{fname}_try_start
+      invoke-virtual {{v0}}, L{farg};->callSupers()V
+      return-void
+    :call_{fname}_try_end
+    .catch Ljava/lang/Throwable; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
+    :error_{fname}_start
+      move-exception v6
+      invoke-virtual {{v6,v4}}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+.end method
+"""
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def get_expected(self):
+    """
+    Get the expected output calling this function.
+    """
+    return "\n".join(self.farg.get_expected())
+
+  def get_name(self):
+    """
+    Get the name of this function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def __str__(self):
+    """
+    Print the smali code of this function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname = self.get_name(),
+                                              farg = self.farg.get_name())
+
+class InterfaceCallResponse(Enum):
+  """
+  An enumeration of all the different types of responses to an interface call we can have
+  """
+  NoError = 0
+  NoSuchMethodError = 1
+  AbstractMethodError = 2
+  IncompatibleClassChangeError = 3
+
+  def get_output_format(self):
+    if self == InterfaceCallResponse.NoError:
+      return "No exception thrown for {iface_name}.super.call() on {tree}\n"
+    elif self == InterfaceCallResponse.AbstractMethodError:
+      return "AbstractMethodError thrown for {iface_name}.super.call() on {tree}\n"
+    elif self == InterfaceCallResponse.NoSuchMethodError:
+      return "NoSuchMethodError thrown for {iface_name}.super.call() on {tree}\n"
+    else:
+      return "IncompatibleClassChangeError thrown for {iface_name}.super.call() on {tree}\n"
+
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  A class that will be instantiated to test interface super behavior.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+
+.class public L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public class {class_name} implements {ifaces} {{
+
+.method public constructor <init>()V
+  .registers 1
+  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+#   public void call() {{
+#     throw new Error("{class_name}.call(v) should never get called!");
+#   }}
+.method public call()V
+  .locals 2
+  new-instance v0, Ljava/lang/Error;
+  const-string v1, "{class_name}.call(v) should never get called!"
+  invoke-direct {{v0, v1}}, Ljava/lang/Error;-><init>(Ljava/lang/String;)V
+  throw v0
+.end method
+
+#   public void callSupers() {{
+.method public callSupers()V
+  .locals 4
+  sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+  {super_calls}
+
+  return-void
+.end method
+#   }}
+
+# }}
+"""
+  SUPER_CALL_TEMPLATE = """
+#     try {{
+#       System.out.println("Calling {iface_name}.super.call() on {tree}");
+#       {iface_name}.super.call();
+#       System.out.println("No exception thrown for {iface_name}.super.call() on {tree}");
+#     }} catch (AbstractMethodError ame) {{
+#       System.out.println("AbstractMethodError thrown for {iface_name}.super.call() on {tree}");
+#     }} catch (NoSuchMethodError nsme) {{
+#       System.out.println("NoSuchMethodError thrown for {iface_name}.super.call() on {tree}");
+#     }} catch (IncompatibleClassChangeError icce) {{
+#       System.out.println("IncompatibleClassChangeError thrown for {iface_name}.super.call() on {tree}");
+#     }} catch (Throwable t) {{
+#       System.out.println("Unknown error thrown for {iface_name}.super.call() on {tree}");
+#       throw t;
+#     }}
+    :call_{class_name}_{iface_name}_try_start
+      const-string v1, "Calling {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      invoke-super {{p0}}, L{iface_name};->call()V
+      const-string v1, "No exception thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      goto :call_{class_name}_{iface_name}_end
+    :call_{class_name}_{iface_name}_try_end
+    .catch Ljava/lang/AbstractMethodError; {{:call_{class_name}_{iface_name}_try_start .. :call_{class_name}_{iface_name}_try_end}} :AME_{class_name}_{iface_name}_start
+    .catch Ljava/lang/NoSuchMethodError; {{:call_{class_name}_{iface_name}_try_start .. :call_{class_name}_{iface_name}_try_end}} :NSME_{class_name}_{iface_name}_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {{:call_{class_name}_{iface_name}_try_start .. :call_{class_name}_{iface_name}_try_end}} :ICCE_{class_name}_{iface_name}_start
+    .catch Ljava/lang/Throwable; {{:call_{class_name}_{iface_name}_try_start .. :call_{class_name}_{iface_name}_try_end}} :error_{class_name}_{iface_name}_start
+    :AME_{class_name}_{iface_name}_start
+      const-string v1, "AbstractMethodError thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      goto :call_{class_name}_{iface_name}_end
+    :NSME_{class_name}_{iface_name}_start
+      const-string v1, "NoSuchMethodError thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      goto :call_{class_name}_{iface_name}_end
+    :ICCE_{class_name}_{iface_name}_start
+      const-string v1, "IncompatibleClassChangeError thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      goto :call_{class_name}_{iface_name}_end
+    :error_{class_name}_{iface_name}_start
+      move-exception v2
+      const-string v1, "Unknown error thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      throw v2
+    :call_{class_name}_{iface_name}_end
+"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  OUTPUT_PREFIX = "Calling {iface_name}.super.call() on {tree}\n"
+
+  def __init__(self, ifaces):
+    """
+    Initialize this test class which implements the given interfaces
+    """
+    self.ifaces = ifaces
+    self.class_name = "CLASS_"+gensym()
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iface_tree}]".format(class_name = self.class_name,
+                                                iface_tree = print_tree(self.ifaces))
+
+  def __iter__(self):
+    """
+    Step through all interfaces implemented transitively by this class
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def get_expected(self):
+    for iface in self.ifaces:
+      yield self.OUTPUT_PREFIX.format(iface_name = iface.get_name(), tree = self.get_tree())
+      yield from iface.get_expected()
+      yield iface.get_response().get_output_format().format(iface_name = iface.get_name(),
+                                                            tree = self.get_tree())
+
+  def __str__(self):
+    """
+    Print the smali code of this class.
+    """
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    super_template = self.SUPER_CALL_TEMPLATE
+    super_calls = "\n".join(super_template.format(iface_name = iface.get_name(),
+                                                  class_name = self.get_name(),
+                                                  tree = self.get_tree()) for iface in self.ifaces)
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           ifaces = j_ifaces,
+                                           implements_spec = s_ifaces,
+                                           tree = self.get_tree(),
+                                           class_name = self.class_name,
+                                           super_calls = super_calls)
+
+class InterfaceType(Enum):
+  """
+  An enumeration of all the different types of interfaces we can have.
+
+  default: It has a default method
+  abstract: It has a method declared but not defined
+  empty: It does not have the method
+  """
+  default = 0
+  abstract = 1
+  empty = 2
+
+  def get_suffix(self):
+    if self == InterfaceType.default:
+      return "_DEFAULT"
+    elif self == InterfaceType.abstract:
+      return "_ABSTRACT"
+    elif self == InterfaceType.empty:
+      return "_EMPTY"
+    else:
+      raise TypeError("Interface type had illegal value.")
+
+class ConflictInterface:
+  """
+  A singleton representing a conflict of default methods.
+  """
+
+  def is_conflict(self):
+    """
+    Returns true if this is a conflict interface and calling the method on this interface will
+    result in an IncompatibleClassChangeError.
+    """
+    return True
+
+  def is_abstract(self):
+    """
+    Returns true if this is an abstract interface and calling the method on this interface will
+    result in an AbstractMethodError.
+    """
+    return False
+
+  def is_empty(self):
+    """
+    Returns true if this is an empty interface and calling the method on this interface will
+    result in a NoSuchMethodError.
+    """
+    return False
+
+  def is_default(self):
+    """
+    Returns true if this is a default interface and calling the method on this interface will
+    result in a method actually being called.
+    """
+    return False
+
+  def get_response(self):
+    return InterfaceCallResponse.IncompatibleClassChangeError
+
+CONFLICT_TYPE = ConflictInterface()
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  An interface that will be used to test default method resolution order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+.class public abstract interface L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public interface {class_name} {extends} {ifaces} {{
+
+{func}
+
+# }}
+"""
+
+  SUPER_CALL_TEMPLATE = TestClass.SUPER_CALL_TEMPLATE
+  OUTPUT_PREFIX = TestClass.OUTPUT_PREFIX
+
+  DEFAULT_FUNC_TEMPLATE = """
+#   public default void call() {{
+.method public call()V
+  .locals 4
+  sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+  {super_calls}
+
+  return-void
+.end method
+#   }}
+"""
+
+  ABSTRACT_FUNC_TEMPLATE = """
+#   public void call();
+.method public abstract call()V
+.end method
+"""
+
+  EMPTY_FUNC_TEMPLATE = """"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  def __init__(self, ifaces, iface_type, full_name = None):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = sorted(ifaces)
+    self.iface_type = iface_type
+    if full_name is None:
+      end = self.iface_type.get_suffix()
+      self.class_name = "INTERFACE_"+gensym()+end
+    else:
+      self.class_name = full_name
+
+  def get_specific_version(self, v):
+    """
+    Returns a copy of this interface of the given type for use in partial compilation.
+    """
+    return TestInterface(self.ifaces, v, full_name = self.class_name)
+
+  def get_super_types(self):
+    """
+    Returns a set of all the supertypes of this interface
+    """
+    return set(i2 for i2 in self)
+
+  def is_conflict(self):
+    """
+    Returns true if this is a conflict interface and calling the method on this interface will
+    result in an IncompatibleClassChangeError.
+    """
+    return False
+
+  def is_abstract(self):
+    """
+    Returns true if this is an abstract interface and calling the method on this interface will
+    result in an AbstractMethodError.
+    """
+    return self.iface_type == InterfaceType.abstract
+
+  def is_empty(self):
+    """
+    Returns true if this is an empty interface and calling the method on this interface will
+    result in a NoSuchMethodError.
+    """
+    return self.iface_type == InterfaceType.empty
+
+  def is_default(self):
+    """
+    Returns true if this is a default interface and calling the method on this interface will
+    result in a method actually being called.
+    """
+    return self.iface_type == InterfaceType.default
+
+  def get_expected(self):
+    response = self.get_response()
+    if response == InterfaceCallResponse.NoError:
+      for iface in self.ifaces:
+        if self.is_default():
+          yield self.OUTPUT_PREFIX.format(iface_name = iface.get_name(), tree = self.get_tree())
+        yield from iface.get_expected()
+        if self.is_default():
+          yield iface.get_response().get_output_format().format(iface_name = iface.get_name(),
+                                                                tree = self.get_tree())
+
+  def get_response(self):
+    if self.is_default():
+      return InterfaceCallResponse.NoError
+    elif self.is_abstract():
+      return InterfaceCallResponse.AbstractMethodError
+    elif len(self.ifaces) == 0:
+      return InterfaceCallResponse.NoSuchMethodError
+    else:
+      return self.get_called().get_response()
+
+  def get_called(self):
+    """
+    Returns the interface that will be called when the method on this class is invoked or
+    CONFLICT_TYPE if there is no interface that will be called.
+    """
+    if not self.is_empty() or len(self.ifaces) == 0:
+      return self
+    else:
+      best = self
+      for super_iface in self.ifaces:
+        super_best = super_iface.get_called()
+        if super_best.is_conflict():
+          return CONFLICT_TYPE
+        elif best.is_default():
+          if super_best.is_default():
+            return CONFLICT_TYPE
+        elif best.is_abstract():
+          if super_best.is_default():
+            best = super_best
+        else:
+          assert best.is_empty()
+          best = super_best
+      return best
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def __str__(self):
+    """
+    Print the smali code of this interface.
+    """
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    if self.is_default():
+      super_template = self.SUPER_CALL_TEMPLATE
+      super_calls ="\n".join(super_template.format(iface_name = iface.get_name(),
+                                                   class_name = self.get_name(),
+                                                   tree = self.get_tree()) for iface in self.ifaces)
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(super_calls = super_calls)
+    elif self.is_abstract():
+      funcs = self.ABSTRACT_FUNC_TEMPLATE.format()
+    else:
+      funcs = ""
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
+                                               implements_spec = s_ifaces,
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               func = funcs,
+                                               tree = self.get_tree(),
+                                               class_name = self.class_name)
+
+def print_tree(ifaces):
+  """
+  Prints a list of iface trees
+  """
+  return " ".join(i.get_tree() for i in ifaces)
+
+# The deduplicated output of subtree_sizes for each size up to
+# MAX_IFACE_DEPTH.
+SUBTREES = [set(tuple(sorted(l)) for l in subtree_sizes(i))
+            for i in range(MAX_IFACE_DEPTH + 1)]
+
+def create_test_classes():
+  """
+  Yield all the test classes with the different interface trees
+  """
+  for num in range(1, MAX_IFACE_DEPTH + 1):
+    for split in SUBTREES[num]:
+      ifaces = []
+      for sub in split:
+        ifaces.append(list(create_interface_trees(sub)))
+      for supers in itertools.product(*ifaces):
+        yield TestClass(supers)
+
+def create_interface_trees(num):
+  """
+  Yield all the interface trees up to 'num' depth.
+  """
+  if num == 0:
+    for iftype in InterfaceType:
+      yield TestInterface(tuple(), iftype)
+    return
+  for split in SUBTREES[num]:
+    ifaces = []
+    for sub in split:
+      ifaces.append(list(create_interface_trees(sub)))
+    for supers in itertools.product(*ifaces):
+      for iftype in InterfaceType:
+        yield TestInterface(supers, iftype)
+
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for clazz in create_test_classes():
+    classes.add(clazz)
+    for i in clazz:
+      classes.add(i)
+    mc.add_test(clazz)
+  return mc, classes
+
+def main(argv):
+  smali_dir = Path(argv[1])
+  if not smali_dir.exists() or not smali_dir.is_dir():
+    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(smali_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index f74a516..b922b45 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -30,6 +30,7 @@
   051-thread/thread_test.cc \
   117-nopatchoat/nopatchoat.cc \
   1337-gc-coverage/gc_coverage.cc \
+  136-daemon-jni-shutdown/daemon_jni_shutdown.cc \
   137-cfi/cfi.cc \
   139-register-natives/regnative.cc \
   141-class-unload/jni_unload.cc \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 31ab2f2..586c805 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -239,7 +239,10 @@
   960-default-smali \
   961-default-iface-resolution-generated \
   964-default-iface-init-generated \
-  968-default-partial-compile-generated
+  968-default-partial-compile-generated \
+  969-iface-super \
+  970-iface-super-resolution-generated \
+  971-iface-super
 
 # Check if we have python3 to run our tests.
 ifeq ($(wildcard /usr/bin/python3),)
@@ -453,7 +456,8 @@
 
 # Known broken tests for the default compiler (Quick).
 TEST_ART_BROKEN_DEFAULT_RUN_TESTS := \
-  457-regs
+  457-regs \
+  563-checker-fakestring
 
 ifneq (,$(filter default,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -532,12 +536,20 @@
 # Tests that should fail in the read barrier configuration with the Optimizing compiler.
 # 484: Baker's fast path based read barrier compiler instrumentation generates code containing
 #      more parallel moves on x86, thus some Checker assertions may fail.
+# 527: On ARM64, the read barrier instrumentation does not support the HArm64IntermediateAddress
+#      instruction yet (b/26601270).
 # 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
 #      not yet handled in the read barrier configuration.
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
   484-checker-register-hints \
+  527-checker-array-access-split \
   537-checker-arraycopy
 
+# Tests that should fail in the read barrier configuration with JIT.
+# 141: Disabled because of intermittent failures on the ART Buildbot (b/25866001).
+TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := \
+  141-class-unload
+
 ifeq ($(ART_USE_READ_BARRIER),true)
   ifneq (,$(filter default,$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
@@ -552,10 +564,18 @@
         $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
+
+  ifneq (,$(filter jit,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
 endif
 
 TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS :=
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
 
 # Tests that should fail in the heap poisoning configuration with the default (Quick) compiler.
 # 137: Quick has no support for read barriers and punts to the
@@ -662,7 +682,8 @@
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
-  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION)
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \
@@ -670,7 +691,8 @@
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
-  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION)
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
 endif
 
 # Create a rule to build and run a tests following the form:
@@ -861,20 +883,20 @@
         ifeq ($(9),multiimage)
           test_groups += ART_RUN_TEST_$$(uc_host_or_target)_IMAGE_RULES
           run_test_options += --multi-image
-      		ifeq ($(1),host)
-        		prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
-      		else
-        		prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
-      		endif
+                ifeq ($(1),host)
+                        prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
+                else
+                        prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
+                endif
         else
           ifeq ($(9),multipicimage)
             test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES
-        		run_test_options += --pic-image --multi-image
-        		ifeq ($(1),host)
-          		prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
-        		else
-          		prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
-        		endif
+                        run_test_options += --pic-image --multi-image
+                        ifeq ($(1),host)
+                        prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
+                        else
+                        prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
+                        endif
           else
             $$(error found $(9) expected $(IMAGE_TYPES))
           endif
diff --git a/test/run-test b/test/run-test
index ec34e09..033e2c6 100755
--- a/test/run-test
+++ b/test/run-test
@@ -683,11 +683,6 @@
 # Tests named '<number>-checker-*' will also have their CFGs verified with
 # Checker when compiled with Optimizing on host.
 if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then
-  # Build Checker DEX files without dx's optimizations so the input to dex2oat
-  # better resembles the Java source. We always build the DEX the same way, even
-  # if Checker is not invoked and the test only runs the program.
-  build_args="${build_args} --dx-option --no-optimize"
-
   # Jack does not necessarily generate the same DEX output than dx. Because these tests depend
   # on a particular DEX output, keep building them with dx for now (b/19467889).
   USE_JACK="false"
@@ -738,7 +733,7 @@
 # To cause tests to fail fast, limit the file sizes created by dx, dex2oat and ART output to 2MB.
 build_file_size_limit=2048
 run_file_size_limit=2048
-if echo "$test_dir" | grep -Eq "(083|089|964)" > /dev/null; then
+if echo "$test_dir" | grep -Eq "(083|089|964|971)" > /dev/null; then
   build_file_size_limit=5120
   run_file_size_limit=5120
 fi
diff --git a/tools/art b/tools/art
index 304a9d0..d91b451 100644
--- a/tools/art
+++ b/tools/art
@@ -75,6 +75,7 @@
 ANDROID_ROOT=$PROG_DIR/..
 LIBDIR=$(find_libdir)
 LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBDIR
+DEBUG_OPTION=""
 
 DELETE_ANDROID_DATA=false
 # If ANDROID_DATA is the system ANDROID_DATA or is not set, use our own,
@@ -87,6 +88,7 @@
 
 if [ z"$PERF" != z ]; then
   invoke_with="perf record -o $ANDROID_DATA/perf.data -e cycles:u $invoke_with"
+  DEBUG_OPTION="-Xcompiler-option --generate-debug-info"
 fi
 
 # We use the PIC core image to work with perf.
@@ -99,7 +101,7 @@
     -XXlib:$LIBART \
     -Xnorelocate \
     -Ximage:$ANDROID_ROOT/framework/core-optimizing-pic.art \
-    -Xcompiler-option --generate-debug-info \
+    $DEBUG_OPTION \
     "$@"
 
 EXIT_STATUS=$?
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 4699907..351e99e 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -171,12 +171,6 @@
   bug: 25437292
 },
 {
-  description: "Assertion failing on the concurrent collector configuration.",
-  result: EXEC_FAILED,
-  names: ["jsr166.LinkedTransferQueueTest#testTransfer2"],
-  bug: 25883050
-},
-{
   description: "Failing tests after enso move.",
   result: EXEC_FAILED,
   bug: 26326992,
@@ -250,8 +244,26 @@
           "org.apache.harmony.tests.java.util.prefs.FilePreferencesImplTest#testPutGet"]
 },
 {
-  description: "Missing resource",
+  description: "libnativehelper_compat_libc++ loading issue",
   result: EXEC_FAILED,
-  names: ["libcore.java.util.LocaleTest#test_SerializationBug_26387905"]
+  modes: [device],
+  names: ["dalvik.system.JniTest#testGetSuperclass",
+          "dalvik.system.JniTest#testPassingBooleans",
+          "dalvik.system.JniTest#testPassingBytes",
+          "dalvik.system.JniTest#testPassingChars",
+          "dalvik.system.JniTest#testPassingClass",
+          "dalvik.system.JniTest#testPassingDoubles",
+          "dalvik.system.JniTest#testPassingFloats",
+          "dalvik.system.JniTest#testPassingInts",
+          "dalvik.system.JniTest#testPassingLongs",
+          "dalvik.system.JniTest#testPassingObjectReferences",
+          "dalvik.system.JniTest#testPassingShorts",
+          "dalvik.system.JniTest#testPassingThis",
+          "libcore.util.NativeAllocationRegistryTest#testBadSize",
+          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndNoSharedRegistry",
+          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndSharedRegistry",
+          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndNoSharedRegistry",
+          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndSharedRegistry",
+          "libcore.util.NativeAllocationRegistryTest#testNullArguments"]
 }
 ]
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
new file mode 100644
index 0000000..6ea83d2
--- /dev/null
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -0,0 +1,38 @@
+/*
+ * This file contains expectations for ART's buildbot's concurrent collector
+ * configurations. The purpose of this file is to temporarily and quickly list
+ * failing tests without breaking the bots on the CC configurations, until they
+ * are fixed or until the libcore expectation files get properly updated. The
+ * script that uses this file is art/tools/run-libcore-tests.sh.
+ *
+ * It is also used to enable AOSP experiments without disturbing CTS's
+ * expectations.
+ */
+
+[
+{
+  description: "Assertion failing on the concurrent collector configuration.",
+  result: EXEC_FAILED,
+  names: ["jsr166.LinkedTransferQueueTest#testTransfer2",
+          "jsr166.LinkedTransferQueueTest#testWaitingConsumer"],
+  bug: 25883050
+},
+{
+  description: "libcore.java.lang.OldSystemTest#test_gc failure on armv8-concurrent-collector.",
+  result: EXEC_FAILED,
+  names: ["libcore.java.lang.OldSystemTest#test_gc"],
+  bug: 26155567
+},
+{
+  description: "TimeoutException on host-{x86,x86-64}-concurrent-collector",
+  result: EXEC_FAILED,
+  modes: [host],
+  names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
+          "libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
+          "libcore.java.util.zip.OldAndroidGZIPStreamTest#testGZIPStream",
+          "libcore.java.util.zip.OldAndroidZipStreamTest#testZipStream",
+          "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries",
+          "libcore.java.util.zip.ZipInputStreamTest#testLongMessage"],
+  bug: 26507762
+}
+]
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 11ed8b9..f346239 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -32,6 +32,12 @@
   exit 1
 fi
 
+expectations="--expectations art/tools/libcore_failures.txt"
+if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
+  # Tolerate some more failures on the concurrent collector configurations.
+  expectations="$expectations --expectations art/tools/libcore_failures_concurrent_collector.txt"
+fi
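+# With ART_USE_READ_BARRIER=true this makes vogar read both expectation files,
+# roughly: --expectations art/tools/libcore_failures.txt
+#          --expectations art/tools/libcore_failures_concurrent_collector.txt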
+
 emulator="no"
 if [ "$ANDROID_SERIAL" = "emulator-5554" ]; then
   emulator="yes"
@@ -105,4 +111,4 @@
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args --vm-arg -Xusejit:true --expectations art/tools/libcore_failures.txt --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}
+vogar $vogar_args --vm-arg -Xusejit:true $expectations --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}