Merge "Create helper class for DWARF expressions."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index af64470..e3f0c24 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -39,6 +39,7 @@
   NonStaticLeafMethods \
   ProtoCompare \
   ProtoCompare2 \
+  ProfileTestMultiDex \
   StaticLeafMethods \
   Statics \
   StaticsFromCode \
@@ -65,7 +66,7 @@
 
 # Dex file dependencies for each gtest.
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Statics StaticsFromCode
-ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods
+ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
@@ -78,6 +79,8 @@
 ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
 ART_GTEST_proxy_test_DEX_DEPS := Interfaces
 ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
+ART_GTEST_profile_assistant_test_DEX_DEPS := ProfileTestMultiDex
+ART_GTEST_profile_compilation_info_test_DEX_DEPS := ProfileTestMultiDex
 ART_GTEST_stub_test_DEX_DEPS := AllFields
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
 ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
@@ -191,13 +194,12 @@
   runtime/gc/collector/immune_spaces_test.cc \
   runtime/gc/heap_test.cc \
   runtime/gc/reference_queue_test.cc \
-  runtime/gc/space/dlmalloc_space_base_test.cc \
   runtime/gc/space/dlmalloc_space_static_test.cc \
   runtime/gc/space/dlmalloc_space_random_test.cc \
-  runtime/gc/space/rosalloc_space_base_test.cc \
+  runtime/gc/space/large_object_space_test.cc \
   runtime/gc/space/rosalloc_space_static_test.cc \
   runtime/gc/space/rosalloc_space_random_test.cc \
-  runtime/gc/space/large_object_space_test.cc \
+  runtime/gc/space/space_create_test.cc \
   runtime/gc/task_processor_test.cc \
   runtime/gtest_test.cc \
   runtime/handle_scope_test.cc \
@@ -208,6 +210,7 @@
   runtime/interpreter/safe_math_test.cc \
   runtime/interpreter/unstarted_runtime_test.cc \
   runtime/java_vm_ext_test.cc \
+  runtime/jit/profile_compilation_info_test.cc \
   runtime/lambda/closure_test.cc \
   runtime/lambda/shorty_field_type_test.cc \
   runtime/leb128_test.cc \
@@ -267,6 +270,7 @@
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
+  compiler/profile_assistant_test.cc \
   compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index b5fd1e0..afc8463 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -168,6 +168,12 @@
   return nullptr;
 }
 
+// Get the ProfileCompilationInfo that should be passed to the driver.
+ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() {
+  // Null by default; profile information will not be taken into account.
+  return nullptr;
+}
+
 void CommonCompilerTest::SetUp() {
   CommonRuntimeTest::SetUp();
   {
@@ -204,12 +210,10 @@
                                             2,
                                             true,
                                             true,
-                                            "",
-                                            false,
                                             timer_.get(),
                                             -1,
                                             /* dex_to_oat_map */ nullptr,
-                                            /* profile_compilation_info */ nullptr));
+                                            GetProfileCompilationInfo()));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index b491946..7e0fbab 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -23,6 +23,7 @@
 
 #include "common_runtime_test.h"
 #include "compiler.h"
+#include "jit/offline_profiling_info.h"
 #include "oat_file.h"
 
 namespace art {
@@ -75,6 +76,8 @@
   // driver assumes ownership of the set, so the test should properly release the set.
   virtual std::unordered_set<std::string>* GetCompiledMethods();
 
+  virtual ProfileCompilationInfo* GetProfileCompilationInfo();
+
   virtual void TearDown();
 
   void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 658e7d6..c250bd9 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -100,8 +100,12 @@
     } else {
       // The method index is actually the dex PC in this case.
       // Calculate the proper dex file and target method idx.
+
+      // We must be in JIT mode if we get here.
       CHECK(use_jit);
-      CHECK_EQ(invoke_type, kVirtual);
+
+      // The invoke type must be virtual, except for the string init special case above.
+      CHECK_EQ(invoke_type, string_init ? kDirect : kVirtual);
       // Don't devirt if we are in a different dex file since we can't have direct invokes in
       // another dex file unless we always put a direct / patch pointer.
       devirt_target = nullptr;
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 32d7518..3766093 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -39,6 +39,7 @@
     true,   // kIntrinsicFloatCvt
     true,   // kIntrinsicReverseBits
     true,   // kIntrinsicReverseBytes
+    true,   // kIntrinsicBitCount
     true,   // kIntrinsicNumberOfLeadingZeros
     true,   // kIntrinsicNumberOfTrailingZeros
     true,   // kIntrinsicRotateRight
@@ -99,6 +100,7 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicBitCount], "BitCount must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
               "NumberOfLeadingZeros must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros],
@@ -110,9 +112,9 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsDouble], "AbsDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxInt], "MinMaxInt must be static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble_must_be_static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCos], "Cos must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSin], "Sin must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAcos], "Acos must be static");
@@ -153,7 +155,7 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicPeek], "Peek must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPoke], "Poke must be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCas], "Cas must not be static");
-static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet_must_not_be_static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
@@ -293,6 +295,7 @@
     "putObjectVolatile",     // kNameCachePutObjectVolatile
     "putOrderedObject",      // kNameCachePutOrderedObject
     "arraycopy",             // kNameCacheArrayCopy
+    "bitCount",              // kNameCacheBitCount
     "numberOfLeadingZeros",  // kNameCacheNumberOfLeadingZeros
     "numberOfTrailingZeros",  // kNameCacheNumberOfTrailingZeros
     "rotateRight",           // kNameCacheRotateRight
@@ -447,6 +450,8 @@
     INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
     INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
 
+    INTRINSIC(JavaLangInteger, BitCount, I_I, kIntrinsicBitCount, k32),
+    INTRINSIC(JavaLangLong, BitCount, J_I, kIntrinsicBitCount, k64),
     INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
     INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
     INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32),
@@ -745,6 +750,7 @@
                                           intrinsic.d.data & kIntrinsicFlagIsOrdered);
     case kIntrinsicSystemArrayCopyCharArray:
       return backend->GenInlinedArrayCopyCharArray(info);
+    case kIntrinsicBitCount:
     case kIntrinsicNumberOfLeadingZeros:
     case kIntrinsicNumberOfTrailingZeros:
     case kIntrinsicRotateRight:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index ac70577..2803623 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -224,6 +224,7 @@
       kNameCachePutObjectVolatile,
       kNameCachePutOrderedObject,
       kNameCacheArrayCopy,
+      kNameCacheBitCount,
       kNameCacheNumberOfLeadingZeros,
       kNameCacheNumberOfTrailingZeros,
       kNameCacheRotateRight,
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index 12568a4..c5df134 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -69,6 +69,8 @@
       false,
       nullptr,
       nullptr,
+      false,
+      "",
       false);
     VerificationResults verification_results(&compiler_options);
     DexFileToMethodInlinerMap method_inliner_map;
@@ -88,8 +90,6 @@
                           0,
                           false,
                           false,
-                          "",
-                          false,
                           0,
                           -1,
                           nullptr,
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index b39fe4d..d63878d 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -52,6 +52,8 @@
         false,
         nullptr,
         nullptr,
+        false,
+        "",
         false));
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
     method_inliner_map_.reset(new DexFileToMethodInlinerMap());
@@ -69,8 +71,6 @@
         0,
         false,
         false,
-        "",
-        false,
         0,
         -1,
         nullptr,
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index f18fa67..2e2d1f9 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -41,8 +41,6 @@
                         1u,
                         false,
                         false,
-                        "",
-                        false,
                         nullptr,
                         -1,
                         nullptr,
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 043bd93..d021525 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -345,7 +345,6 @@
     std::unordered_set<std::string>* compiled_classes,
     std::unordered_set<std::string>* compiled_methods,
     size_t thread_count, bool dump_stats, bool dump_passes,
-    const std::string& dump_cfg_file_name, bool dump_cfg_append,
     CumulativeLogger* timer, int swap_fd,
     const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
     const ProfileCompilationInfo* profile_compilation_info)
@@ -370,8 +369,6 @@
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
-      dump_cfg_file_name_(dump_cfg_file_name),
-      dump_cfg_append_(dump_cfg_append),
       timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
@@ -1197,15 +1194,18 @@
   if (equals_referrers_class != nullptr) {
     *equals_referrers_class = (method_id.class_idx_ == type_idx);
   }
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform access check, will return true if access is ok or false if we're going to have to
+    // check this at runtime (for example for class loaders).
+    is_accessible = referrer_class->CanAccess(resolved_class);
   }
-  // Perform access check, will return true if access is ok or false if we're going to have to
-  // check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class);
-  if (result) {
+  if (is_accessible) {
     stats_->TypeDoesntNeedAccessCheck();
     if (type_known_final != nullptr) {
       *type_known_final = resolved_class->IsFinal() && !resolved_class->IsArrayClass();
@@ -1216,7 +1216,7 @@
   } else {
     stats_->TypeNeedsAccessCheck();
   }
-  return result;
+  return is_accessible;
 }
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
@@ -1236,14 +1236,18 @@
   }
   *finalizable = resolved_class->IsFinalizable();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform access check, will return true if access is ok or false if we're going to have to
+    // check this at runtime (for example for class loaders). The instantiable check is done below.
+    is_accessible = referrer_class->CanAccess(resolved_class);
   }
-  // Perform access and instantiable checks, will return true if access is ok or false if we're
-  // going to have to check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class) && resolved_class->IsInstantiable();
+  bool result = is_accessible && resolved_class->IsInstantiable();
   if (result) {
     stats_->TypeDoesntNeedAccessCheck();
   } else {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 17b2f5e..6a2f7bf 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -95,7 +95,6 @@
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
                  size_t thread_count, bool dump_stats, bool dump_passes,
-                 const std::string& dump_cfg_file_name, bool dump_cfg_append,
                  CumulativeLogger* timer, int swap_fd,
                  const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
                  const ProfileCompilationInfo* profile_compilation_info);
@@ -423,14 +422,6 @@
     return dump_passes_;
   }
 
-  const std::string& GetDumpCfgFileName() const {
-    return dump_cfg_file_name_;
-  }
-
-  bool GetDumpCfgAppend() const {
-    return dump_cfg_append_;
-  }
-
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
@@ -668,8 +659,6 @@
 
   bool dump_stats_;
   const bool dump_passes_;
-  const std::string dump_cfg_file_name_;
-  const bool dump_cfg_append_;
 
   CumulativeLogger* const timings_logger_;
 
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 82c0e86..4c03e5d 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -31,6 +31,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
@@ -240,6 +241,94 @@
   EXPECT_TRUE(expected->empty());
 }
 
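+// Compiles ProfileTestMultiDex with a profile that lists two methods per dex file and checks
+// that only the listed methods receive compiled code; everything else falls back to the
+// interpreter bridge.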
+class CompilerDriverProfileTest : public CompilerDriverTest {
+ protected:
+  ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE {
+    ScopedObjectAccess soa(Thread::Current());
+    std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+
+    // Add the methods with indices 1 and 2 from each dex file to the profile.
+    for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+    }
+    return &profile_info_;
+  }
+
+  std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) {
+    if (clazz == "Main") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Main.getA()",
+          "java.lang.String Main.getB()"});
+    } else if (clazz == "Second") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Second.getX()",
+          "java.lang.String Second.getY()"});
+    } else {
+      return std::unordered_set<std::string>();
+    }
+  }
+
+  void CheckCompiledMethods(jobject class_loader,
+                            const std::string& clazz,
+                            const std::unordered_set<std::string>& expected_methods) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+    ASSERT_NE(klass, nullptr);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    size_t number_of_compiled_methods = 0;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      std::string name = PrettyMethod(&m, true);
+      const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+      ASSERT_NE(code, nullptr);
+      if (expected_methods.find(name) != expected_methods.end()) {
+        number_of_compiled_methods++;
+        EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code));
+      } else {
+        EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code));
+      }
+    }
+    EXPECT_EQ(expected_methods.size(), number_of_compiled_methods);
+  }
+
+ private:
+  ProfileCompilationInfo profile_info_;
+};
+
+TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Enable dex-file writability: methods that are not selected for compilation will still run
+  // through the dex-to-dex compiler, which needs to modify the dex file in place.
+  for (const DexFile* dex_file : GetDexFiles(class_loader)) {
+    ASSERT_TRUE(dex_file->EnableWrite());
+  }
+
+  CompileAll(class_loader);
+
+  std::unordered_set<std::string> m = GetExpectedMethodsForClass("Main");
+  std::unordered_set<std::string> s = GetExpectedMethodsForClass("Second");
+  CheckCompiledMethods(class_loader, "LMain;", m);
+  CheckCompiledMethods(class_loader, "LSecond;", s);
+}
+
 // TODO: need check-cast test (when stub complete & we can throw/catch
 
 }  // namespace art
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 385f34a..2644528 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -44,7 +44,9 @@
       verbose_methods_(nullptr),
       pass_manager_options_(),
       abort_on_hard_verifier_failure_(false),
-      init_failure_output_(nullptr) {
+      init_failure_output_(nullptr),
+      dump_cfg_file_name_(""),
+      dump_cfg_append_(false) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -71,7 +73,9 @@
                                  bool compile_pic,
                                  const std::vector<std::string>* verbose_methods,
                                  std::ostream* init_failure_output,
-                                 bool abort_on_hard_verifier_failure
+                                 bool abort_on_hard_verifier_failure,
+                                 const std::string& dump_cfg_file_name,
+                                 bool dump_cfg_append
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -94,7 +98,9 @@
     verbose_methods_(verbose_methods),
     pass_manager_options_(),
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
-    init_failure_output_(init_failure_output) {
+    init_failure_output_(init_failure_output),
+    dump_cfg_file_name_(dump_cfg_file_name),
+    dump_cfg_append_(dump_cfg_append) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
@@ -238,6 +244,10 @@
     ParsePassOptions(option, Usage);
   } else if (option.starts_with("--dump-init-failures=")) {
     ParseDumpInitFailures(option, Usage);
+  } else if (option.starts_with("--dump-cfg=")) {
+    dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
+  } else if (option.starts_with("--dump-cfg-append")) {
+    dump_cfg_append_ = true;
   } else {
     // Option not recognized.
     return false;
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index f14bdc4..d47fc2a 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -83,7 +83,9 @@
                   bool compile_pic,
                   const std::vector<std::string>* verbose_methods,
                   std::ostream* init_failure_output,
-                  bool abort_on_hard_verifier_failure);
+                  bool abort_on_hard_verifier_failure,
+                  const std::string& dump_cfg_file_name,
+                  bool dump_cfg_append);
 
   CompilerFilter GetCompilerFilter() const {
     return compiler_filter_;
@@ -224,6 +226,14 @@
 
   bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
 
+  const std::string& GetDumpCfgFileName() const {
+    return dump_cfg_file_name_;
+  }
+
+  bool GetDumpCfgAppend() const {
+    return dump_cfg_append_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParsePassOptions(const StringPiece& option, UsageFn Usage);
@@ -273,6 +283,9 @@
   // Log initialization of initialization failures to this stream if not null.
   std::unique_ptr<std::ostream> init_failure_output_;
 
+  std::string dump_cfg_file_name_;
+  bool dump_cfg_append_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index b67e8dd..35b3e15 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -29,7 +29,7 @@
   // TODO: Arm S0–S31 register mapping is obsolescent.
   //   We should use VFP-v3/Neon D0-D31 mapping instead.
   //   However, D0 is aliased to pair of S0 and S1, so using that
-  //   mapping we can not easily say S0 is spilled and S1 is not.
+  //   mapping we cannot easily say S0 is spilled and S1 is not.
   //   There are ways around this in DWARF but they are complex.
   //   It would be much simpler to always spill whole D registers.
   //   Arm64 mapping is correct since we already do this there.
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index a7461a5..46484b1 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -100,12 +100,6 @@
       header_.sh_entsize = entsize;
     }
 
-    ~Section() OVERRIDE {
-      if (started_) {
-        CHECK(finished_);
-      }
-    }
-
     // Start writing of this section.
     void Start() {
       CHECK(!started_);
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 99d2b84..f3baf67 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -213,7 +213,7 @@
     case kNone:
       break;
   }
-  LOG(FATAL) << "Can not write CIE frame for ISA " << isa;
+  LOG(FATAL) << "Cannot write CIE frame for ISA " << isa;
   UNREACHABLE();
 }
 
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6859605..12132c0 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -95,25 +95,37 @@
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
-      OatWriter oat_writer(class_linker->GetBootClassPath(),
-                           0,
-                           0,
-                           0,
-                           compiler_driver_.get(),
-                           writer.get(),
-                           /*compiling_boot_image*/true,
-                           &timings,
-                           &key_value_store);
+      const std::vector<const DexFile*>& dex_files = class_linker->GetBootClassPath();
       std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
           compiler_driver_->GetInstructionSet(),
           &compiler_driver_->GetCompilerOptions(),
           oat_file.GetFile());
-      bool success = writer->PrepareImageAddressSpace();
-      ASSERT_TRUE(success);
-
       elf_writer->Start();
-
+      OatWriter oat_writer(/*compiling_boot_image*/true, &timings);
       OutputStream* rodata = elf_writer->StartRoData();
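+      // Feed every boot class path dex file to the oat writer as a raw in-memory source.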
+      for (const DexFile* dex_file : dex_files) {
+        ArrayRef<const uint8_t> raw_dex_file(
+            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+            dex_file->GetHeader().file_size_);
+        oat_writer.AddRawDexFileSource(raw_dex_file,
+                                       dex_file->GetLocation().c_str(),
+                                       dex_file->GetLocationChecksum());
+      }
+      std::unique_ptr<MemMap> opened_dex_files_map;
+      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+      bool dex_files_ok = oat_writer.WriteAndOpenDexFiles(
+          rodata,
+          oat_file.GetFile(),
+          compiler_driver_->GetInstructionSet(),
+          compiler_driver_->GetInstructionSetFeatures(),
+          &key_value_store,
+          &opened_dex_files_map,
+          &opened_dex_files);
+      ASSERT_TRUE(dex_files_ok);
+      oat_writer.PrepareLayout(compiler_driver_.get(), writer.get(), dex_files);
+      bool image_space_ok = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(image_space_ok);
+
       bool rodata_ok = oat_writer.WriteRodata(rodata);
       ASSERT_TRUE(rodata_ok);
       elf_writer->EndRoData(rodata);
@@ -123,12 +135,15 @@
       ASSERT_TRUE(text_ok);
       elf_writer->EndText(text);
 
+      bool header_ok = oat_writer.WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+      ASSERT_TRUE(header_ok);
+
       elf_writer->SetBssSize(oat_writer.GetBssSize());
       elf_writer->WriteDynamicSection();
       elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
       elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
 
-      success = elf_writer->End();
+      bool success = elf_writer->End();
 
       ASSERT_TRUE(success);
     }
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 8fdbf4a..3a3275a 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -110,7 +110,9 @@
       /* pic */ true,  // TODO: Support non-PIC in optimizing.
       /* verbose_methods */ nullptr,
       /* init_failure_output */ nullptr,
-      /* abort_on_hard_verifier_failure */ false));
+      /* abort_on_hard_verifier_failure */ false,
+      /* dump_cfg_file_name */ "",
+      /* dump_cfg_append */ false));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
@@ -166,8 +168,6 @@
       /* thread_count */ 1,
       /* dump_stats */ false,
       /* dump_passes */ false,
-      /* dump_cfg_file_name */ "",
-      /* dump_cfg_append */ false,
       cumulative_logger_.get(),
       /* swap_fd */ -1,
       /* dex to oat map */ nullptr,
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index b10cc35..bf8e786 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -47,7 +47,7 @@
         driver_(&compiler_options_, &verification_results_, &inliner_map_,
                 Compiler::kQuick, instruction_set, nullptr,
                 false, nullptr, nullptr, nullptr, 1u,
-                false, false, "", false, nullptr, -1, nullptr, nullptr),
+                false, false, nullptr, -1, nullptr, nullptr),
         error_msg_(),
         instruction_set_(instruction_set),
         features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 9f7ffa5..c0d15f3 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -38,6 +38,7 @@
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
+#include "utils/test_dex_file_builder.h"
 
 namespace art {
 
@@ -117,8 +118,6 @@
                                               2,
                                               true,
                                               true,
-                                              "",
-                                              false,
                                               timer_.get(),
                                               -1,
                                               nullptr,
@@ -129,23 +128,74 @@
                 const std::vector<const DexFile*>& dex_files,
                 SafeMap<std::string, std::string>& key_value_store) {
     TimingLogger timings("WriteElf", false, false);
-    OatWriter oat_writer(dex_files,
-                         42U,
-                         4096U,
-                         0,
-                         compiler_driver_.get(),
-                         nullptr,
-                         /*compiling_boot_image*/false,
-                         &timings,
-                         &key_value_store);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const DexFile* dex_file : dex_files) {
+      ArrayRef<const uint8_t> raw_dex_file(
+          reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+          dex_file->GetHeader().file_size_);
+      if (!oat_writer.AddRawDexFileSource(raw_dex_file,
+                                          dex_file->GetLocation().c_str(),
+                                          dex_file->GetLocationChecksum())) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
+  bool WriteElf(File* file,
+                const std::vector<const char*>& dex_filenames,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const char* dex_filename : dex_filenames) {
+      if (!oat_writer.AddDexFileSource(dex_filename, dex_filename)) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
+  bool WriteElf(File* file,
+                ScopedFd&& zip_fd,
+                const char* location,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    if (!oat_writer.AddZippedDexFilesSource(std::move(zip_fd), location)) {
+      return false;
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
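+  // Common tail of the WriteElf() overloads: writes and opens the dex files, registers them
+  // with the class linker, lays out the oat file and writes the remaining oat and ELF sections.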
+  bool DoWriteElf(File* file,
+                  OatWriter& oat_writer,
+                  SafeMap<std::string, std::string>& key_value_store) {
     std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
         compiler_driver_->GetInstructionSet(),
         &compiler_driver_->GetCompilerOptions(),
         file);
-
     elf_writer->Start();
-
     OutputStream* rodata = elf_writer->StartRoData();
+    std::unique_ptr<MemMap> opened_dex_files_map;
+    std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+    if (!oat_writer.WriteAndOpenDexFiles(rodata,
+                                         file,
+                                         compiler_driver_->GetInstructionSet(),
+                                         compiler_driver_->GetInstructionSetFeatures(),
+                                         &key_value_store,
+                                         &opened_dex_files_map,
+                                         &opened_dex_files)) {
+      return false;
+    }
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    std::vector<const DexFile*> dex_files;
+    for (const std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+      dex_files.push_back(dex_file.get());
+      ScopedObjectAccess soa(Thread::Current());
+      class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
+    }
+    oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files);
     if (!oat_writer.WriteRodata(rodata)) {
       return false;
     }
@@ -157,6 +207,10 @@
     }
     elf_writer->EndText(text);
 
+    if (!oat_writer.WriteHeader(elf_writer->GetStream(), 42U, 4096U, 0)) {
+      return false;
+    }
+
     elf_writer->SetBssSize(oat_writer.GetBssSize());
     elf_writer->WriteDynamicSection();
     elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
@@ -169,6 +223,117 @@
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 };
 
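+// Helper for writing a minimal, store-only (uncompressed) zip archive containing the test dex
+// files, so that the zip-based OatWriter inputs can be exercised.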
+class ZipBuilder {
+ public:
+  explicit ZipBuilder(File* zip_file) : zip_file_(zip_file) { }
+
+  bool AddFile(const char* location, const void* data, size_t size) {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    ZipFileHeader file_header;
+    file_header.crc32 = crc32(0u, reinterpret_cast<const Bytef*>(data), size);
+    file_header.compressed_size = size;
+    file_header.uncompressed_size = size;
+    file_header.filename_length = strlen(location);
+
+    if (!zip_file_->WriteFully(&file_header, sizeof(file_header)) ||
+        !zip_file_->WriteFully(location, file_header.filename_length) ||
+        !zip_file_->WriteFully(data, size)) {
+      return false;
+    }
+
+    CentralDirectoryFileHeader cdfh;
+    cdfh.crc32 = file_header.crc32;
+    cdfh.compressed_size = size;
+    cdfh.uncompressed_size = size;
+    cdfh.filename_length = file_header.filename_length;
+    cdfh.relative_offset_of_local_file_header = offset;
+    file_data_.push_back(FileData { cdfh, location });
+    return true;
+  }
+
+  bool Finish() {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    size_t central_directory_size = 0u;
+    for (const FileData& file_data : file_data_) {
+      if (!zip_file_->WriteFully(&file_data.cdfh, sizeof(file_data.cdfh)) ||
+          !zip_file_->WriteFully(file_data.location, file_data.cdfh.filename_length)) {
+        return false;
+      }
+      central_directory_size += sizeof(file_data.cdfh) + file_data.cdfh.filename_length;
+    }
+    EndOfCentralDirectoryRecord eocd_record;
+    eocd_record.number_of_central_directory_records_on_this_disk = file_data_.size();
+    eocd_record.total_number_of_central_directory_records = file_data_.size();
+    eocd_record.size_of_central_directory = central_directory_size;
+    eocd_record.offset_of_start_of_central_directory = offset;
+    return
+        zip_file_->WriteFully(&eocd_record, sizeof(eocd_record)) &&
+        zip_file_->Flush() == 0;
+  }
+
+ private:
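+  // On-disk zip record layouts (store-only, no extra fields or comments) written by AddFile()
+  // and Finish().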
+  struct PACKED(1) ZipFileHeader {
+    uint32_t signature = 0x04034b50;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+  };
+
+  struct PACKED(1) CentralDirectoryFileHeader {
+    uint32_t signature = 0x02014b50;
+    uint16_t version_made_by = 10;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+    uint16_t file_comment_length = 0u;          // No file comment.
+    uint16_t disk_number_where_file_starts = 0u;
+    uint16_t internal_file_attributes = 0u;
+    uint32_t external_file_attributes = 0u;
+    uint32_t relative_offset_of_local_file_header;
+  };
+
+  struct PACKED(1) EndOfCentralDirectoryRecord {
+    uint32_t signature = 0x06054b50;
+    uint16_t number_of_this_disk = 0u;
+    uint16_t disk_where_central_directory_starts = 0u;
+    uint16_t number_of_central_directory_records_on_this_disk;
+    uint16_t total_number_of_central_directory_records;
+    uint32_t size_of_central_directory;
+    uint32_t offset_of_start_of_central_directory;
+    uint16_t comment_length = 0u;               // No file comment.
+  };
+
+  struct FileData {
+    CentralDirectoryFileHeader cdfh;
+    const char* location;
+  };
+
+  File* zip_file_;
+  std::vector<FileData> file_data_;
+};
+
 TEST_F(OatTest, WriteRead) {
   TimingLogger timings("OatTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -329,4 +494,189 @@
   EXPECT_LT(static_cast<size_t>(oat_file->Size()), static_cast<size_t>(tmp.GetFile()->GetLength()));
 }
 
+TEST_F(OatTest, DexFileInput) {
+  TimingLogger timings("OatTest::DexFileInput", false, false);
+
+  std::vector<const char*> input_filenames;
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "int", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()I", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file1.GetFilename().c_str());
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file2.GetFilename().c_str());
+
+  ScratchFile oat_file;
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+  ASSERT_TRUE(success);
+
+  std::string error_msg;
+  std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                         oat_file.GetFilename(),
+                                                         nullptr,
+                                                         nullptr,
+                                                         false,
+                                                         nullptr,
+                                                         &error_msg));
+  ASSERT_TRUE(opened_oat_file != nullptr);
+  ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+  std::unique_ptr<const DexFile> opened_dex_file1 =
+      opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+  std::unique_ptr<const DexFile> opened_dex_file2 =
+      opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+  ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                      &opened_dex_file1->GetHeader(),
+                      dex_file1_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file1_data->GetLocation(), opened_dex_file1->GetLocation());
+
+  ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                      &opened_dex_file2->GetHeader(),
+                      dex_file2_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file2_data->GetLocation(), opened_dex_file2->GetLocation());
+}
+
+TEST_F(OatTest, ZipFileInput) {
+  TimingLogger timings("OatTest::ZipFileInput", false, false);
+
+  ScratchFile zip_file;
+  ZipBuilder zip_builder(zip_file.GetFile());
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "long", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()D", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes.dex",
+                                &dex_file1_data->GetHeader(),
+                                dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes2.dex",
+                                &dex_file2_data->GetHeader(),
+                                dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  success = zip_builder.Finish();
+  ASSERT_TRUE(success) << strerror(errno);
+
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  {
+    // Test using the AddDexFileSource() interface with the zip file.
+    std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() };  // NOLINT [readability/braces] [4]
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+    ASSERT_TRUE(success);
+
+    std::string error_msg;
+    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                           oat_file.GetFilename(),
+                                                           nullptr,
+                                                           nullptr,
+                                                           false,
+                                                           nullptr,
+                                                           &error_msg));
+    ASSERT_TRUE(opened_oat_file != nullptr);
+    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+    std::unique_ptr<const DexFile> opened_dex_file1 =
+        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> opened_dex_file2 =
+        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                        &opened_dex_file1->GetHeader(),
+                        dex_file1_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+              opened_dex_file1->GetLocation());
+
+    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                        &opened_dex_file2->GetHeader(),
+                        dex_file2_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+              opened_dex_file2->GetLocation());
+  }
+
+  {
+    // Test using the AddZipDexFileSource() interface with the zip file handle.
+    ScopedFd zip_fd(dup(zip_file.GetFd()));
+    ASSERT_NE(-1, zip_fd.get());
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(),
+                       std::move(zip_fd),
+                       zip_file.GetFilename().c_str(),
+                       key_value_store);
+    ASSERT_TRUE(success);
+
+    std::string error_msg;
+    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                           oat_file.GetFilename(),
+                                                           nullptr,
+                                                           nullptr,
+                                                           false,
+                                                           nullptr,
+                                                           &error_msg));
+    ASSERT_TRUE(opened_oat_file != nullptr);
+    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+    std::unique_ptr<const DexFile> opened_dex_file1 =
+        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> opened_dex_file2 =
+        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                        &opened_dex_file1->GetHeader(),
+                        dex_file1_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+              opened_dex_file1->GetLocation());
+
+    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                        &opened_dex_file2->GetHeader(),
+                        dex_file2_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+              opened_dex_file2->GetLocation());
+  }
+}
+
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 025e35e..c74c41f 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -16,12 +16,14 @@
 
 #include "oat_writer.h"
 
+#include <unistd.h>
 #include <zlib.h>
 
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method-inl.h"
 #include "base/allocator.h"
 #include "base/bit_vector.h"
+#include "base/file_magic.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -49,9 +51,77 @@
 #include "type_lookup_table.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
+#include "zip_archive.h"
 
 namespace art {
 
+namespace {  // anonymous namespace
+
+typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader;
+
+const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) {
+  return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data);
+}
+
+}  // anonymous namespace
+
+// Defines the source (zip entry, raw file, or raw data) of the raw dex file to write.
+class OatWriter::DexFileSource {
+ public:
+  explicit DexFileSource(ZipEntry* zip_entry)
+      : type_(kZipEntry), source_(zip_entry) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(File* raw_file)
+      : type_(kRawFile), source_(raw_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(const uint8_t* dex_file)
+      : type_(kRawData), source_(dex_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  bool IsZipEntry() const { return type_ == kZipEntry; }
+  bool IsRawFile() const { return type_ == kRawFile; }
+  bool IsRawData() const { return type_ == kRawData; }
+
+  ZipEntry* GetZipEntry() const {
+    DCHECK(IsZipEntry());
+    DCHECK(source_ != nullptr);
+    return static_cast<ZipEntry*>(const_cast<void*>(source_));
+  }
+
+  File* GetRawFile() const {
+    DCHECK(IsRawFile());
+    DCHECK(source_ != nullptr);
+    return static_cast<File*>(const_cast<void*>(source_));
+  }
+
+  const uint8_t* GetRawData() const {
+    DCHECK(IsRawData());
+    DCHECK(source_ != nullptr);
+    return static_cast<const uint8_t*>(source_);
+  }
+
+  void Clear() {
+    type_ = kNone;
+    source_ = nullptr;
+  }
+
+ private:
+  enum Type {
+    kNone,
+    kZipEntry,
+    kRawFile,
+    kRawData,
+  };
+
+  Type type_;
+  const void* source_;
+};
+
 class OatWriter::OatClass {
  public:
   OatClass(size_t offset,
@@ -116,11 +186,30 @@
 
 class OatWriter::OatDexFile {
  public:
-  OatDexFile(size_t offset, const DexFile& dex_file);
+  OatDexFile(const char* dex_file_location,
+             DexFileSource source,
+             CreateTypeLookupTable create_type_lookup_table);
   OatDexFile(OatDexFile&& src) = default;
 
+  const char* GetLocation() const {
+    return dex_file_location_data_;
+  }
+
+  void ReserveTypeLookupTable(OatWriter* oat_writer);
+  void ReserveClassOffsets(OatWriter* oat_writer);
+
   size_t SizeOf() const;
-  bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+  bool Write(OatWriter* oat_writer, OutputStream* out) const;
+  bool WriteClassOffsets(OatWriter* oat_writer, OutputStream* out);
+
+  // The source of the dex file.
+  DexFileSource source_;
+
+  // Whether to create the type lookup table.
+  CreateTypeLookupTable create_type_lookup_table_;
+
+  // Dex file size. Initialized when writing the dex file.
+  size_t dex_file_size_;
 
   // Offset of start of OatDexFile from beginning of OatHeader. It is
   // used to validate file position when writing.
@@ -128,11 +217,13 @@
 
   // Data to write.
   uint32_t dex_file_location_size_;
-  const uint8_t* dex_file_location_data_;
+  const char* dex_file_location_data_;
   uint32_t dex_file_location_checksum_;
   uint32_t dex_file_offset_;
+  uint32_t class_offsets_offset_;
   uint32_t lookup_table_offset_;
-  TypeLookupTable* lookup_table_;  // Owned by the dex file.
+
+  // Data to write to a separate section.
   dchecked_vector<uint32_t> class_offsets_;
 
  private:
@@ -151,26 +242,20 @@
   DCHECK_EQ(static_cast<off_t>(file_offset + offset_), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " offset_=" << offset_
 
-OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
-                     uint32_t image_file_location_oat_checksum,
-                     uintptr_t image_file_location_oat_begin,
-                     int32_t image_patch_delta,
-                     const CompilerDriver* compiler,
-                     ImageWriter* image_writer,
-                     bool compiling_boot_image,
-                     TimingLogger* timings,
-                     SafeMap<std::string, std::string>* key_value_store)
-  : compiler_driver_(compiler),
-    image_writer_(image_writer),
+OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings)
+  : write_state_(WriteState::kAddingDexFileSources),
+    timings_(timings),
+    raw_dex_files_(),
+    zip_archives_(),
+    zipped_dex_files_(),
+    zipped_dex_file_locations_(),
+    compiler_driver_(nullptr),
+    image_writer_(nullptr),
     compiling_boot_image_(compiling_boot_image),
-    dex_files_(&dex_files),
+    dex_files_(nullptr),
     size_(0u),
     bss_size_(0u),
     oat_data_offset_(0u),
-    image_file_location_oat_checksum_(image_file_location_oat_checksum),
-    image_file_location_oat_begin_(image_file_location_oat_begin),
-    image_patch_delta_(image_patch_delta),
-    key_value_store_(key_value_store),
     oat_header_(nullptr),
     size_dex_file_alignment_(0),
     size_executable_offset_alignment_(0),
@@ -197,55 +282,192 @@
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
     size_oat_dex_file_offset_(0),
+    size_oat_dex_file_class_offsets_offset_(0),
     size_oat_dex_file_lookup_table_offset_(0),
-    size_oat_dex_file_class_offsets_(0),
     size_oat_lookup_table_alignment_(0),
     size_oat_lookup_table_(0),
+    size_oat_class_offsets_alignment_(0),
+    size_oat_class_offsets_(0),
     size_oat_class_type_(0),
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
     method_offset_map_() {
-  CHECK(key_value_store != nullptr);
-  if (compiling_boot_image) {
-    CHECK(image_writer != nullptr);
+}
+
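+// Add a dex file source specified by a filename. The file may be a raw dex file or a zip
+// archive containing dex files; the magic number determines how it is handled.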
+bool OatWriter::AddDexFileSource(const char* filename,
+                                 const char* location,
+                                 CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  uint32_t magic;
+  std::string error_msg;
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
+  if (fd.get() == -1) {
+    PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "'";
+    return false;
+  } else if (IsDexMagic(magic)) {
+    // The file is open for reading, not writing, so it's OK to let the File destructor
+    // close it without checking for explicit Close(), so pass checkUsage = false.
+    raw_dex_files_.emplace_back(new File(fd.release(), location, /* checkUsage */ false));
+    oat_dex_files_.emplace_back(location,
+                                DexFileSource(raw_dex_files_.back().get()),
+                                create_type_lookup_table);
+  } else if (IsZipMagic(magic)) {
+    if (!AddZippedDexFilesSource(std::move(fd), location, create_type_lookup_table)) {
+      return false;
+    }
+  } else {
+    LOG(ERROR) << "Expected valid zip or dex file: '" << filename << "'";
+    return false;
+  }
+  return true;
+}
+
+// Add dex file source(s) from a zip file specified by a file handle.
+bool OatWriter::AddZippedDexFilesSource(ScopedFd&& zip_fd,
+                                        const char* location,
+                                        CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  std::string error_msg;
+  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.release(), location, &error_msg));
+  ZipArchive* zip_archive = zip_archives_.back().get();
+  if (zip_archive == nullptr) {
+    LOG(ERROR) << "Failed to open zip from file descriptor for '" << location << "': "
+        << error_msg;
+    return false;
+  }
+  for (size_t i = 0; ; ++i) {
+    std::string entry_name = DexFile::GetMultiDexClassesDexName(i);
+    std::unique_ptr<ZipEntry> entry(zip_archive->Find(entry_name.c_str(), &error_msg));
+    if (entry == nullptr) {
+      break;
+    }
+    zipped_dex_files_.push_back(std::move(entry));
+    zipped_dex_file_locations_.push_back(DexFile::GetMultiDexLocation(i, location));
+    const char* full_location = zipped_dex_file_locations_.back().c_str();
+    oat_dex_files_.emplace_back(full_location,
+                                DexFileSource(zipped_dex_files_.back().get()),
+                                create_type_lookup_table);
+  }
+  if (zipped_dex_file_locations_.empty()) {
+    LOG(ERROR) << "No dex files in zip file '" << location << "': " << error_msg;
+    return false;
+  }
+  return true;
+}
+
+// Add dex file source from raw memory.
+bool OatWriter::AddRawDexFileSource(const ArrayRef<const uint8_t>& data,
+                                    const char* location,
+                                    uint32_t location_checksum,
+                                    CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  if (data.size() < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Provided data is shorter than dex file header. size: "
+               << data.size() << " File: " << location;
+    return false;
+  }
+  if (!ValidateDexFileHeader(data.data(), location)) {
+    return false;
+  }
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(data.data());
+  if (data.size() < header->file_size_) {
+    LOG(ERROR) << "Truncated dex file data. Data size: " << data.size()
+               << " file size from header: " << header->file_size_ << " File: " << location;
+    return false;
+  }
+
+  oat_dex_files_.emplace_back(location, DexFileSource(data.data()), create_type_lookup_table);
+  oat_dex_files_.back().dex_file_location_checksum_ = location_checksum;
+  return true;
+}
+
+dchecked_vector<const char*> OatWriter::GetSourceLocations() const {
+  dchecked_vector<const char*> locations;
+  locations.reserve(oat_dex_files_.size());
+  for (const OatDexFile& oat_dex_file : oat_dex_files_) {
+    locations.push_back(oat_dex_file.GetLocation());
+  }
+  return locations;
+}
+
+bool OatWriter::WriteAndOpenDexFiles(
+    OutputStream* rodata,
+    File* file,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features,
+    SafeMap<std::string, std::string>* key_value_store,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  CHECK(write_state_ == WriteState::kAddingDexFileSources);
+
+  size_t offset = InitOatHeader(instruction_set,
+                                instruction_set_features,
+                                dchecked_integral_cast<uint32_t>(oat_dex_files_.size()),
+                                key_value_store);
+  offset = InitOatDexFiles(offset);
+  size_ = offset;
+
+  std::unique_ptr<MemMap> dex_files_map;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  if (!WriteDexFiles(rodata, file)) {
+    return false;
+  }
+  // Reserve space for type lookup tables and update type_lookup_table_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveTypeLookupTable(this);
+  }
+  size_t size_after_type_lookup_tables = size_;
+  // Reserve space for class offsets and update class_offsets_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveClassOffsets(this);
+  }
+  if (!WriteOatDexFiles(rodata) ||
+      !ExtendForTypeLookupTables(rodata, file, size_after_type_lookup_tables) ||
+      !OpenDexFiles(file, &dex_files_map, &dex_files) ||
+      !WriteTypeLookupTables(dex_files_map.get(), dex_files)) {
+    return false;
+  }
+
+  *opened_dex_files_map = std::move(dex_files_map);
+  *opened_dex_files = std::move(dex_files);
+  write_state_ = WriteState::kPrepareLayout;
+  return true;
+}
+
+void OatWriter::PrepareLayout(const CompilerDriver* compiler,
+                              ImageWriter* image_writer,
+                              const std::vector<const DexFile*>& dex_files) {
+  CHECK(write_state_ == WriteState::kPrepareLayout);
+
+  dex_files_ = &dex_files;
+
+  compiler_driver_ = compiler;
+  image_writer_ = image_writer;
+  if (compiling_boot_image_) {
+    CHECK(image_writer_ != nullptr);
   }
   InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
+  CHECK_EQ(instruction_set, oat_header_->GetInstructionSet());
   const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
   relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
                                                       &method_offset_map_);
 
-  size_t offset;
+  uint32_t offset = size_;
   {
-    TimingLogger::ScopedTiming split("InitOatHeader", timings);
-    offset = InitOatHeader();
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatDexFiles", timings);
-    offset = InitOatDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitDexFiles", timings);
-    offset = InitDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitLookupTables", timings);
-    offset = InitLookupTables(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatClasses", timings);
+    TimingLogger::ScopedTiming split("InitOatClasses", timings_);
     offset = InitOatClasses(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatMaps", timings);
+    TimingLogger::ScopedTiming split("InitOatMaps", timings_);
     offset = InitOatMaps(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCode", timings);
+    TimingLogger::ScopedTiming split("InitOatCode", timings_);
     offset = InitOatCode(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings);
+    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings_);
     offset = InitOatCodeDexFiles(offset);
   }
   size_ = offset;
@@ -255,7 +477,7 @@
     size_t bss_start = RoundUp(size_, kPageSize);
     size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
     bss_size_ = 0u;
-    for (const DexFile* dex_file : dex_files) {
+    for (const DexFile* dex_file : *dex_files_) {
       dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_);
       DexCacheArraysLayout layout(pointer_size, dex_file);
       bss_size_ += layout.Size();
@@ -265,9 +487,10 @@
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
   if (compiling_boot_image_) {
     CHECK_EQ(image_writer_ != nullptr,
-             key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+             oat_header_->GetStoreValueByKey(OatHeader::kImageLocationKey) == nullptr);
   }
-  CHECK_ALIGNED(image_patch_delta_, kPageSize);
+
+  write_state_ = WriteState::kWriteRoData;
 }
 
 OatWriter::~OatWriter() {
@@ -1134,59 +1357,26 @@
   return true;
 }
 
-size_t OatWriter::InitOatHeader() {
-  oat_header_.reset(OatHeader::Create(compiler_driver_->GetInstructionSet(),
-                                      compiler_driver_->GetInstructionSetFeatures(),
-                                      dchecked_integral_cast<uint32_t>(dex_files_->size()),
-                                      key_value_store_));
-  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum_);
-  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin_);
-
+size_t OatWriter::InitOatHeader(InstructionSet instruction_set,
+                                const InstructionSetFeatures* instruction_set_features,
+                                uint32_t num_dex_files,
+                                SafeMap<std::string, std::string>* key_value_store) {
+  TimingLogger::ScopedTiming split("InitOatHeader", timings_);
+  oat_header_.reset(OatHeader::Create(instruction_set,
+                                      instruction_set_features,
+                                      num_dex_files,
+                                      key_value_store));
+  size_oat_header_ += sizeof(OatHeader);
+  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
   return oat_header_->GetHeaderSize();
 }
 
 size_t OatWriter::InitOatDexFiles(size_t offset) {
-  // create the OatDexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    const DexFile* dex_file = (*dex_files_)[i];
-    CHECK(dex_file != nullptr);
-    oat_dex_files_.emplace_back(offset, *dex_file);
-    offset += oat_dex_files_.back().SizeOf();
-  }
-  return offset;
-}
-
-size_t OatWriter::InitDexFiles(size_t offset) {
-  // calculate the offsets within OatDexFiles to the DexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    // dex files are required to be 4 byte aligned
-    size_t original_offset = offset;
-    offset = RoundUp(offset, 4);
-    size_dex_file_alignment_ += offset - original_offset;
-
-    // set offset in OatDexFile to DexFile
-    oat_dex_files_[i].dex_file_offset_ = offset;
-
-    const DexFile* dex_file = (*dex_files_)[i];
-
-    // Initialize type lookup table
-    oat_dex_files_[i].lookup_table_ = dex_file->GetTypeLookupTable();
-
-    offset += dex_file->GetHeader().file_size_;
-  }
-  return offset;
-}
-
-size_t OatWriter::InitLookupTables(size_t offset) {
+  TimingLogger::ScopedTiming split("InitOatDexFiles", timings_);
+  // Initialize offsets of dex files.
   for (OatDexFile& oat_dex_file : oat_dex_files_) {
-    if (oat_dex_file.lookup_table_ != nullptr) {
-      uint32_t aligned_offset = RoundUp(offset, 4);
-      oat_dex_file.lookup_table_offset_ = aligned_offset;
-      size_oat_lookup_table_alignment_ += aligned_offset - offset;
-      offset = aligned_offset + oat_dex_file.lookup_table_->RawDataLength();
-    } else {
-      oat_dex_file.lookup_table_offset_ = 0;
-    }
+    oat_dex_file.offset_ = offset;
+    offset += oat_dex_file.SizeOf();
   }
   return offset;
 }
@@ -1239,7 +1429,6 @@
   oat_header_->SetExecutableOffset(offset);
   size_executable_offset_alignment_ = offset - old_offset;
   if (compiler_driver_->IsBootImage()) {
-    CHECK_EQ(image_patch_delta_, 0);
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
     #define DO_TRAMPOLINE(field, fn_name) \
@@ -1264,7 +1453,6 @@
     oat_header_->SetQuickImtConflictTrampolineOffset(0);
     oat_header_->SetQuickResolutionTrampolineOffset(0);
     oat_header_->SetQuickToInterpreterBridgeOffset(0);
-    oat_header_->SetImagePatchDelta(image_patch_delta_);
   }
   return offset;
 }
@@ -1289,22 +1477,15 @@
 }
 
 bool OatWriter::WriteRodata(OutputStream* out) {
-  if (!GetOatDataOffset(out)) {
+  CHECK(write_state_ == WriteState::kWriteRoData);
+
+  if (!WriteClassOffsets(out)) {
+    LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
     return false;
   }
-  const size_t file_offset = oat_data_offset_;
 
-  // Reserve space for header. It will be written last - after updating the checksum.
-  size_t header_size = oat_header_->GetHeaderSize();
-  if (out->Seek(header_size, kSeekCurrent) == static_cast<off_t>(-1)) {
-    PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation();
-    return false;
-  }
-  size_oat_header_ += sizeof(OatHeader);
-  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
-
-  if (!WriteTables(out, file_offset)) {
-    LOG(ERROR) << "Failed to write oat tables to " << out->GetLocation();
+  if (!WriteClasses(out)) {
+    LOG(ERROR) << "Failed to write classes to " << out->GetLocation();
     return false;
   }
 
@@ -1313,6 +1494,7 @@
     LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation();
     return false;
   }
+  size_t file_offset = oat_data_offset_;
   size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset;
   relative_offset = WriteMaps(out, file_offset, relative_offset);
   if (relative_offset == 0) {
@@ -1332,11 +1514,13 @@
   }
   DCHECK_OFFSET();
 
+  write_state_ = WriteState::kWriteText;
   return true;
 }
 
 bool OatWriter::WriteCode(OutputStream* out) {
-  size_t header_size = oat_header_->GetHeaderSize();
+  CHECK(write_state_ == WriteState::kWriteText);
+
   const size_t file_offset = oat_data_offset_;
   size_t relative_offset = oat_header_->GetExecutableOffset();
   DCHECK_OFFSET();
@@ -1390,10 +1574,12 @@
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
     DO_STAT(size_oat_dex_file_offset_);
+    DO_STAT(size_oat_dex_file_class_offsets_offset_);
     DO_STAT(size_oat_dex_file_lookup_table_offset_);
-    DO_STAT(size_oat_dex_file_class_offsets_);
     DO_STAT(size_oat_lookup_table_alignment_);
     DO_STAT(size_oat_lookup_table_);
+    DO_STAT(size_oat_class_offsets_alignment_);
+    DO_STAT(size_oat_class_offsets_);
     DO_STAT(size_oat_class_type_);
     DO_STAT(size_oat_class_status_);
     DO_STAT(size_oat_class_method_bitmaps_);
@@ -1408,88 +1594,90 @@
   CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset));
   CHECK_EQ(size_, relative_offset);
 
-  // Finalize the header checksum.
+  write_state_ = WriteState::kWriteHeader;
+  return true;
+}
+
+bool OatWriter::WriteHeader(OutputStream* out,
+                            uint32_t image_file_location_oat_checksum,
+                            uintptr_t image_file_location_oat_begin,
+                            int32_t image_patch_delta) {
+  CHECK(write_state_ == WriteState::kWriteHeader);
+
+  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum);
+  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin);
+  if (compiler_driver_->IsBootImage()) {
+    CHECK_EQ(image_patch_delta, 0);
+    CHECK_EQ(oat_header_->GetImagePatchDelta(), 0);
+  } else {
+    CHECK_ALIGNED(image_patch_delta, kPageSize);
+    oat_header_->SetImagePatchDelta(image_patch_delta);
+  }
   oat_header_->UpdateChecksumWithHeaderData();
 
-  // Write the header now that the checksum is final.
+  const size_t file_offset = oat_data_offset_;
+
+  off_t current_offset = out->Seek(0, kSeekCurrent);
+  if (current_offset == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to get current offset from " << out->GetLocation();
+    return false;
+  }
   if (out->Seek(file_offset, kSeekSet) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
     return false;
   }
   DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
+
+  // Flush all other data before writing the header.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing oat header to " << out->GetLocation();
+    return false;
+  }
+  // Write the header.
+  size_t header_size = oat_header_->GetHeaderSize();
   if (!out->WriteFully(oat_header_.get(), header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
   }
-  if (out->Seek(oat_end_file_offset, kSeekSet) == static_cast<off_t>(-1)) {
-    PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation();
+  // Flush the header data.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush after writing oat header to " << out->GetLocation();
     return false;
   }
-  DCHECK_EQ(oat_end_file_offset, out->Seek(0, kSeekCurrent));
 
+  if (out->Seek(current_offset, kSeekSet) == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to seek back after writing oat header to " << out->GetLocation();
+    return false;
+  }
+  DCHECK_EQ(current_offset, out->Seek(0, kSeekCurrent));
+
+  write_state_ = WriteState::kDone;
   return true;
 }
 
-bool OatWriter::WriteTables(OutputStream* out, const size_t file_offset) {
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    if (!oat_dex_files_[i].Write(this, out, file_offset)) {
-      PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation();
-      return false;
-    }
-  }
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    uint32_t expected_offset = file_offset + oat_dex_files_[i].dex_file_offset_;
-    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-      const DexFile* dex_file = (*dex_files_)[i];
-      PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
-                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
-      return false;
-    }
-    const DexFile* dex_file = (*dex_files_)[i];
-    if (!out->WriteFully(&dex_file->GetHeader(), dex_file->GetHeader().file_size_)) {
-      PLOG(ERROR) << "Failed to write dex file " << dex_file->GetLocation()
-                  << " to " << out->GetLocation();
-      return false;
-    }
-    size_dex_file_ += dex_file->GetHeader().file_size_;
-  }
-  if (!WriteLookupTables(out, file_offset)) {
-    return false;
-  }
-  for (size_t i = 0; i != oat_classes_.size(); ++i) {
-    if (!oat_classes_[i].Write(this, out, file_offset)) {
-      PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
-      return false;
-    }
-  }
-  return true;
-}
-
-bool OatWriter::WriteLookupTables(OutputStream* out, const size_t file_offset) {
-  for (size_t i = 0; i < oat_dex_files_.size(); ++i) {
-    const uint32_t lookup_table_offset = oat_dex_files_[i].lookup_table_offset_;
-    const TypeLookupTable* table = oat_dex_files_[i].lookup_table_;
-    DCHECK_EQ(lookup_table_offset == 0, table == nullptr);
-    if (lookup_table_offset == 0) {
-      continue;
-    }
-    const uint32_t expected_offset = file_offset + lookup_table_offset;
-    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-      const DexFile* dex_file = (*dex_files_)[i];
-      PLOG(ERROR) << "Failed to seek to lookup table section. Actual: " << actual_offset
-                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
-      return false;
-    }
-    if (table != nullptr) {
-      if (!WriteData(out, table->RawData(), table->RawDataLength())) {
-        const DexFile* dex_file = (*dex_files_)[i];
-        PLOG(ERROR) << "Failed to write lookup table for " << dex_file->GetLocation()
-                    << " to " << out->GetLocation();
+bool OatWriter::WriteClassOffsets(OutputStream* out) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (oat_dex_file.class_offsets_offset_ != 0u) {
+      uint32_t expected_offset = oat_data_offset_ + oat_dex_file.class_offsets_offset_;
+      off_t actual_offset = out->Seek(expected_offset, kSeekSet);
+      if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+        PLOG(ERROR) << "Failed to seek to oat class offsets section. Actual: " << actual_offset
+                    << " Expected: " << expected_offset << " File: " << oat_dex_file.GetLocation();
         return false;
       }
-      size_oat_lookup_table_ += table->RawDataLength();
+      if (!oat_dex_file.WriteClassOffsets(this, out)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+bool OatWriter::WriteClasses(OutputStream* out) {
+  for (OatClass& oat_class : oat_classes_) {
+    if (!oat_class.Write(this, out, oat_data_offset_)) {
+      PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
+      return false;
     }
   }
   return true;
@@ -1585,6 +1773,455 @@
   return true;
 }
 
+bool OatWriter::ReadDexFileHeader(File* file, OatDexFile* oat_dex_file) {
+  // Read the dex file header and perform minimal verification.
+  uint8_t raw_header[sizeof(DexFile::Header)];
+  if (!file->ReadFully(&raw_header, sizeof(DexFile::Header))) {
+    PLOG(ERROR) << "Failed to read dex file header. Actual: "
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ValidateDexFileHeader(raw_header, oat_dex_file->GetLocation())) {
+    return false;
+  }
+
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+  oat_dex_file->dex_file_size_ = header->file_size_;
+  oat_dex_file->dex_file_location_checksum_ = header->checksum_;
+  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+  return true;
+}
+
+bool OatWriter::ValidateDexFileHeader(const uint8_t* raw_header, const char* location) {
+  if (!DexFile::IsMagicValid(raw_header)) {
+    LOG(ERROR) << "Invalid magic number in dex file header. " << " File: " << location;
+    return false;
+  }
+  if (!DexFile::IsVersionValid(raw_header)) {
+    LOG(ERROR) << "Invalid version number in dex file header. " << " File: " << location;
+    return false;
+  }
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+  if (header->file_size_ < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Dex file header specifies file size insufficient to contain the header."
+               << " File: " << location;
+    return false;
+  }
+  return true;
+}
+
+bool OatWriter::WriteDexFiles(OutputStream* rodata, File* file) {
+  TimingLogger::ScopedTiming split("WriteDexFiles", timings_);
+
+  // Get the elf file offset of the oat file.
+  if (!GetOatDataOffset(rodata)) {
+    return false;
+  }
+
+  // Write dex files.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (!WriteDexFile(rodata, file, &oat_dex_file)) {
+      return false;
+    }
+  }
+
+  // Close sources.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.source_.Clear();  // Get rid of the reference, it's about to be invalidated.
+  }
+  zipped_dex_files_.clear();
+  zip_archives_.clear();
+  raw_dex_files_.clear();
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file) {
+  if (!SeekToDexFile(rodata, file, oat_dex_file)) {
+    return false;
+  }
+  if (oat_dex_file->source_.IsZipEntry()) {
+    if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetZipEntry())) {
+      return false;
+    }
+  } else if (oat_dex_file->source_.IsRawFile()) {
+    if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetRawFile())) {
+      return false;
+    }
+  } else {
+    DCHECK(oat_dex_file->source_.IsRawData());
+    if (!WriteDexFile(rodata, oat_dex_file, oat_dex_file->source_.GetRawData())) {
+      return false;
+    }
+  }
+
+  // Update current size and account for the written data.
+  DCHECK_EQ(size_, oat_dex_file->dex_file_offset_);
+  size_ += oat_dex_file->dex_file_size_;
+  size_dex_file_ += oat_dex_file->dex_file_size_;
+  return true;
+}
+
+bool OatWriter::SeekToDexFile(OutputStream* out, File* file, OatDexFile* oat_dex_file) {
+  // Dex files are required to be 4 byte aligned.
+  size_t original_offset = size_;
+  size_t offset = RoundUp(original_offset, 4);
+  size_dex_file_alignment_ += offset - original_offset;
+
+  // Seek to the start of the dex file and flush any pending operations in the stream.
+  // Verify that, after flushing the stream, the file is at the same offset as the stream.
+  uint32_t start_offset = oat_data_offset_ + offset;
+  off_t actual_offset = out->Seek(start_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Stream/file position mismatch! Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  size_ = offset;
+  oat_dex_file->dex_file_offset_ = offset;
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             ZipEntry* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+  // Extract the dex file and get the extracted size.
+  std::string error_msg;
+  if (!dex_file->ExtractToFile(*file, &error_msg)) {
+    LOG(ERROR) << "Failed to extract dex file from ZIP entry: " << error_msg
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  off_t extracted_end = lseek(file->Fd(), 0, SEEK_CUR);
+  if (extracted_end == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed get end offset after writing dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (extracted_end < static_cast<off_t>(start_offset)) {
+    LOG(ERROR) << "Dex file end position is before start position! End: " << extracted_end
+               << " Start: " << start_offset
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  uint64_t extracted_size = static_cast<uint64_t>(extracted_end - start_offset);
+  if (extracted_size < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Extracted dex file is shorter than dex file header. size: "
+               << extracted_size << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Read the dex file header and extract required data to OatDexFile.
+  off_t actual_offset = lseek(file->Fd(), start_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek back to dex file header. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(file, oat_dex_file)) {
+    return false;
+  }
+  if (extracted_size < oat_dex_file->dex_file_size_) {
+    LOG(ERROR) << "Extracted truncated dex file. Extracted size: " << extracted_size
+               << " file size from header: " << oat_dex_file->dex_file_size_
+               << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Override the checksum from header with the CRC from ZIP entry.
+  oat_dex_file->dex_file_location_checksum_ = dex_file->GetCrc32();
+
+  // Seek both file and stream to the end offset.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  actual_offset = lseek(file->Fd(), end_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // If we extracted more than the size specified in the header, truncate the file.
+  if (extracted_size > oat_dex_file->dex_file_size_) {
+    if (file->SetLength(end_offset) != 0) {
+      PLOG(ERROR) << "Failed to truncate excessive dex file length."
+                  << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             File* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+  off_t input_offset = lseek(dex_file->Fd(), 0, SEEK_SET);
+  if (input_offset != static_cast<off_t>(0)) {
+    PLOG(ERROR) << "Failed to seek to dex file header. Actual: " << input_offset
+                << " Expected: 0"
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(dex_file, oat_dex_file)) {
+    return false;
+  }
+
+  // Copy the input dex file using sendfile().
+  if (!file->Copy(dex_file, 0, oat_dex_file->dex_file_size_)) {
+    PLOG(ERROR) << "Failed to copy dex file to oat file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // Check file position and seek the stream to the end offset.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  off_t actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Unexpected file position after copying dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             OatDexFile* oat_dex_file,
+                             const uint8_t* dex_file) {
+  // Note: The raw data has already been checked to contain the header
+  // and all the data that the header specifies as the file size.
+  DCHECK(dex_file != nullptr);
+  DCHECK(ValidateDexFileHeader(dex_file, oat_dex_file->GetLocation()));
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(dex_file);
+
+  if (!rodata->WriteFully(dex_file, header->file_size_)) {
+    PLOG(ERROR) << "Failed to write dex file " << oat_dex_file->GetLocation()
+                << " to " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after writing dex file."
+                << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Update dex file size and resize class offsets in the OatDexFile.
+  // Note: For raw data, the checksum is passed directly to AddRawDexFileSource().
+  oat_dex_file->dex_file_size_ = header->file_size_;
+  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+  return true;
+}
+
+bool OatWriter::WriteOatDexFiles(OutputStream* rodata) {
+  TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_);
+
+  // Seek to the start of OatDexFiles, i.e. to the end of the OatHeader.  If there are
+  // no OatDexFiles, no data is actually written to .rodata before WriteHeader() and
+  // this Seek() ensures that we reserve the space for OatHeader in .rodata.
+  DCHECK(oat_dex_files_.empty() || oat_dex_files_[0u].offset_ == oat_header_->GetHeaderSize());
+  uint32_t expected_offset = oat_data_offset_ + oat_header_->GetHeaderSize();
+  off_t actual_offset = rodata->Seek(expected_offset, kSeekSet);
+  if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+    PLOG(ERROR) << "Failed to seek to OatDexFile table section. Actual: " << actual_offset
+                << " Expected: " << expected_offset << " File: " << rodata->GetLocation();
+    return false;
+  }
+
+  for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+
+    DCHECK_EQ(oat_data_offset_ + oat_dex_file->offset_,
+              static_cast<size_t>(rodata->Seek(0, kSeekCurrent)));
+
+    // Write OatDexFile.
+    if (!oat_dex_file->Write(this, rodata)) {
+      PLOG(ERROR) << "Failed to write oat dex information to " << rodata->GetLocation();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset) {
+  TimingLogger::ScopedTiming split("ExtendForTypeLookupTables", timings_);
+
+  int64_t new_length = oat_data_offset_ + dchecked_integral_cast<int64_t>(offset);
+  if (file->SetLength(new_length) != 0) {
+    PLOG(ERROR) << "Failed to extend file for type lookup tables. new_length: " << new_length
+        << "File: " << file->GetPath();
+    return false;
+  }
+  off_t actual_offset = rodata->Seek(new_length, kSeekSet);
+  if (actual_offset != static_cast<off_t>(new_length)) {
+    PLOG(ERROR) << "Failed to seek stream after extending file for type lookup tables."
+                << " Actual: " << actual_offset << " Expected: " << new_length
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after extending for type lookup tables."
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  return true;
+}
+
+bool OatWriter::OpenDexFiles(
+    File* file,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  TimingLogger::ScopedTiming split("OpenDexFiles", timings_);
+
+  if (oat_dex_files_.empty()) {
+    // Nothing to do.
+    return true;
+  }
+
+  size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+  size_t length = size_ - map_offset;
+  std::string error_msg;
+  std::unique_ptr<MemMap> dex_files_map(MemMap::MapFile(length,
+                                                        PROT_READ | PROT_WRITE,
+                                                        MAP_SHARED,
+                                                        file->Fd(),
+                                                        oat_data_offset_ + map_offset,
+                                                        /* low_4gb */ false,
+                                                        file->GetPath().c_str(),
+                                                        &error_msg));
+  if (dex_files_map == nullptr) {
+    LOG(ERROR) << "Failed to mmap() dex files from oat file. File: " << file->GetPath()
+               << " error: " << error_msg;
+    return false;
+  }
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    // Make sure no one messed with input files while we were copying data.
+    // At the very least we need consistent file size and number of class definitions.
+    const uint8_t* raw_dex_file =
+        dex_files_map->Begin() + oat_dex_file.dex_file_offset_ - map_offset;
+    if (!ValidateDexFileHeader(raw_dex_file, oat_dex_file.GetLocation())) {
+      // Note: ValidateDexFileHeader() already logged an error message.
+      LOG(ERROR) << "Failed to verify written dex file header!"
+          << " Output: " << file->GetPath() << " ~ " << std::hex << map_offset
+          << " ~ " << static_cast<const void*>(raw_dex_file);
+      return false;
+    }
+    const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
+    if (header->file_size_ != oat_dex_file.dex_file_size_) {
+      LOG(ERROR) << "File size mismatch in written dex file header! Expected: "
+          << oat_dex_file.dex_file_size_ << " Actual: " << header->file_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+    if (header->class_defs_size_ != oat_dex_file.class_offsets_.size()) {
+      LOG(ERROR) << "Class defs size mismatch in written dex file header! Expected: "
+          << oat_dex_file.class_offsets_.size() << " Actual: " << header->class_defs_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+
+    // Now, open the dex file.
+    dex_files.emplace_back(DexFile::Open(raw_dex_file,
+                                         oat_dex_file.dex_file_size_,
+                                         oat_dex_file.GetLocation(),
+                                         oat_dex_file.dex_file_location_checksum_,
+                                         /* oat_dex_file */ nullptr,
+                                         &error_msg));
+    if (dex_files.back() == nullptr) {
+      LOG(ERROR) << "Failed to open dex file from oat file. File:" << oat_dex_file.GetLocation();
+      return false;
+    }
+  }
+
+  *opened_dex_files_map = std::move(dex_files_map);
+  *opened_dex_files = std::move(dex_files);
+  return true;
+}
+
+bool OatWriter::WriteTypeLookupTables(
+    MemMap* opened_dex_files_map,
+    const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) {
+  TimingLogger::ScopedTiming split("WriteTypeLookupTables", timings_);
+
+  DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size());
+  for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+    if (oat_dex_file->lookup_table_offset_ != 0u) {
+      DCHECK(oat_dex_file->create_type_lookup_table_ == CreateTypeLookupTable::kCreate);
+      DCHECK_NE(oat_dex_file->class_offsets_.size(), 0u);
+      size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+      size_t lookup_table_offset = oat_dex_file->lookup_table_offset_;
+      uint8_t* lookup_table = opened_dex_files_map->Begin() + (lookup_table_offset - map_offset);
+      opened_dex_files[i]->CreateTypeLookupTable(lookup_table);
+    }
+  }
+
+  DCHECK_EQ(opened_dex_files_map == nullptr, opened_dex_files.empty());
+  if (opened_dex_files_map != nullptr && !opened_dex_files_map->Sync()) {
+    PLOG(ERROR) << "Failed to Sync() type lookup tables. Map: " << opened_dex_files_map->GetName();
+    return false;
+  }
+
+  return true;
+}
+
 bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
   static const uint8_t kPadding[] = {
       0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
@@ -1611,15 +2248,20 @@
   }
 }
 
-OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) {
-  offset_ = offset;
-  const std::string& location(dex_file.GetLocation());
-  dex_file_location_size_ = location.size();
-  dex_file_location_data_ = reinterpret_cast<const uint8_t*>(location.data());
-  dex_file_location_checksum_ = dex_file.GetLocationChecksum();
-  dex_file_offset_ = 0;
-  lookup_table_offset_ = 0;
-  class_offsets_.resize(dex_file.NumClassDefs());
+OatWriter::OatDexFile::OatDexFile(const char* dex_file_location,
+                                  DexFileSource source,
+                                  CreateTypeLookupTable create_type_lookup_table)
+    : source_(source),
+      create_type_lookup_table_(create_type_lookup_table),
+      dex_file_size_(0),
+      offset_(0),
+      dex_file_location_size_(strlen(dex_file_location)),
+      dex_file_location_data_(dex_file_location),
+      dex_file_location_checksum_(0u),
+      dex_file_offset_(0u),
+      class_offsets_offset_(0u),
+      lookup_table_offset_(0u),
+      class_offsets_() {
 }
 
 size_t OatWriter::OatDexFile::SizeOf() const {
@@ -1627,24 +2269,54 @@
           + dex_file_location_size_
           + sizeof(dex_file_location_checksum_)
           + sizeof(dex_file_offset_)
-          + sizeof(lookup_table_offset_)
-          + (sizeof(class_offsets_[0]) * class_offsets_.size());
+          + sizeof(class_offsets_offset_)
+          + sizeof(lookup_table_offset_);
 }
 
-bool OatWriter::OatDexFile::Write(OatWriter* oat_writer,
-                                  OutputStream* out,
-                                  const size_t file_offset) const {
+void OatWriter::OatDexFile::ReserveTypeLookupTable(OatWriter* oat_writer) {
+  DCHECK_EQ(lookup_table_offset_, 0u);
+  if (create_type_lookup_table_ == CreateTypeLookupTable::kCreate && !class_offsets_.empty()) {
+    size_t table_size = TypeLookupTable::RawDataLength(class_offsets_.size());
+    if (table_size != 0u) {
+      // Type tables are required to be 4 byte aligned.
+      size_t original_offset = oat_writer->size_;
+      size_t offset = RoundUp(original_offset, 4);
+      oat_writer->size_oat_lookup_table_alignment_ += offset - original_offset;
+      lookup_table_offset_ = offset;
+      oat_writer->size_ = offset + table_size;
+      oat_writer->size_oat_lookup_table_ += table_size;
+    }
+  }
+}
+
+void OatWriter::OatDexFile::ReserveClassOffsets(OatWriter* oat_writer) {
+  DCHECK_EQ(class_offsets_offset_, 0u);
+  if (!class_offsets_.empty()) {
+    // Class offsets are required to be 4 byte aligned.
+    size_t original_offset = oat_writer->size_;
+    size_t offset = RoundUp(original_offset, 4);
+    oat_writer->size_oat_class_offsets_alignment_ += offset - original_offset;
+    class_offsets_offset_ = offset;
+    oat_writer->size_ = offset + GetClassOffsetsRawSize();
+  }
+}
+
+bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
+  const size_t file_offset = oat_writer->oat_data_offset_;
   DCHECK_OFFSET_();
+
   if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) {
     PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
+
   if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) {
     PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
+
   if (!oat_writer->WriteData(out,
                              &dex_file_location_checksum_,
                              sizeof(dex_file_location_checksum_))) {
@@ -1652,21 +2324,35 @@
     return false;
   }
   oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
+
   if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) {
     PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
+
+  if (!oat_writer->WriteData(out, &class_offsets_offset_, sizeof(class_offsets_offset_))) {
+    PLOG(ERROR) << "Failed to write class offsets offset to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_dex_file_class_offsets_offset_ += sizeof(class_offsets_offset_);
+
   if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) {
     PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
+
+  return true;
+}
+
+bool OatWriter::OatDexFile::WriteClassOffsets(OatWriter* oat_writer, OutputStream* out) {
   if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) {
-    PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation();
+    PLOG(ERROR) << "Failed to write oat class offsets for " << GetLocation()
+                << " to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_dex_file_class_offsets_ += GetClassOffsetsRawSize();
+  oat_writer->size_oat_class_offsets_ += GetClassOffsetsRawSize();
   return true;
 }
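For orientation, a rough caller-side sketch of the new staged OatWriter interface follows. It is not part of this change; the variable names (rodata, text, oat_file, driver, isa, and friends) are placeholders, error handling is reduced to early returns, and the call order simply mirrors the protocol documented in the oat_writer.h comment further down.

    // Sketch only: driving the staged OatWriter API with placeholder caller context.
    OatWriter writer(/* compiling_boot_image */ false, &timings);

    // 1. Register dex file sources (plain dex file, zip of dex files, or raw memory).
    if (!writer.AddDexFileSource("/path/to/classes.dex", "classes.dex")) {
      return false;
    }

    // 2. Write the dex files into .rodata and open them back from the oat file.
    std::unique_ptr<MemMap> opened_dex_files_map;
    std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
    if (!writer.WriteAndOpenDexFiles(rodata, oat_file, isa, isa_features, &key_value_store,
                                     &opened_dex_files_map, &opened_dex_files)) {
      return false;
    }

    // 3. Lay out the remaining data, then emit .rodata, .text and finally the header.
    writer.PrepareLayout(driver, image_writer, dex_file_pointers);
    if (!writer.WriteRodata(rodata) ||
        !writer.WriteCode(text) ||
        !writer.WriteHeader(rodata, image_oat_checksum, image_oat_begin, image_patch_delta)) {
      return false;
    }
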
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5feb5fc..d681998 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -27,7 +27,9 @@
 #include "method_reference.h"
 #include "mirror/class.h"
 #include "oat.h"
+#include "os.h"
 #include "safe_map.h"
+#include "ScopedFd.h"
 #include "utils/array_ref.h"
 
 namespace art {
@@ -39,6 +41,7 @@
 class OutputStream;
 class TimingLogger;
 class TypeLookupTable;
+class ZipEntry;
 
 namespace dwarf {
 struct MethodDebugInfo;
@@ -61,6 +64,11 @@
 // ...
 // TypeLookupTable[D]
 //
+// ClassOffsets[0]   one table of OatClass offsets for each class def for each OatDexFile.
+// ClassOffsets[1]
+// ...
+// ClassOffsets[D]
+//
 // OatClass[0]       one variable sized OatClass for each of C DexFile::ClassDefs
 // OatClass[1]       contains OatClass entries with class status, offsets to code, etc.
 // ...
@@ -93,15 +101,65 @@
 //
 class OatWriter {
  public:
-  OatWriter(const std::vector<const DexFile*>& dex_files,
-            uint32_t image_file_location_oat_checksum,
-            uintptr_t image_file_location_oat_begin,
-            int32_t image_patch_delta,
-            const CompilerDriver* compiler,
-            ImageWriter* image_writer,
-            bool compiling_boot_image,
-            TimingLogger* timings,
-            SafeMap<std::string, std::string>* key_value_store);
+  enum class CreateTypeLookupTable {
+    kCreate,
+    kDontCreate,
+    kDefault = kCreate
+  };
+
+  OatWriter(bool compiling_boot_image, TimingLogger* timings);
+
+  // To produce a valid oat file, the user must first add sources with any combination of
+  //   - AddDexFileSource(),
+  //   - AddZippedDexFilesSource(),
+  //   - AddRawDexFileSource().
+  // Then the user must call in order
+  //   - WriteAndOpenDexFiles()
+  //   - PrepareLayout(),
+  //   - WriteRodata(),
+  //   - WriteCode(),
+  //   - WriteHeader().
+
+  // Add dex file source(s) from a file, either a plain dex file or
+  // a zip file with one or more dex files.
+  bool AddDexFileSource(
+      const char* filename,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source(s) from a zip file specified by a file handle.
+  bool AddZippedDexFilesSource(
+      ScopedFd&& zip_fd,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source from raw memory.
+  bool AddRawDexFileSource(
+      const ArrayRef<const uint8_t>& data,
+      const char* location,
+      uint32_t location_checksum,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  dchecked_vector<const char*> GetSourceLocations() const;
+
+  // Write raw dex files to the .rodata section and open them from the oat file.
+  bool WriteAndOpenDexFiles(OutputStream* rodata,
+                            File* file,
+                            InstructionSet instruction_set,
+                            const InstructionSetFeatures* instruction_set_features,
+                            SafeMap<std::string, std::string>* key_value_store,
+                            /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                            /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  // Prepare layout of remaining data.
+  void PrepareLayout(const CompilerDriver* compiler,
+                     ImageWriter* image_writer,
+                     const std::vector<const DexFile*>& dex_files);
+  // Write the rest of .rodata section (ClassOffsets[], OatClass[], maps).
+  bool WriteRodata(OutputStream* out);
+  // Write the code to the .text section.
+  bool WriteCode(OutputStream* out);
+  // Write the oat header. This finalizes the oat file.
+  bool WriteHeader(OutputStream* out,
+                   uint32_t image_file_location_oat_checksum,
+                   uintptr_t image_file_location_oat_begin,
+                   int32_t image_patch_delta);
 
   // Returns whether the oat file has an associated image.
   bool HasImage() const {
@@ -130,9 +188,6 @@
     return ArrayRef<const uintptr_t>(absolute_patch_locations_);
   }
 
-  bool WriteRodata(OutputStream* out);
-  bool WriteCode(OutputStream* out);
-
   ~OatWriter();
 
   ArrayRef<const dwarf::MethodDebugInfo> GetMethodDebugInfo() const {
@@ -144,6 +199,7 @@
   }
 
  private:
+  class DexFileSource;
   class OatClass;
   class OatDexFile;
 
@@ -174,29 +230,65 @@
   // with a given DexMethodVisitor.
   bool VisitDexMethods(DexMethodVisitor* visitor);
 
-  size_t InitOatHeader();
+  size_t InitOatHeader(InstructionSet instruction_set,
+                       const InstructionSetFeatures* instruction_set_features,
+                       uint32_t num_dex_files,
+                       SafeMap<std::string, std::string>* key_value_store);
   size_t InitOatDexFiles(size_t offset);
-  size_t InitLookupTables(size_t offset);
-  size_t InitDexFiles(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatMaps(size_t offset);
   size_t InitOatCode(size_t offset);
   size_t InitOatCodeDexFiles(size_t offset);
 
-  bool WriteTables(OutputStream* out, const size_t file_offset);
-  bool WriteLookupTables(OutputStream* out, const size_t file_offset);
+  bool WriteClassOffsets(OutputStream* out);
+  bool WriteClasses(OutputStream* out);
   size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
 
   bool GetOatDataOffset(OutputStream* out);
+  bool ReadDexFileHeader(File* file, OatDexFile* oat_dex_file);
+  bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
+  bool WriteDexFiles(OutputStream* rodata, File* file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool SeekToDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, ZipEntry* dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, File* dex_file);
+  bool WriteDexFile(OutputStream* rodata, OatDexFile* oat_dex_file, const uint8_t* dex_file);
+  bool WriteOatDexFiles(OutputStream* rodata);
+  bool ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset);
+  bool OpenDexFiles(File* file,
+                    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  bool WriteTypeLookupTables(MemMap* opened_dex_files_map,
+                             const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
   bool WriteData(OutputStream* out, const void* data, size_t size);
 
+  enum class WriteState {
+    kAddingDexFileSources,
+    kPrepareLayout,
+    kWriteRoData,
+    kWriteText,
+    kWriteHeader,
+    kDone
+  };
+
+  WriteState write_state_;
+  TimingLogger* timings_;
+
+  std::vector<std::unique_ptr<File>> raw_dex_files_;
+  std::vector<std::unique_ptr<ZipArchive>> zip_archives_;
+  std::vector<std::unique_ptr<ZipEntry>> zipped_dex_files_;
+
+  // Using std::list<> which doesn't move elements around on push/emplace_back().
+  // We need this because we keep plain pointers to the strings' c_str().
+  std::list<std::string> zipped_dex_file_locations_;
+
   dchecked_vector<dwarf::MethodDebugInfo> method_info_;
 
-  const CompilerDriver* const compiler_driver_;
-  ImageWriter* const image_writer_;
+  const CompilerDriver* compiler_driver_;
+  ImageWriter* image_writer_;
   const bool compiling_boot_image_;
 
   // note OatFile does not take ownership of the DexFiles
@@ -215,13 +307,7 @@
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
 
-  // dependencies on the image.
-  uint32_t image_file_location_oat_checksum_;
-  uintptr_t image_file_location_oat_begin_;
-  int32_t image_patch_delta_;
-
   // data to write
-  SafeMap<std::string, std::string>* key_value_store_;
   std::unique_ptr<OatHeader> oat_header_;
   dchecked_vector<OatDexFile> oat_dex_files_;
   dchecked_vector<OatClass> oat_classes_;
@@ -257,10 +343,12 @@
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
   uint32_t size_oat_dex_file_offset_;
+  uint32_t size_oat_dex_file_class_offsets_offset_;
   uint32_t size_oat_dex_file_lookup_table_offset_;
-  uint32_t size_oat_dex_file_class_offsets_;
   uint32_t size_oat_lookup_table_alignment_;
   uint32_t size_oat_lookup_table_;
+  uint32_t size_oat_class_offsets_alignment_;
+  uint32_t size_oat_class_offsets_;
   uint32_t size_oat_class_type_;
   uint32_t size_oat_class_status_;
   uint32_t size_oat_class_method_bitmaps_;
@@ -269,7 +357,7 @@
   std::unique_ptr<linker::RelativePatcher> relative_patcher_;
 
   // The locations of absolute patches relative to the start of the executable section.
-  std::vector<uintptr_t> absolute_patch_locations_;
+  dchecked_vector<uintptr_t> absolute_patch_locations_;
 
   // Map method reference to assigned offset.
   // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
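For reference, the per-dex-file record produced by OatDexFile::Write() in the oat_writer.cc hunks above now consists of the location size, the location string, the location checksum, the dex file offset, the class offsets offset, and the lookup table offset, in that order. The helper below is an illustrative sketch rather than code from this change: it mirrors OatDexFile::SizeOf() and assumes the fixed-width fields are uint32_t, as suggested by their initializers.

    // Sketch (not from the patch): size of one OatDexFile record for a given location string.
    size_t OatDexFileRecordSize(const std::string& location) {
      return sizeof(uint32_t)    // dex_file_location_size
           + location.size()     // dex_file_location_data (variable length)
           + sizeof(uint32_t)    // dex_file_location_checksum
           + sizeof(uint32_t)    // dex_file_offset: raw dex file within the oat data
           + sizeof(uint32_t)    // class_offsets_offset: ClassOffsets[] table, 0 if absent
           + sizeof(uint32_t);   // lookup_table_offset: TypeLookupTable, 0 if not created
    }
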
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 26bf1cb..1d604e7 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -56,7 +56,6 @@
         return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(compiler_stats),
         interpreter_metadata_(interpreter_metadata),
         dex_cache_(dex_cache) {}
@@ -77,7 +76,6 @@
         return_type_(return_type),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(nullptr),
         interpreter_metadata_(nullptr),
         null_dex_cache_(),
@@ -85,10 +83,6 @@
 
   bool BuildGraph(const DexFile::CodeItem& code);
 
-  bool CanUseBaselineForStringInit() const {
-    return can_use_baseline_for_string_init_;
-  }
-
   static constexpr const char* kBuilderPassName = "builder";
 
   // The number of entries in a packed switch before we use a jump table or specified
@@ -363,11 +357,6 @@
   // used by move-result instructions.
   HInstruction* latest_result_;
 
-  // We need to know whether we have built a graph that has calls to StringFactory
-  // and hasn't gone through the verifier. If the following flag is `false`, then
-  // we cannot compile with baseline.
-  bool can_use_baseline_for_string_init_;
-
   OptimizingCompilerStats* compilation_stats_;
 
   const uint8_t* interpreter_metadata_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ea0b9ec..a3bbfdb 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -142,23 +142,6 @@
   return pointer_size * index;
 }
 
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
-  Initialize();
-  if (!is_leaf) {
-    MarkNotLeaf();
-  }
-  const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet());
-  InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs()
-                             + GetGraph()->GetTemporariesVRegSlots()
-                             + 1 /* filler */,
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           GetGraph()->GetMaximumNumberOfOutVRegs()
-                             + (is_64_bit ? 2 : 1) /* current method */,
-                           GetGraph()->GetBlocks());
-  CompileInternal(allocator, /* is_baseline */ true);
-}
-
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
@@ -220,8 +203,12 @@
   current_slow_path_ = nullptr;
 }
 
-void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
-  is_baseline_ = is_baseline;
+void CodeGenerator::Compile(CodeAllocator* allocator) {
+  // The register allocator already called `InitializeCodeGeneration`,
+  // where the frame size has been computed.
+  DCHECK(block_order_ != nullptr);
+  Initialize();
+
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   DCHECK_EQ(current_block_index_, 0u);
 
@@ -242,9 +229,6 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       DisassemblyScope disassembly_scope(current, *this);
-      if (is_baseline) {
-        InitLocationsBaseline(current);
-      }
       DCHECK(CheckTypeConsistency(current));
       current->Accept(instruction_visitor);
     }
@@ -254,7 +238,7 @@
 
   // Emit catch stack maps at the end of the stack map stream as expected by the
   // runtime exception handler.
-  if (!is_baseline && graph_->HasTryCatch()) {
+  if (graph_->HasTryCatch()) {
     RecordCatchBlockInfo();
   }
 
@@ -262,14 +246,6 @@
   Finalize(allocator);
 }
 
-void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
-  // The register allocator already called `InitializeCodeGeneration`,
-  // where the frame size has been computed.
-  DCHECK(block_order_ != nullptr);
-  Initialize();
-  CompileInternal(allocator, /* is_baseline */ false);
-}
-
 void CodeGenerator::Finalize(CodeAllocator* allocator) {
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
@@ -282,29 +258,6 @@
   // No linker patches by default.
 }
 
-size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
-  for (size_t i = 0; i < length; ++i) {
-    if (!array[i]) {
-      array[i] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
-size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
-  for (size_t i = 0; i < length - 1; i += 2) {
-    if (!array[i] && !array[i + 1]) {
-      array[i] = true;
-      array[i + 1] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
                                              size_t maximum_number_of_live_core_registers,
                                              size_t maximum_number_of_live_fpu_registers,
@@ -592,123 +545,6 @@
   }
 }
 
-void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
-  LocationSummary* locations = instruction->GetLocations();
-  if (locations == nullptr) return;
-
-  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
-    blocked_core_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    blocked_fpu_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = number_of_register_pairs_; i < e; ++i) {
-    blocked_register_pairs_[i] = false;
-  }
-
-  // Mark all fixed input, temp and output registers as used.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    BlockIfInRegister(locations->InAt(i));
-  }
-
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    BlockIfInRegister(loc);
-  }
-  Location result_location = locations->Out();
-  if (locations->OutputCanOverlapWithInputs()) {
-    BlockIfInRegister(result_location, /* is_out */ true);
-  }
-
-  SetupBlockedRegisters(/* is_baseline */ true);
-
-  // Allocate all unallocated input locations.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    Location loc = locations->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (loc.IsUnallocated()) {
-      if ((loc.GetPolicy() == Location::kRequiresRegister)
-          || (loc.GetPolicy() == Location::kRequiresFpuRegister)) {
-        loc = AllocateFreeRegister(input->GetType());
-      } else {
-        DCHECK_EQ(loc.GetPolicy(), Location::kAny);
-        HLoadLocal* load = input->AsLoadLocal();
-        if (load != nullptr) {
-          loc = GetStackLocation(load);
-        } else {
-          loc = AllocateFreeRegister(input->GetType());
-        }
-      }
-      locations->SetInAt(i, loc);
-    }
-  }
-
-  // Allocate all unallocated temp locations.
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    if (loc.IsUnallocated()) {
-      switch (loc.GetPolicy()) {
-        case Location::kRequiresRegister:
-          // Allocate a core register (large enough to fit a 32-bit integer).
-          loc = AllocateFreeRegister(Primitive::kPrimInt);
-          break;
-
-        case Location::kRequiresFpuRegister:
-          // Allocate a core register (large enough to fit a 64-bit double).
-          loc = AllocateFreeRegister(Primitive::kPrimDouble);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected policy for temporary location "
-                     << loc.GetPolicy();
-      }
-      locations->SetTempAt(i, loc);
-    }
-  }
-  if (result_location.IsUnallocated()) {
-    switch (result_location.GetPolicy()) {
-      case Location::kAny:
-      case Location::kRequiresRegister:
-      case Location::kRequiresFpuRegister:
-        result_location = AllocateFreeRegister(instruction->GetType());
-        break;
-      case Location::kSameAsFirstInput:
-        result_location = locations->InAt(0);
-        break;
-    }
-    locations->UpdateOut(result_location);
-  }
-}
-
-void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
-  AllocateLocations(instruction);
-  if (instruction->GetLocations() == nullptr) {
-    if (instruction->IsTemporary()) {
-      HInstruction* previous = instruction->GetPrevious();
-      Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-      Move(previous, temp_location, instruction);
-    }
-    return;
-  }
-  AllocateRegistersLocally(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    Location location = instruction->GetLocations()->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (location.IsValid()) {
-      // Move the input to the desired location.
-      if (input->GetNext()->IsTemporary()) {
-        // If the input was stored in a temporary, use that temporary to
-        // perform the move.
-        Move(input->GetNext(), location, instruction);
-      } else {
-        Move(input, location, instruction);
-      }
-    }
-  }
-}
-
 void CodeGenerator::AllocateLocations(HInstruction* instruction) {
   instruction->Accept(GetLocationBuilder());
   DCHECK(CheckTypeConsistency(instruction));
@@ -789,132 +625,6 @@
   }
 }
 
-void CodeGenerator::BuildNativeGCMap(
-    ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
-  const std::vector<uint8_t>& gc_map_raw =
-      compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
-          ->GetDexGcMap();
-  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
-
-  uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
-
-  size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
-  GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
-  for (size_t i = 0; i != num_stack_maps; ++i) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    uint32_t native_offset = stack_map_entry.native_pc_offset;
-    uint32_t dex_pc = stack_map_entry.dex_pc;
-    const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
-    builder.AddEntry(native_offset, references);
-  }
-}
-
-void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
-  uint32_t pc2dex_data_size = 0u;
-  uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
-  uint32_t pc2dex_offset = 0u;
-  int32_t pc2dex_dalvik_offset = 0;
-  uint32_t dex2pc_data_size = 0u;
-  uint32_t dex2pc_entries = 0u;
-  uint32_t dex2pc_offset = 0u;
-  int32_t dex2pc_dalvik_offset = 0;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
-    pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  // Walk over the blocks and find which ones correspond to catch block entries.
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      ++dex2pc_entries;
-      dex2pc_data_size += UnsignedLeb128Size(native_pc - dex2pc_offset);
-      dex2pc_data_size += SignedLeb128Size(block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-  uint32_t total_entries = pc2dex_entries + dex2pc_entries;
-  uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
-  uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
-  data->resize(data_size);
-
-  uint8_t* data_ptr = &(*data)[0];
-  uint8_t* write_pos = data_ptr;
-
-  write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
-  write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size);
-  uint8_t* write_pos2 = write_pos + pc2dex_data_size;
-
-  pc2dex_offset = 0u;
-  pc2dex_dalvik_offset = 0u;
-  dex2pc_offset = 0u;
-  dex2pc_dalvik_offset = 0u;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
-    write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
-    write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      write_pos2 = EncodeUnsignedLeb128(write_pos2, native_pc - dex2pc_offset);
-      write_pos2 = EncodeSignedLeb128(write_pos2, block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size);
-  DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size);
-
-  if (kIsDebugBuild) {
-    // Verify the encoded table holds the expected data.
-    MappingTable table(data_ptr);
-    CHECK_EQ(table.TotalSize(), total_entries);
-    CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
-    auto it = table.PcToDexBegin();
-    auto it2 = table.DexToPcBegin();
-    for (size_t i = 0; i < pc2dex_entries; i++) {
-      const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-      CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
-      CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
-      ++it;
-    }
-    for (HBasicBlock* block : graph_->GetBlocks()) {
-      if (block->IsCatchBlock()) {
-        CHECK_EQ(GetAddressOf(block), it2.NativePcOffset());
-        CHECK_EQ(block->GetDexPc(), it2.DexPc());
-        ++it2;
-      }
-    }
-    CHECK(it == table.PcToDexEnd());
-    CHECK(it2 == table.DexToPcEnd());
-  }
-}
-
-void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
-  Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
-  // We currently don't use callee-saved registers.
-  size_t size = 0 + 1 /* marker */ + 0;
-  vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
-  vmap_encoder.PushBackUnsigned(size);
-  vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-}
-
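
The BuildMappingTable code removed above emitted the old pc-to-dex mapping table as delta-compressed LEB128 entries: each entry stores an unsigned LEB128 delta of the native pc offset and a signed LEB128 delta of the dex pc. A minimal standalone sketch of that encoding scheme follows; the EncodeUleb128/EncodeSleb128 helpers are written out here and only stand in for ART's Leb128 utilities.

#include <cstdint>
#include <utility>
#include <vector>

// Stand-ins for ART's EncodeUnsignedLeb128/EncodeSignedLeb128 helpers.
void EncodeUleb128(std::vector<uint8_t>* out, uint32_t value) {
  do {
    uint8_t byte = value & 0x7f;
    value >>= 7;
    if (value != 0) {
      byte |= 0x80;  // more bytes follow
    }
    out->push_back(byte);
  } while (value != 0);
}

void EncodeSleb128(std::vector<uint8_t>* out, int32_t value) {
  bool more = true;
  while (more) {
    uint8_t byte = value & 0x7f;
    value >>= 7;  // arithmetic shift, keeps the sign
    // Done once the remaining bits agree with the sign bit of the emitted byte.
    if ((value == 0 && (byte & 0x40) == 0) || (value == -1 && (byte & 0x40) != 0)) {
      more = false;
    } else {
      byte |= 0x80;
    }
    out->push_back(byte);
  }
}

// Delta-encode (native_pc_offset, dex_pc) pairs the way the removed builder did:
// each entry stores the difference from the previous entry, so monotonically
// increasing offsets compress to a byte or two per entry.
std::vector<uint8_t> EncodePcToDexEntries(
    const std::vector<std::pair<uint32_t, int32_t>>& entries) {
  std::vector<uint8_t> data;
  uint32_t prev_pc = 0u;
  int32_t prev_dex_pc = 0;
  for (const auto& entry : entries) {
    EncodeUleb128(&data, entry.first - prev_pc);
    EncodeSleb128(&data, entry.second - prev_dex_pc);
    prev_pc = entry.first;
    prev_dex_pc = entry.second;
  }
  return data;
}
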
 size_t CodeGenerator::ComputeStackMapsSize() {
   return stack_map_stream_.PrepareForFillIn();
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 5958cd8..4f8f146 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -158,10 +158,8 @@
 
 class CodeGenerator {
  public:
-  // Compiles the graph to executable instructions. Returns whether the compilation
-  // succeeded.
-  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
-  void CompileOptimized(CodeAllocator* allocator);
+  // Compiles the graph to executable instructions.
+  void Compile(CodeAllocator* allocator);
   static CodeGenerator* Create(HGraph* graph,
                                InstructionSet instruction_set,
                                const InstructionSetFeatures& isa_features,
@@ -214,7 +212,7 @@
 
   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
-  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
+  virtual void SetupBlockedRegisters() const = 0;
 
   virtual void ComputeSpillMask() {
     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
@@ -290,17 +288,9 @@
     slow_paths_.push_back(slow_path);
   }
 
-  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
-  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
-  void BuildNativeGCMap(
-      ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
   void BuildStackMaps(MemoryRegion region);
   size_t ComputeStackMapsSize();
 
-  bool IsBaseline() const {
-    return is_baseline_;
-  }
-
   bool IsLeafMethod() const {
     return is_leaf_;
   }
@@ -489,7 +479,6 @@
         fpu_callee_save_mask_(fpu_callee_save_mask),
         stack_map_stream_(graph->GetArena()),
         block_order_(nullptr),
-        is_baseline_(false),
         disasm_info_(nullptr),
         stats_(stats),
         graph_(graph),
@@ -502,15 +491,6 @@
     slow_paths_.reserve(8);
   }
 
-  // Register allocation logic.
-  void AllocateRegistersLocally(HInstruction* instruction) const;
-
-  // Backend specific implementation for allocating a register.
-  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
-
-  static size_t FindFreeEntry(bool* array, size_t length);
-  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);
-
   virtual Location GetStackLocation(HLoadLocal* load) const = 0;
 
   virtual HGraphVisitor* GetLocationBuilder() = 0;
@@ -593,16 +573,11 @@
   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;
 
-  // Whether we are using baseline.
-  bool is_baseline_;
-
   DisassemblyInformation* disasm_info_;
 
  private:
-  void InitLocationsBaseline(HInstruction* instruction);
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void GenerateSlowPaths();
-  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
   void BlockIfInRegister(Location location, bool is_out = false) const;
   void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a11ceb9..2725792 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -47,9 +47,7 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = R0;
 
-// We unconditionally allocate R5 to ensure we can do long operations
-// with baseline.
-static constexpr Register kCoreSavedRegisterForBaseline = R5;
+static constexpr Register kCoreAlwaysSpillRegister = R5;
 static constexpr Register kCoreCalleeSaves[] =
     { R5, R6, R7, R8, R10, R11, LR };
 static constexpr SRegister kFpuCalleeSaves[] =
@@ -728,6 +726,24 @@
   UNREACHABLE();
 }
 
+inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM condition codes can express all the necessary branches; see the
+  // "Meaning (floating-point)" column in Table A8-1 of the ARMv7 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return EQ;
+    case kCondNE: return NE /* unordered */;
+    case kCondLT: return gt_bias ? CC : LT /* unordered */;
+    case kCondLE: return gt_bias ? LS : LE /* unordered */;
+    case kCondGT: return gt_bias ? HI /* unordered */ : GT;
+    case kCondGE: return gt_bias ? CS /* unordered */ : GE;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
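
For context on the gt_bias flag consumed by ARMFPCondition above: it encodes the dex-level semantics of the cmpg/cmpl style float comparisons, where an unordered comparison (a NaN operand) is biased towards "greater" or "less". A plain C++ sketch of that contract, for illustration only:

// Dex-style float comparison result: 1, 0 or -1.
// With gt_bias (cmpg-float) a NaN operand compares as "greater"; without it
// (cmpl-float) it compares as "less". ARMFPCondition picks the VFP condition
// code that sends the unordered case to the same side.
int CompareFloats(float a, float b, bool gt_bias) {
  if (a > b) return 1;
  if (a < b) return -1;
  if (a == b) return 0;
  return gt_bias ? 1 : -1;  // unordered: at least one operand is NaN
}
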
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << Register(reg);
 }
@@ -815,58 +831,7 @@
   CodeGenerator::Finalize(allocator);
 }
 
-Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      ArmManagedRegister pair =
-          ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        ArmManagedRegister current =
-            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfSRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimDouble: {
-      int reg = FindTwoFreeConsecutiveAlignedEntries(blocked_fpu_registers_, kNumberOfSRegisters);
-      DCHECK_EQ(reg % 2, 0);
-      return Location::FpuRegisterPairLocation(reg, reg + 1);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[R1_R2] = true;
 
@@ -881,15 +846,7 @@
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    blocked_core_registers_[kCoreSavedRegisterForBaseline] = false;
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
@@ -919,11 +876,10 @@
 
 void CodeGeneratorARM::ComputeSpillMask() {
   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
-  // Save one extra register for baseline. Note that on thumb2, there is no easy
-  // instruction to restore just the PC, so this actually helps both baseline
-  // and non-baseline to save and restore at least two registers at entry and exit.
-  core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
   DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  // There is no easy instruction to restore just the PC on thumb2. We spill and
+  // restore another arbitrary register.
+  core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister);
   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
   // We use vpush and vpop for saving and restoring floating point registers, which take
   // a SRegister and the number of registers to save/restore after that SRegister. We
@@ -1416,15 +1372,9 @@
 
 void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
                                                   Label* true_label,
-                                                  Label* false_label) {
+                                                  Label* false_label ATTRIBUTE_UNUSED) {
   __ vmstat();  // transfer FP status register to ARM APSR.
-  // TODO: merge into a single branch (except "equal or unordered" and "not equal")
-  if (cond->IsFPConditionTrueIfNaN()) {
-    __ b(true_label, VS);  // VS for unordered.
-  } else if (cond->IsFPConditionFalseIfNaN()) {
-    __ b(false_label, VS);  // VS for unordered.
-  }
-  __ b(true_label, ARMCondition(cond->GetCondition()));
+  __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
 }
 
 void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -1972,9 +1922,9 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
                                          codegen_->GetAssembler(),
@@ -2004,9 +1954,9 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3803,6 +3753,7 @@
 
   Label less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
+  Condition less_cond;
   switch (type) {
     case Primitive::kPrimLong: {
       __ cmp(left.AsRegisterPairHigh<Register>(),
@@ -3813,6 +3764,7 @@
       __ LoadImmediate(out, 0);
       __ cmp(left.AsRegisterPairLow<Register>(),
              ShifterOperand(right.AsRegisterPairLow<Register>()));  // Unsigned compare.
+      less_cond = LO;
       break;
     }
     case Primitive::kPrimFloat:
@@ -3825,14 +3777,15 @@
                  FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
       }
       __ vmstat();  // transfer FP status register to ARM APSR.
-      __ b(compare->IsGtBias() ? &greater : &less, VS);  // VS for unordered.
+      less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
       break;
     }
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
+      UNREACHABLE();
   }
   __ b(&done, EQ);
-  __ b(&less, LO);  // LO is for both: unsigned compare for longs and 'less than' for floats.
+  __ b(&less, less_cond);
 
   __ Bind(&greater);
   __ LoadImmediate(out, 1);
@@ -5530,7 +5483,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5740,8 +5693,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6027,6 +5980,7 @@
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
       codegen_->AddSlowPath(slow_path);
 
+      // IP = Thread::Current()->GetIsGcMarking()
       __ LoadFromOffset(
           kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
       __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
@@ -6105,11 +6059,8 @@
   //   }
   //
   // Note: the original implementation in ReadBarrier::Barrier is
-  // slightly more complex as:
-  // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
-  // - it performs additional checks that we do not do here for
-  //   performance reasons.
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
 
   Register ref_reg = ref.AsRegister<Register>();
   Register temp_reg = temp.AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 26d6d63..d45ea97 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -340,9 +340,7 @@
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -444,7 +442,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -452,7 +450,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 6ed2c5a..c0e3959 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -93,6 +93,24 @@
   UNREACHABLE();
 }
 
+inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM64 condition codes can express all the necessary branches; see the
+  // "Meaning (floating-point)" column in Table C1-1 of the ARMv8 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return eq;
+    case kCondNE: return ne /* unordered */;
+    case kCondLT: return gt_bias ? cc : lt /* unordered */;
+    case kCondLE: return gt_bias ? ls : le /* unordered */;
+    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
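
As a sanity check of the /* unordered */ annotations in ARM64FPCondition, the sketch below evaluates the AArch64 condition predicates on the NZCV state an unordered FCMP produces (N=0, Z=0, C=1, V=1, i.e. 0b0011); the conditions picked for the "unordered goes to this side" cases are exactly the ones that hold in that state. Illustrative only, not ART code.

#include <cassert>

// Flag state AArch64 leaves after an FCMP with an unordered (NaN) result.
constexpr bool kN = false, kZ = false, kC = true, kV = true;

constexpr bool NE() { return !kZ; }                // true  -> not equal, or unordered
constexpr bool LT() { return kN != kV; }           // true  -> less than, or unordered
constexpr bool LE() { return kZ || (kN != kV); }   // true  -> less or equal, or unordered
constexpr bool HI() { return kC && !kZ; }          // true  -> greater than, or unordered
constexpr bool CS() { return kC; }                 // true  -> greater or equal, or unordered
constexpr bool CC() { return !kC; }                // false -> strictly less than
constexpr bool LS() { return !kC || kZ; }          // false -> strictly less or equal
constexpr bool GT() { return !kZ && (kN == kV); }  // false -> strictly greater than
constexpr bool GE() { return kN == kV; }           // false -> strictly greater or equal

int main() {
  // Matches the /* unordered */ annotations in ARM64FPCondition above.
  assert(NE() && LT() && LE() && HI() && CS());
  assert(!CC() && !LS() && !GT() && !GE());
  return 0;
}
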
 Location ARM64ReturnLocation(Primitive::Type return_type) {
   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
@@ -566,6 +584,56 @@
   }
 }
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type);
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+};
+
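
The new ReadBarrierMarkSlowPathARM64 mirrors the existing ARM slow path: when the collector is marking, the just-loaded reference is handed to the pReadBarrierMark entry point and replaced by the reference it returns. A rough, self-contained C++ sketch of that control flow for a GC root load; the types and names are stand-ins, not ART's API.

#include <atomic>

// Stand-in object type; models only the control flow, not the runtime.
struct Object {};

// Models the pReadBarrierMark entry point: takes a reference and returns the
// (possibly forwarded/marked) reference. Placeholder body.
Object* MarkForGc(Object* ref) { return ref; }

// Models Thread::Current()->GetIsGcMarking().
std::atomic<bool> gc_is_marking{false};

// Shape of a GC-root load with a marking read barrier: a plain load plus one
// flag check on the fast path; only while the collector is marking do we take
// the slow path (ReadBarrierMarkSlowPathARM64 above) and report the reference.
Object* LoadGcRoot(Object** address) {
  Object* root = *address;  // fast-path load
  if (gc_is_marking.load(std::memory_order_relaxed)) {
    root = MarkForGc(root);  // slow path: pReadBarrierMark
  }
  return root;
}
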
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
  public:
@@ -587,7 +655,7 @@
     // to be instrumented, e.g.:
     //
     //   __ Ldr(out, HeapOperand(out, class_offset);
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -603,7 +671,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
     // The read barrier instrumentation does not support the
     // HArm64IntermediateAddress instruction yet.
     DCHECK(!(instruction_->IsArrayGet() &&
@@ -751,14 +821,18 @@
 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Primitive::Type type = Primitive::kPrimNot;
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1094,7 +1168,7 @@
   }
 }
 
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM64::SetupBlockedRegisters() const {
   // Blocked core registers:
   //      lr        : Runtime reserved.
   //      tr        : Runtime reserved.
@@ -1115,40 +1189,17 @@
     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
   }
 
-  if (is_baseline) {
-    CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
-    while (!reserved_core_baseline_registers.IsEmpty()) {
-      blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
-    }
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
-    CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
-    while (!reserved_fp_baseline_registers.IsEmpty()) {
-      blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+    CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
+    while (!reserved_fp_registers_debuggable.IsEmpty()) {
+      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
     }
   }
 }
 
-Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    ssize_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfAllocatableFPRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfAllocatableRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   __ Str(reg, MemOperand(sp, stack_index));
@@ -1343,7 +1394,8 @@
 
 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
                                      CPURegister dst,
-                                     const MemOperand& src) {
+                                     const MemOperand& src,
+                                     bool needs_null_check) {
   MacroAssembler* masm = GetVIXLAssembler();
   BlockPoolsScope block_pools(masm);
   UseScratchRegisterScope temps(masm);
@@ -1359,20 +1411,28 @@
   switch (type) {
     case Primitive::kPrimBoolean:
       __ Ldarb(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimByte:
       __ Ldarb(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimChar:
       __ Ldarh(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimShort:
       __ Ldarh(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimInt:
@@ -1380,7 +1440,9 @@
     case Primitive::kPrimLong:
       DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
       __ Ldar(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
@@ -1389,7 +1451,9 @@
 
       Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
       __ Ldar(temp, base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Fmov(FPRegister(dst), temp);
       break;
     }
@@ -1510,7 +1574,7 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
-void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
   BarrierType type = BarrierAll;
 
   switch (kind) {
@@ -1646,33 +1710,62 @@
 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
                                                    const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+  LocationSummary* locations = instruction->GetLocations();
+  Location base_loc = locations->InAt(0);
+  Location out = locations->Out();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   Primitive::Type field_type = field_info.GetFieldType();
   BlockPoolsScope block_pools(GetVIXLAssembler());
 
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
   bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
-  if (field_info.IsVolatile()) {
-    if (use_acquire_release) {
-      // NB: LoadAcquire will record the pc info if needed.
-      codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
+  if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Object FieldGet with Baker's read barrier case.
+    MacroAssembler* masm = GetVIXLAssembler();
+    UseScratchRegisterScope temps(masm);
+    // /* HeapReference<Object> */ out = *(base + offset)
+    Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
+    Register temp = temps.AcquireW();
+    // Note that potential implicit null checks are handled in this
+    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
+    codegen_->GenerateFieldLoadWithBakerReadBarrier(
+        instruction,
+        out,
+        base,
+        offset,
+        temp,
+        /* needs_null_check */ true,
+        field_info.IsVolatile() && use_acquire_release);
+    if (field_info.IsVolatile() && !use_acquire_release) {
+      // For IRIW sequential consistency kLoadAny is not sufficient.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    // General case.
+    if (field_info.IsVolatile()) {
+      if (use_acquire_release) {
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorARM64::LoadAcquire call.
+        // NB: LoadAcquire will record the pc info if needed.
+        codegen_->LoadAcquire(
+            instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
+      } else {
+        codegen_->Load(field_type, OutputCPURegister(instruction), field);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        // For IRIW sequential consistency kLoadAny is not sufficient.
+        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+      }
     } else {
       codegen_->Load(field_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
-      // For IRIW sequential consistency kLoadAny is not sufficient.
-      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
     }
-  } else {
-    codegen_->Load(field_type, OutputCPURegister(instruction), field);
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    LocationSummary* locations = instruction->GetLocations();
-    Location base = locations->InAt(0);
-    Location out = locations->Out();
-    uint32_t offset = field_info.GetFieldOffset().Uint32Value();
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset);
+    if (field_type == Primitive::kPrimNot) {
+      // If read barriers are enabled, emit read barriers other than
+      // Baker's using a slow path (and also unpoison the loaded
+      // reference, if heap poisoning is enabled).
+      codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+    }
   }
 }
 
@@ -1718,10 +1811,10 @@
         codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       } else {
-        GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
         codegen_->Store(field_type, source, HeapOperand(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
       }
     } else {
       codegen_->Store(field_type, source, HeapOperand(obj, offset));
@@ -2026,50 +2119,62 @@
   LocationSummary* locations = instruction->GetLocations();
   Location index = locations->InAt(1);
   uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
-  MemOperand source = HeapOperand(obj);
-  CPURegister dest = OutputCPURegister(instruction);
+  Location out = locations->Out();
 
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
-  if (index.IsConstant()) {
-    offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
-    source = HeapOperand(obj, offset);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Object ArrayGet with Baker's read barrier case.
+    Register temp = temps.AcquireW();
+    // The read barrier instrumentation does not support the
+    // HArm64IntermediateAddress instruction yet.
+    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+    // Note that a potential implicit null check is handled in the
+    // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+    codegen_->GenerateArrayLoadWithBakerReadBarrier(
+        instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
   } else {
-    Register temp = temps.AcquireSameSizeAs(obj);
-    if (instruction->GetArray()->IsArm64IntermediateAddress()) {
-      // The read barrier instrumentation does not support the
-      // HArm64IntermediateAddress instruction yet.
-      DCHECK(!kEmitCompilerReadBarrier);
-      // We do not need to compute the intermediate address from the array: the
-      // input instruction has done it already. See the comment in
-      // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
-        DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
-      }
-      temp = obj;
-    } else {
-      __ Add(temp, obj, offset);
-    }
-    source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
-  }
-
-  codegen_->Load(type, dest, source);
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-  if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    Location obj_loc = locations->InAt(0);
-    Location out = locations->Out();
+    // General case.
+    MemOperand source = HeapOperand(obj);
     if (index.IsConstant()) {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+      source = HeapOperand(obj, offset);
     } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
+      Register temp = temps.AcquireSameSizeAs(obj);
+      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+        // The read barrier instrumentation does not support the
+        // HArm64IntermediateAddress instruction yet.
+        DCHECK(!kEmitCompilerReadBarrier);
+        // We do not need to compute the intermediate address from the array: the
+        // input instruction has done it already. See the comment in
+        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+        if (kIsDebugBuild) {
+          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+          DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
+        }
+        temp = obj;
+      } else {
+        __ Add(temp, obj, offset);
+      }
+      source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+    }
+
+    codegen_->Load(type, OutputCPURegister(instruction), source);
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+    if (type == Primitive::kPrimNot) {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      Location obj_loc = locations->InAt(0);
+      if (index.IsConstant()) {
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
+      } else {
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
+      }
     }
   }
 }
@@ -2199,12 +2304,12 @@
           //   __ Mov(temp2, temp);
           //   // /* HeapReference<Class> */ temp = temp->component_type_
           //   __ Ldr(temp, HeapOperand(temp, component_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = value->klass_
           //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
           //
           //   __ Cmp(temp, temp2);
@@ -2381,12 +2486,8 @@
       } else {
         __ Fcmp(left, InputFPRegisterAt(compare, 1));
       }
-      if (compare->IsGtBias()) {
-        __ Cset(result, ne);
-      } else {
-        __ Csetm(result, ne);
-      }
-      __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
+      __ Cset(result, ne);
+      __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
       break;
     }
     default:
@@ -2422,7 +2523,6 @@
   LocationSummary* locations = instruction->GetLocations();
   Register res = RegisterFrom(locations->Out(), instruction->GetType());
   IfCondition if_cond = instruction->GetCondition();
-  Condition arm64_cond = ARM64Condition(if_cond);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
     FPRegister lhs = InputFPRegisterAt(instruction, 0);
@@ -2433,20 +2533,13 @@
     } else {
       __ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
     }
-    __ Cset(res, arm64_cond);
-    if (instruction->IsFPConditionTrueIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 1 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 1
-      __ Csel(res, res, Operand(1), vc);  // VC for "not unordered".
-    } else if (instruction->IsFPConditionFalseIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 0 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 0
-      __ Csel(res, res, Operand(0), vc);  // VC for "not unordered".
-    }
+    __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
   } else {
     // Integer cases.
     Register lhs = InputRegisterAt(instruction, 0);
     Operand rhs = InputOperandAt(instruction, 1);
     __ Cmp(lhs, rhs);
-    __ Cset(res, arm64_cond);
+    __ Cset(res, ARM64Condition(if_cond));
   }
 }
 
@@ -2816,15 +2909,11 @@
       } else {
         __ Fcmp(lhs, InputFPRegisterAt(condition, 1));
       }
-      if (condition->IsFPConditionTrueIfNaN()) {
-        __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
-      } else if (condition->IsFPConditionFalseIfNaN()) {
-        __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
-      }
       if (true_target == nullptr) {
-        __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+        IfCondition opposite_condition = condition->GetOppositeCondition();
+        __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
       } else {
-        __ B(ARM64Condition(condition->GetCondition()), true_target);
+        __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
       }
     } else {
       // Integer cases.
@@ -2841,7 +2930,8 @@
         non_fallthrough_target = true_target;
       }
 
-      if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+      if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
+          rhs.IsImmediate() && (rhs.immediate() == 0)) {
         switch (arm64_cond) {
           case eq:
             __ Cbz(lhs, non_fallthrough_target);
@@ -2942,6 +3032,14 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -2968,21 +3066,22 @@
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -2998,10 +3097,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ Ldr(out, HeapOperand(obj.W(), class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       __ Cmp(out, cls);
       __ Cset(out, eq);
@@ -3016,17 +3114,8 @@
       // object to avoid doing a comparison we know will fail.
       vixl::Label loop, success;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = WRegisterFrom(temp_loc);
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ Ldr(out, HeapOperand(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Cmp(out, cls);
@@ -3044,17 +3133,8 @@
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = WRegisterFrom(temp_loc);
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ Ldr(out, HeapOperand(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ Cbnz(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -3072,17 +3152,8 @@
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = WRegisterFrom(temp_loc);
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ Ldr(out, HeapOperand(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -3121,6 +3192,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
                                                                       /* is_fatal */ false);
@@ -3173,30 +3251,29 @@
   locations->SetInAt(1, Location::RequiresRegister());
   // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
   locations->AddTemp(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
-     locations->AddTemp(Location::RequiresRegister());
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
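
Editorial note: the TypeCheckNeedsATemporary() helper that this patch starts calling in the two location builders above is defined outside the hunks shown here. Judging purely from the condition it replaces, a plausible sketch of its intent is the following (a reconstruction, not text from the patch; the actual definition may differ):

    // Sketch only -- reconstructed from the removed condition above.
    static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
      return kEmitCompilerReadBarrier &&
          (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
           type_check_kind == TypeCheckKind::kArrayObjectCheck);
    }
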
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
   Location temp_loc = locations->GetTemp(0);
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   Register temp = WRegisterFrom(temp_loc);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -3215,8 +3292,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ Ldr(temp, HeapOperand(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -3233,18 +3309,8 @@
       // object to avoid doing a comparison we know will fail.
       vixl::Label loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = WRegisterFrom(temp2_loc);
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ Ldr(temp, HeapOperand(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -3256,8 +3322,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -3273,18 +3339,8 @@
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = WRegisterFrom(temp2_loc);
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ Ldr(temp, HeapOperand(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -3295,8 +3351,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -3308,19 +3364,8 @@
       __ B(eq, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = WRegisterFrom(temp2_loc);
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ Ldr(temp, HeapOperand(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -3333,8 +3378,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -3343,8 +3388,8 @@
       __ Cbz(temp, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -3361,6 +3406,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ B(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -3462,9 +3514,9 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
   if (intrinsic.TryDispatch(invoke)) {
@@ -3486,7 +3538,7 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
-  // On arm64 we support all dispatch types.
+  // On ARM64 we support all dispatch types.
   return desired_dispatch_info;
 }
 
@@ -3712,9 +3764,9 @@
 
 
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3763,32 +3815,17 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ Add(out.X(), current_method.X(), declaring_class_offset);
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ Ldr(out, MemOperand(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
     MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ Add(out.X(), out.X(), cache_offset);
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ Ldr(out, MemOperand(out.X(), cache_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(
+        cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -3851,30 +3888,14 @@
   Register out = OutputRegister(load);
   Register current_method = InputRegisterAt(load, 0);
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ Add(out.X(), current_method.X(), declaring_class_offset);
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ Ldr(out, MemOperand(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ Add(out.X(), out.X(), cache_offset);
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ Ldr(out, MemOperand(out.X(), cache_offset));
-  }
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
   if (!load->IsInDexCache()) {
     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
@@ -4243,7 +4264,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
@@ -4628,14 +4649,288 @@
   }
 }
 
-void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction,
-                                             Location out,
-                                             Location ref,
-                                             Location obj,
-                                             uint32_t offset,
-                                             Location index) {
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                     Location out,
+                                                                     uint32_t offset,
+                                                                     Location maybe_temp) {
+  Primitive::Type type = Primitive::kPrimNot;
+  Register out_reg = RegisterFrom(out, type);
+  if (kEmitCompilerReadBarrier) {
+    Register temp_reg = RegisterFrom(maybe_temp, type);
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      out_reg,
+                                                      offset,
+                                                      temp_reg,
+                                                      /* needs_null_check */ false,
+                                                      /* use_load_acquire */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Mov(temp_reg, out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ Ldr(out_reg, HeapOperand(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ Ldr(out_reg, HeapOperand(out_reg, offset));
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                      Location out,
+                                                                      Location obj,
+                                                                      uint32_t offset,
+                                                                      Location maybe_temp) {
+  Primitive::Type type = Primitive::kPrimNot;
+  Register out_reg = RegisterFrom(out, type);
+  Register obj_reg = RegisterFrom(obj, type);
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      Register temp_reg = RegisterFrom(maybe_temp, type);
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      obj_reg,
+                                                      offset,
+                                                      temp_reg,
+                                                      /* needs_null_check */ false,
+                                                      /* use_load_acquire */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                            Location root,
+                                                            vixl::Register obj,
+                                                            uint32_t offset) {
+  Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barrier are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ Ldr(root_reg, MemOperand(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCodeARM64* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      MacroAssembler* masm = GetVIXLAssembler();
+      UseScratchRegisterScope temps(masm);
+      Register temp = temps.AcquireW();
+      // temp = Thread::Current()->GetIsGcMarking()
+      __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value()));
+      __ Cbnz(temp, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ Add(root_reg.X(), obj.X(), offset);
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ Ldr(root_reg, MemOperand(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
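
Editorial note: as a reading aid (not part of the change), a minimal C++-level sketch of the control flow GenerateGcRootFieldLoad emits in the Baker case; LoadWord() is a placeholder standing in for the Ldr above, and the Mark() call corresponds to ReadBarrierMarkSlowPathARM64:

    // Conceptual sketch of the emitted fast path (placeholder names, not ART APIs).
    mirror::Object* root = LoadWord(obj + offset);   // __ Ldr(root_reg, MemOperand(obj, offset))
    if (Thread::Current()->GetIsGcMarking()) {       // __ Ldr(temp, tr + IsGcMarkingOffset); __ Cbnz
      root = ReadBarrier::Mark(root);                // slow path
    }
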
+
+void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                               Location ref,
+                                                               vixl::Register obj,
+                                                               uint32_t offset,
+                                                               Register temp,
+                                                               bool needs_null_check,
+                                                               bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                               Location ref,
+                                                               vixl::Register obj,
+                                                               uint32_t data_offset,
+                                                               Location index,
+                                                               Register temp,
+                                                               bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // Array cells are never volatile variables; therefore, array loads
+  // never use Load-Acquire instructions on ARM64.
+  const bool use_load_acquire = false;
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                   Location ref,
+                                                                   vixl::Register obj,
+                                                                   uint32_t offset,
+                                                                   Location index,
+                                                                   Register temp,
+                                                                   bool needs_null_check,
+                                                                   bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  // If `index` is a valid location, then we are emitting an array
+  // load, so we shouldn't be using a Load Acquire instruction.
+  // In other words: `index.IsValid()` => `!use_load_acquire`.
+  DCHECK(!index.IsValid() || !use_load_acquire);
+
+  MacroAssembler* masm = GetVIXLAssembler();
+  UseScratchRegisterScope temps(masm);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  Primitive::Type type = Primitive::kPrimNot;
+  Register ref_reg = RegisterFrom(ref, type);
+  DCHECK(obj.IsW());
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ Ldr(temp, HeapOperand(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
+  __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Introduce a dependency on the high bits of rb_state, which shall
+  // be all zeroes, to prevent load-load reordering without using a
+  // memory barrier (which would be more expensive).
+  // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
+  Register temp2 = temps.AcquireW();
+  __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
+  // obj is unchanged by this operation, but its value now depends on
+  // temp2, which depends on temp.
+  __ Add(obj, obj, Operand(temp2));
+  temps.Release(temp2);
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    temp2 = temps.AcquireW();
+    // /* HeapReference<Object> */ ref =
+    //     *(obj + offset + index * sizeof(HeapReference<Object>))
+    MemOperand source = HeapOperand(obj);
+    if (index.IsConstant()) {
+      uint32_t computed_offset =
+          offset + (Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type));
+      source = HeapOperand(obj, computed_offset);
+    } else {
+      __ Add(temp2, obj, offset);
+      source = HeapOperand(temp2, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+    }
+    Load(type, ref_reg, source);
+    temps.Release(temp2);
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    MemOperand field = HeapOperand(obj, offset);
+    if (use_load_acquire) {
+      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
+    } else {
+      Load(type, ref_reg, field);
+    }
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ Cmp(temp, ReadBarrier::gray_ptr_);
+  __ B(eq, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
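
Editorial note: for orientation only, the fast path emitted by GenerateReferenceLoadWithBakerReadBarrier can be summarized in C-like pseudocode; the artificial address dependency corresponds to the Bic/Add pair above, and every name below is a placeholder rather than literal ART code:

    // Sketch of the emitted Baker fast path (placeholders, not part of the patch).
    uint32_t rb_state = (obj->monitor_ >> LockWord::kReadBarrierStateShift) &
                        LockWord::kReadBarrierStateMask;
    uintptr_t dep = rb_state & ~LockWord::kReadBarrierStateMask;  // always 0, but forces ordering
    ref = *reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
        reinterpret_cast<uintptr_t>(obj) + dep + offset);         // load depends on the monitor load
    if (rb_state == ReadBarrier::gray_ptr_) {
      ref = ReadBarrier::Mark(ref);                               // slow path
    }
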
+
+void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                 Location out,
+                                                 Location ref,
+                                                 Location obj,
+                                                 uint32_t offset,
+                                                 Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -4649,57 +4944,41 @@
       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ B(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                  Location out,
-                                                  Location ref,
-                                                  Location obj,
-                                                  uint32_t offset,
-                                                  Location index) {
+void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location ref,
+                                                      Location obj,
+                                                      uint32_t offset,
+                                                      Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
   }
 }
 
-void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                    Location out,
-                                                    Location root) {
+void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                        Location out,
+                                                        Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCodeARM64* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ B(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f2ff894..a9d1bbd 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -208,14 +208,53 @@
 
  private:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleCondition(HCondition* instruction);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               vixl::Register obj,
+                               uint32_t offset);
+
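
Editorial note: a short usage sketch (editorial, not from the patch) of how the code-generation visitors combine these helpers, mirroring the calls already visible in VisitInstanceOf and VisitLoadClass above; `maybe_temp_loc` is a real register only when a read barrier needs it:

    // out = obj->klass_          (distinct source and destination registers)
    GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
    // out = out->super_class_    (single register, overwritten in place)
    GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
    // out = current_method->declaring_class_   (GC root load, honors read barriers)
    GenerateGcRootFieldLoad(
        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
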
   void HandleShift(HBinaryOperation* instr);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -337,12 +376,11 @@
   // Emit a write barrier.
   void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -389,9 +427,12 @@
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
 
   void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
-  void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
+  void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+  void LoadAcquire(HInstruction* instruction,
+                   vixl::CPURegister dst,
+                   const vixl::MemOperand& src,
+                   bool needs_null_check);
+  void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
 
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -426,7 +467,27 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::Register obj,
+                                             uint32_t offset,
+                                             vixl::Register temp,
+                                             bool needs_null_check,
+                                             bool use_load_acquire);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             vixl::Register temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -443,23 +504,25 @@
   // When `index` is provided (i.e. for array accesses), the offset
   // value passed to artReadBarrierSlow is adjusted to take `index`
   // into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -469,9 +532,20 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 vixl::Register temp,
+                                                 bool needs_null_check,
+                                                 bool use_load_acquire);
+
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
                                           vixl::Literal<uint64_t>*,
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e34767c..5bd136a 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1042,7 +1042,7 @@
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorMIPS::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[A1_A2] = true;
 
@@ -1072,16 +1072,6 @@
     blocked_fpu_registers_[i] = true;
   }
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-    }
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -1096,52 +1086,6 @@
   }
 }
 
-Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      MipsManagedRegister pair =
-          MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        MipsManagedRegister current =
-            MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  UNREACHABLE();
-}
-
 size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index);
   return kMipsWordSize;
@@ -3835,9 +3779,9 @@
 }
 
 void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3973,9 +3917,9 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c3d4851..2cde0ed 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -290,10 +290,7 @@
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 79cd56d..0505486 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -979,7 +979,7 @@
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
   // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
   blocked_core_registers_[ZERO] = true;
   blocked_core_registers_[K0] = true;
@@ -1003,8 +1003,7 @@
 
   // TODO: review; anything else?
 
-  // TODO: make these two for's conditional on is_baseline once
-  // all the issues with register saving/restoring are sorted out.
+  // TODO: remove once all the issues with register saving/restoring are sorted out.
   for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
     blocked_core_registers_[kCoreCalleeSaves[i]] = true;
   }
@@ -1014,20 +1013,6 @@
   }
 }
 
-Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
   return kMips64WordSize;
@@ -3031,9 +3016,9 @@
 }
 
 void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3182,9 +3167,9 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 7182e8e..140ff95 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -289,10 +289,7 @@
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6259acd..f7ccdd8 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -817,65 +817,13 @@
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
-Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      X86ManagedRegister pair =
-          X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      Register reg = static_cast<Register>(
-          FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters));
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        X86ManagedRegister current =
-            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      return Location::FpuRegisterLocation(
-          FindFreeEntry(blocked_fpu_registers_, kNumberOfXmmRegisters));
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[ECX_EDX] = true;
 
   // Stack register is always reserved.
   blocked_core_registers_[ESP] = true;
 
-  if (is_baseline) {
-    blocked_core_registers_[EBP] = true;
-    blocked_core_registers_[ESI] = true;
-    blocked_core_registers_[EDI] = true;
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -1981,9 +1929,9 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -1999,17 +1947,6 @@
   if (invoke->HasPcRelativeDexCache()) {
     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
-
-  if (codegen_->IsBaseline()) {
-    // Baseline does not have enough registers if the current method also
-    // needs a register. We therefore do not require a register for it, and let
-    // the code generation of the invoke handle it.
-    LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetSpecialInputIndex());
-    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
-    }
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -2022,9 +1959,9 @@
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -4286,7 +4223,7 @@
       if (current_method.IsRegister()) {
         method_reg = current_method.AsRegister<Register>();
       } else {
-        DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified());
+        DCHECK(invoke->GetLocations()->Intrinsified());
         DCHECK(!current_method.IsValid());
         method_reg = reg;
         __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
@@ -5076,11 +5013,6 @@
 }
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
-  // This location builder might end up asking to up to four registers, which is
-  // not currently possible for baseline. The situation in which we need four
-  // registers cannot be met by baseline though, because it has not run any
-  // optimization.
-
   Primitive::Type value_type = instruction->GetComponentType();
 
   bool needs_write_barrier =
@@ -6077,7 +6009,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -6308,8 +6240,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6588,6 +6520,8 @@
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
     __ movl(root_reg, Address(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
   }
 }
 
@@ -6650,7 +6584,9 @@
   // Note: the original implementation in ReadBarrier::Barrier is
   // slightly more complex as:
   // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
+  //   which is a no-op thanks to the x86 memory model);
   // - it performs additional checks that we do not do here for
   //   performance reasons.
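Background for the parenthetical above, as a small portable sketch (not ART code; the function and parameter names are illustrative): under the x86 and x86-64 memory models a load is never reordered with a later load, which is why the comment can call the generated memory barrier a no-op — only compiler reordering has to be prevented.

#include <atomic>

// Illustrative only: an acquire load on x86/x86-64 compiles to a plain mov; the
// relaxed load that follows cannot be hoisted above it.
int LoadPairWithLoadLoadOrdering(const std::atomic<int>& rb_state,
                                 const std::atomic<int>& ref) {
  int state = rb_state.load(std::memory_order_acquire);
  int value = ref.load(std::memory_order_relaxed);
  return state + value;
}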
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index c65c423..43e9543 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -359,9 +359,7 @@
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -453,7 +451,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -461,7 +459,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e024ce2..2ce2d91 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1002,47 +1002,12 @@
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
-Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters);
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFloatRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86_64::SetupBlockedRegisters() const {
   // Stack register is always reserved.
   blocked_core_registers_[RSP] = true;
 
   // Block the register used as TMP.
   blocked_core_registers_[TMP] = true;
-
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-    }
-  }
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
@@ -2161,9 +2126,9 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -2183,9 +2148,9 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -4698,13 +4663,13 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool object_array_set_with_read_barrier =
       kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      (may_need_runtime_call || object_array_set_with_read_barrier) ?
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
           LocationSummary::kCallOnSlowPath :
           LocationSummary::kNoCall);
 
@@ -4733,7 +4698,7 @@
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4785,7 +4750,7 @@
         __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4794,7 +4759,7 @@
       NearLabel done, not_null, do_put;
       SlowPathCode* slow_path = nullptr;
       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4872,7 +4837,7 @@
       } else {
         __ movl(address, register_value);
       }
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -5661,7 +5626,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5892,8 +5857,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6155,6 +6120,8 @@
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
     __ movl(root_reg, Address(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
   }
 }
 
@@ -6217,7 +6184,9 @@
   // Note: the original implementation in ReadBarrier::Barrier is
   // slightly more complex as:
   // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
+  //   here, which is a no-op thanks to the x86-64 memory model);
   // - it performs additional checks that we do not do here for
   //   performance reasons.
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 505c9dc..82aabb0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -347,8 +347,7 @@
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -401,7 +400,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              CpuRegister obj,
                                              uint32_t offset,
                                              Location temp,
@@ -409,7 +408,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              CpuRegister obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index d970704..19d63de 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -40,6 +40,7 @@
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "driver/compiler_options.h"
+#include "graph_checker.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "prepare_for_register_allocation.h"
@@ -70,8 +71,8 @@
     AddAllocatedRegister(Location::RegisterLocation(arm::R7));
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
-    arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline);
+  void SetupBlockedRegisters() const OVERRIDE {
+    arm::CodeGeneratorARM::SetupBlockedRegisters();
     blocked_core_registers_[arm::R4] = true;
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
@@ -90,8 +91,8 @@
     AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
-    x86::CodeGeneratorX86::SetupBlockedRegisters(is_baseline);
+  void SetupBlockedRegisters() const OVERRIDE {
+    x86::CodeGeneratorX86::SetupBlockedRegisters();
     // ebx is a callee-save register in C, but caller-save for ART.
     blocked_core_registers_[x86::EBX] = true;
     blocked_register_pairs_[x86::EAX_EBX] = true;
@@ -200,259 +201,228 @@
 }
 
 template <typename Expected>
-static void RunCodeBaseline(InstructionSet target_isa,
-                            HGraph* graph,
-                            bool has_result,
-                            Expected expected) {
-  InternalCodeAllocator allocator;
+static void RunCode(CodeGenerator* codegen,
+                    HGraph* graph,
+                    std::function<void(HGraph*)> hook_before_codegen,
+                    bool has_result,
+                    Expected expected) {
+  ASSERT_TRUE(graph->IsInSsaForm());
 
-  CompilerOptions compiler_options;
-  std::unique_ptr<const X86InstructionSetFeatures> features_x86(
-      X86InstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
-  // We avoid doing a stack overflow check that requires the runtime being setup,
-  // by making sure the compiler knows the methods we are running are leaf methods.
-  codegenX86.CompileBaseline(&allocator, true);
-  if (target_isa == kX86) {
-    Run(allocator, codegenX86, has_result, expected);
-  }
+  SSAChecker graph_checker(graph);
+  graph_checker.Run();
+  ASSERT_TRUE(graph_checker.IsValid());
 
-  std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
-      ArmInstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
-  codegenARM.CompileBaseline(&allocator, true);
-  if (target_isa == kArm || target_isa == kThumb2) {
-    Run(allocator, codegenARM, has_result, expected);
-  }
-
-  std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
-      X86_64InstructionSetFeatures::FromCppDefines());
-  x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
-  codegenX86_64.CompileBaseline(&allocator, true);
-  if (target_isa == kX86_64) {
-    Run(allocator, codegenX86_64, has_result, expected);
-  }
-
-  std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
-      Arm64InstructionSetFeatures::FromCppDefines());
-  arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
-  codegenARM64.CompileBaseline(&allocator, true);
-  if (target_isa == kArm64) {
-    Run(allocator, codegenARM64, has_result, expected);
-  }
-
-  std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
-      MipsInstructionSetFeatures::FromCppDefines());
-  mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
-  codegenMIPS.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kMips) {
-    Run(allocator, codegenMIPS, has_result, expected);
-  }
-
-  std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
-      Mips64InstructionSetFeatures::FromCppDefines());
-  mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
-  codegenMIPS64.CompileBaseline(&allocator, true);
-  if (target_isa == kMips64) {
-    Run(allocator, codegenMIPS64, has_result, expected);
-  }
-}
-
-template <typename Expected>
-static void RunCodeOptimized(CodeGenerator* codegen,
-                             HGraph* graph,
-                             std::function<void(HGraph*)> hook_before_codegen,
-                             bool has_result,
-                             Expected expected) {
-  // Tests may have already computed it.
-  if (graph->GetReversePostOrder().empty()) {
-    graph->BuildDominatorTree();
-  }
   SsaLivenessAnalysis liveness(graph, codegen);
-  liveness.Analyze();
 
-  RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
-  register_allocator.AllocateRegisters();
+  PrepareForRegisterAllocation(graph).Run();
+  liveness.Analyze();
+  RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
   hook_before_codegen(graph);
 
   InternalCodeAllocator allocator;
-  codegen->CompileOptimized(&allocator);
+  codegen->Compile(&allocator);
   Run(allocator, *codegen, has_result, expected);
 }
 
 template <typename Expected>
-static void RunCodeOptimized(InstructionSet target_isa,
-                             HGraph* graph,
-                             std::function<void(HGraph*)> hook_before_codegen,
-                             bool has_result,
-                             Expected expected) {
+static void RunCode(InstructionSet target_isa,
+                    HGraph* graph,
+                    std::function<void(HGraph*)> hook_before_codegen,
+                    bool has_result,
+                    Expected expected) {
   CompilerOptions compiler_options;
   if (target_isa == kArm || target_isa == kThumb2) {
     std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
         ArmInstructionSetFeatures::FromCppDefines());
     TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
-    RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kArm64) {
     std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
         Arm64InstructionSetFeatures::FromCppDefines());
     arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
-    RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kX86) {
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
     x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
-    RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kX86_64) {
     std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
         X86_64InstructionSetFeatures::FromCppDefines());
     x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
-    RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kMips) {
     std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
         MipsInstructionSetFeatures::FromCppDefines());
     mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
-    RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kMips64) {
     std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
         Mips64InstructionSetFeatures::FromCppDefines());
     mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
-    RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
   }
 }
 
-static void TestCode(InstructionSet target_isa,
-                     const uint16_t* data,
+static ::std::vector<InstructionSet> GetTargetISAs() {
+  ::std::vector<InstructionSet> v;
+  // Add all ISAs that are executable on hardware or on simulator.
+  const ::std::vector<InstructionSet> executable_isa_candidates = {
+    kArm,
+    kArm64,
+    kThumb2,
+    kX86,
+    kX86_64,
+    kMips,
+    kMips64
+  };
+
+  for (auto target_isa : executable_isa_candidates) {
+    if (CanExecute(target_isa)) {
+      v.push_back(target_isa);
+    }
+  }
+
+  return v;
+}
+
+static void TestCode(const uint16_t* data,
                      bool has_result = false,
                      int32_t expected = 0) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  HGraph* graph = CreateGraph(&arena);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  // Remove suspend checks, they cannot be executed in this context.
-  RemoveSuspendChecks(graph);
-  RunCodeBaseline(target_isa, graph, has_result, expected);
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    HGraph* graph = CreateGraph(&arena);
+    HGraphBuilder builder(graph);
+    const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+    bool graph_built = builder.BuildGraph(*item);
+    ASSERT_TRUE(graph_built);
+    // Remove suspend checks, they cannot be executed in this context.
+    RemoveSuspendChecks(graph);
+    TransformToSsa(graph);
+    RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+  }
 }
 
-static void TestCodeLong(InstructionSet target_isa,
-                         const uint16_t* data,
+static void TestCodeLong(const uint16_t* data,
                          bool has_result,
                          int64_t expected) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  HGraph* graph = CreateGraph(&arena);
-  HGraphBuilder builder(graph, Primitive::kPrimLong);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  // Remove suspend checks, they cannot be executed in this context.
-  RemoveSuspendChecks(graph);
-  RunCodeBaseline(target_isa, graph, has_result, expected);
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    HGraph* graph = CreateGraph(&arena);
+    HGraphBuilder builder(graph, Primitive::kPrimLong);
+    const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+    bool graph_built = builder.BuildGraph(*item);
+    ASSERT_TRUE(graph_built);
+    // Remove suspend checks, they cannot be executed in this context.
+    RemoveSuspendChecks(graph);
+    TransformToSsa(graph);
+    RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+  }
 }
 
-class CodegenTest: public ::testing::TestWithParam<InstructionSet> {};
+class CodegenTest : public CommonCompilerTest {};
 
-TEST_P(CodegenTest, ReturnVoid) {
+TEST_F(CodegenTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG1) {
+TEST_F(CodegenTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG2) {
+TEST_F(CodegenTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG3) {
+TEST_F(CodegenTest, CFG3) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0xFF00);
 
-  TestCode(GetParam(), data1);
+  TestCode(data1);
 
   const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_16, 3,
     Instruction::RETURN_VOID,
     Instruction::GOTO_16, 0xFFFF);
 
-  TestCode(GetParam(), data2);
+  TestCode(data2);
 
   const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_32, 4, 0,
     Instruction::RETURN_VOID,
     Instruction::GOTO_32, 0xFFFF, 0xFFFF);
 
-  TestCode(GetParam(), data3);
+  TestCode(data3);
 }
 
-TEST_P(CodegenTest, CFG4) {
+TEST_F(CodegenTest, CFG4) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0xFE00);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG5) {
+TEST_F(CodegenTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, IntConstant) {
+TEST_F(CodegenTest, IntConstant) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, Return1) {
+TEST_F(CodegenTest, Return1) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN | 0);
 
-  TestCode(GetParam(), data, true, 0);
+  TestCode(data, true, 0);
 }
 
-TEST_P(CodegenTest, Return2) {
+TEST_F(CodegenTest, Return2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 0 | 1 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 0);
+  TestCode(data, true, 0);
 }
 
-TEST_P(CodegenTest, Return3) {
+TEST_F(CodegenTest, Return3) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 1);
+  TestCode(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnIf1) {
+TEST_F(CodegenTest, ReturnIf1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -460,10 +430,10 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 1);
+  TestCode(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnIf2) {
+TEST_F(CodegenTest, ReturnIf2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -471,12 +441,12 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 0);
+  TestCode(data, true, 0);
 }
 
 // Exercise bit-wise (one's complement) not-int instruction.
 #define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST_P(CodegenTest, TEST_NAME) {                        \
+TEST_F(CodegenTest, TEST_NAME) {                        \
   const int32_t input = INPUT;                          \
   const uint16_t input_lo = Low16Bits(input);           \
   const uint16_t input_hi = High16Bits(input);          \
@@ -485,7 +455,7 @@
       Instruction::NOT_INT | 1 << 8 | 0 << 12 ,         \
       Instruction::RETURN | 1 << 8);                    \
                                                         \
-  TestCode(GetParam(), data, true, EXPECTED_OUTPUT);    \
+  TestCode(data, true, EXPECTED_OUTPUT);                \
 }
 
 NOT_INT_TEST(ReturnNotIntMinus2, -2, 1)
@@ -501,7 +471,7 @@
 
 // Exercise bit-wise (one's complement) not-long instruction.
 #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT)                 \
-TEST_P(CodegenTest, TEST_NAME) {                                         \
+TEST_F(CodegenTest, TEST_NAME) {                                         \
   const int64_t input = INPUT;                                           \
   const uint16_t word0 = Low16Bits(Low32Bits(input));   /* LSW. */       \
   const uint16_t word1 = High16Bits(Low32Bits(input));                   \
@@ -512,7 +482,7 @@
       Instruction::NOT_LONG | 2 << 8 | 0 << 12,                          \
       Instruction::RETURN_WIDE | 2 << 8);                                \
                                                                          \
-  TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT);                 \
+  TestCodeLong(data, true, EXPECTED_OUTPUT);                             \
 }
 
 NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1))
@@ -551,7 +521,7 @@
 
 #undef NOT_LONG_TEST
 
-TEST_P(CodegenTest, IntToLongOfLongToInt) {
+TEST_F(CodegenTest, IntToLongOfLongToInt) {
   const int64_t input = INT64_C(4294967296);             // 2^32
   const uint16_t word0 = Low16Bits(Low32Bits(input));    // LSW.
   const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -565,192 +535,146 @@
       Instruction::INT_TO_LONG | 2 << 8 | 4 << 12,
       Instruction::RETURN_WIDE | 2 << 8);
 
-  TestCodeLong(GetParam(), data, true, 1);
+  TestCodeLong(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnAdd1) {
+TEST_F(CodegenTest, ReturnAdd1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, ReturnAdd2) {
+TEST_F(CodegenTest, ReturnAdd2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, ReturnAdd3) {
+TEST_F(CodegenTest, ReturnAdd3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, ReturnAdd4) {
+TEST_F(CodegenTest, ReturnAdd4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, NonMaterializedCondition) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
-  HGraph* graph = CreateGraph(&allocator);
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->SetEntryBlock(entry);
-  entry->AddInstruction(new (&allocator) HGoto());
-
-  HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(first_block);
-  entry->AddSuccessor(first_block);
-  HIntConstant* constant0 = graph->GetIntConstant(0);
-  HIntConstant* constant1 = graph->GetIntConstant(1);
-  HEqual* equal = new (&allocator) HEqual(constant0, constant0);
-  first_block->AddInstruction(equal);
-  first_block->AddInstruction(new (&allocator) HIf(equal));
-
-  HBasicBlock* then = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* else_ = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-
-  graph->AddBlock(then);
-  graph->AddBlock(else_);
-  graph->AddBlock(exit);
-  first_block->AddSuccessor(then);
-  first_block->AddSuccessor(else_);
-  then->AddSuccessor(exit);
-  else_->AddSuccessor(exit);
-
-  exit->AddInstruction(new (&allocator) HExit());
-  then->AddInstruction(new (&allocator) HReturn(constant0));
-  else_->AddInstruction(new (&allocator) HReturn(constant1));
-
-  ASSERT_TRUE(equal->NeedsMaterialization());
-  graph->BuildDominatorTree();
-  PrepareForRegisterAllocation(graph).Run();
-  ASSERT_FALSE(equal->NeedsMaterialization());
-
-  auto hook_before_codegen = [](HGraph* graph_in) {
-    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
-    HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
-    block->InsertInstructionBefore(move, block->GetLastInstruction());
-  };
-
-  RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0);
-}
-
-TEST_P(CodegenTest, ReturnMulInt) {
+TEST_F(CodegenTest, ReturnMulInt) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulInt2addr) {
+TEST_F(CodegenTest, ReturnMulInt2addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulLong) {
+TEST_F(CodegenTest, ReturnMulLong) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
-    Instruction::CONST_4 | 3 << 12 | 0,
-    Instruction::CONST_4 | 0 << 12 | 1 << 8,
-    Instruction::CONST_4 | 4 << 12 | 2 << 8,
-    Instruction::CONST_4 | 0 << 12 | 3 << 8,
+    Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+    Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
     Instruction::MUL_LONG, 2 << 8 | 0,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(GetParam(), data, true, 12);
+  TestCodeLong(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulLong2addr) {
+TEST_F(CodegenTest, ReturnMulLong2addr) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
-    Instruction::CONST_4 | 3 << 12 | 0 << 8,
-    Instruction::CONST_4 | 0 << 12 | 1 << 8,
-    Instruction::CONST_4 | 4 << 12 | 2 << 8,
-    Instruction::CONST_4 | 0 << 12 | 3 << 8,
+    Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+    Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
     Instruction::MUL_LONG_2ADDR | 2 << 12,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(GetParam(), data, true, 12);
+  TestCodeLong(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulIntLit8) {
+TEST_F(CodegenTest, ReturnMulIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulIntLit16) {
+TEST_F(CodegenTest, ReturnMulIntLit16) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, MaterializedCondition1) {
-  // Check that condition are materialized correctly. A materialized condition
-  // should yield `1` if it evaluated to true, and `0` otherwise.
-  // We force the materialization of comparisons for different combinations of
-  // inputs and check the results.
-
-  int lhs[] = {1, 2, -1, 2, 0xabc};
-  int rhs[] = {2, 1, 2, -1, 0xabc};
-
-  for (size_t i = 0; i < arraysize(lhs); i++) {
+TEST_F(CodegenTest, NonMaterializedCondition) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
     ArenaPool pool;
     ArenaAllocator allocator(&pool);
+
     HGraph* graph = CreateGraph(&allocator);
+    HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(entry);
+    graph->SetEntryBlock(entry);
+    entry->AddInstruction(new (&allocator) HGoto());
 
-    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(entry_block);
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddInstruction(new (&allocator) HGoto());
-    HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(code_block);
+    HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(first_block);
+    entry->AddSuccessor(first_block);
+    HIntConstant* constant0 = graph->GetIntConstant(0);
+    HIntConstant* constant1 = graph->GetIntConstant(1);
+    HEqual* equal = new (&allocator) HEqual(constant0, constant0);
+    first_block->AddInstruction(equal);
+    first_block->AddInstruction(new (&allocator) HIf(equal));
+
+    HBasicBlock* then_block = new (&allocator) HBasicBlock(graph);
+    HBasicBlock* else_block = new (&allocator) HBasicBlock(graph);
     HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(exit_block);
-    exit_block->AddInstruction(new (&allocator) HExit());
-
-    entry_block->AddSuccessor(code_block);
-    code_block->AddSuccessor(exit_block);
     graph->SetExitBlock(exit_block);
 
-    HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
-    HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
-    HLessThan cmp_lt(cst_lhs, cst_rhs);
-    code_block->AddInstruction(&cmp_lt);
-    HReturn ret(&cmp_lt);
-    code_block->AddInstruction(&ret);
+    graph->AddBlock(then_block);
+    graph->AddBlock(else_block);
+    graph->AddBlock(exit_block);
+    first_block->AddSuccessor(then_block);
+    first_block->AddSuccessor(else_block);
+    then_block->AddSuccessor(exit_block);
+    else_block->AddSuccessor(exit_block);
+
+    exit_block->AddInstruction(new (&allocator) HExit());
+    then_block->AddInstruction(new (&allocator) HReturn(constant0));
+    else_block->AddInstruction(new (&allocator) HReturn(constant1));
+
+    ASSERT_TRUE(equal->NeedsMaterialization());
+    TransformToSsa(graph);
+    PrepareForRegisterAllocation(graph).Run();
+    ASSERT_FALSE(equal->NeedsMaterialization());
 
     auto hook_before_codegen = [](HGraph* graph_in) {
       HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
@@ -758,93 +682,143 @@
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
 
-    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    RunCode(target_isa, graph, hook_before_codegen, true, 0);
   }
 }
 
-TEST_P(CodegenTest, MaterializedCondition2) {
-  // Check that HIf correctly interprets a materialized condition.
-  // We force the materialization of comparisons for different combinations of
-  // inputs. An HIf takes the materialized combination as input and returns a
-  // value that we verify.
+TEST_F(CodegenTest, MaterializedCondition1) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that conditions are materialized correctly. A materialized condition
+    // should yield `1` if it evaluated to true, and `0` otherwise.
+    // We force the materialization of comparisons for different combinations of
+    // inputs and check the results.
 
-    int lhs[] = {1, 2, -1, 2, 0xabc};
-    int rhs[] = {2, 1, 2, -1, 0xabc};
+    int lhs[] = {1, 2, -1, 2, 0xabc};
+    int rhs[] = {2, 1, 2, -1, 0xabc};
 
-  for (size_t i = 0; i < arraysize(lhs); i++) {
-    ArenaPool pool;
-    ArenaAllocator allocator(&pool);
-    HGraph* graph = CreateGraph(&allocator);
+    for (size_t i = 0; i < arraysize(lhs); i++) {
+      ArenaPool pool;
+      ArenaAllocator allocator(&pool);
+      HGraph* graph = CreateGraph(&allocator);
 
-    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(entry_block);
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddInstruction(new (&allocator) HGoto());
+      HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(entry_block);
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddInstruction(new (&allocator) HGoto());
+      HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(code_block);
+      HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(exit_block);
+      exit_block->AddInstruction(new (&allocator) HExit());
 
-    HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_block);
-    HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_true_block);
-    HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_false_block);
-    HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(exit_block);
-    exit_block->AddInstruction(new (&allocator) HExit());
+      entry_block->AddSuccessor(code_block);
+      code_block->AddSuccessor(exit_block);
+      graph->SetExitBlock(exit_block);
 
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddSuccessor(if_block);
-    if_block->AddSuccessor(if_true_block);
-    if_block->AddSuccessor(if_false_block);
-    if_true_block->AddSuccessor(exit_block);
-    if_false_block->AddSuccessor(exit_block);
-    graph->SetExitBlock(exit_block);
+      HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+      HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+      HLessThan cmp_lt(cst_lhs, cst_rhs);
+      code_block->AddInstruction(&cmp_lt);
+      HReturn ret(&cmp_lt);
+      code_block->AddInstruction(&ret);
 
-    HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
-    HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
-    HLessThan cmp_lt(cst_lhs, cst_rhs);
-    if_block->AddInstruction(&cmp_lt);
-    // We insert a temporary to separate the HIf from the HLessThan and force
-    // the materialization of the condition.
-    HTemporary force_materialization(0);
-    if_block->AddInstruction(&force_materialization);
-    HIf if_lt(&cmp_lt);
-    if_block->AddInstruction(&if_lt);
-
-    HIntConstant* cst_lt = graph->GetIntConstant(1);
-    HReturn ret_lt(cst_lt);
-    if_true_block->AddInstruction(&ret_lt);
-    HIntConstant* cst_ge = graph->GetIntConstant(0);
-    HReturn ret_ge(cst_ge);
-    if_false_block->AddInstruction(&ret_ge);
-
-    auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
-      HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
-      block->InsertInstructionBefore(move, block->GetLastInstruction());
-    };
-
-    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+      TransformToSsa(graph);
+      auto hook_before_codegen = [](HGraph* graph_in) {
+        HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+        HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+        block->InsertInstructionBefore(move, block->GetLastInstruction());
+      };
+      RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    }
   }
 }
 
-TEST_P(CodegenTest, ReturnDivIntLit8) {
+TEST_F(CodegenTest, MaterializedCondition2) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that HIf correctly interprets a materialized condition.
+    // We force the materialization of comparisons for different combinations of
+    // inputs. An HIf takes the materialized combination as input and returns a
+    // value that we verify.
+
+    int lhs[] = {1, 2, -1, 2, 0xabc};
+    int rhs[] = {2, 1, 2, -1, 0xabc};
+
+
+    for (size_t i = 0; i < arraysize(lhs); i++) {
+      ArenaPool pool;
+      ArenaAllocator allocator(&pool);
+      HGraph* graph = CreateGraph(&allocator);
+
+      HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(entry_block);
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddInstruction(new (&allocator) HGoto());
+
+      HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_block);
+      HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_true_block);
+      HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_false_block);
+      HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(exit_block);
+      exit_block->AddInstruction(new (&allocator) HExit());
+
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddSuccessor(if_block);
+      if_block->AddSuccessor(if_true_block);
+      if_block->AddSuccessor(if_false_block);
+      if_true_block->AddSuccessor(exit_block);
+      if_false_block->AddSuccessor(exit_block);
+      graph->SetExitBlock(exit_block);
+
+      HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+      HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+      HLessThan cmp_lt(cst_lhs, cst_rhs);
+      if_block->AddInstruction(&cmp_lt);
+      // We insert a temporary to separate the HIf from the HLessThan and force
+      // the materialization of the condition.
+      HTemporary force_materialization(0);
+      if_block->AddInstruction(&force_materialization);
+      HIf if_lt(&cmp_lt);
+      if_block->AddInstruction(&if_lt);
+
+      HIntConstant* cst_lt = graph->GetIntConstant(1);
+      HReturn ret_lt(cst_lt);
+      if_true_block->AddInstruction(&ret_lt);
+      HIntConstant* cst_ge = graph->GetIntConstant(0);
+      HReturn ret_ge(cst_ge);
+      if_false_block->AddInstruction(&ret_ge);
+
+      TransformToSsa(graph);
+      auto hook_before_codegen = [](HGraph* graph_in) {
+        HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+        HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+        block->InsertInstructionBefore(move, block->GetLastInstruction());
+      };
+      RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    }
+  }
+}
+
+TEST_F(CodegenTest, ReturnDivIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::DIV_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 1);
+  TestCode(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnDivInt2Addr) {
+TEST_F(CodegenTest, ReturnDivInt2Addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0,
     Instruction::CONST_4 | 2 << 12 | 1 << 8,
     Instruction::DIV_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 2);
+  TestCode(data, true, 2);
 }
 
 // Helper method.
@@ -933,80 +907,55 @@
   block->AddInstruction(comparison);
   block->AddInstruction(new (&allocator) HReturn(comparison));
 
-  auto hook_before_codegen = [](HGraph*) {
-  };
-  RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result);
+  TransformToSsa(graph);
+  RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result);
 }
 
-TEST_P(CodegenTest, ComparisonsInt) {
-  const InstructionSet target_isa = GetParam();
-  for (int64_t i = -1; i <= 1; i++) {
-    for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondB,  i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondA,  i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+TEST_F(CodegenTest, ComparisonsInt) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    for (int64_t i = -1; i <= 1; i++) {
+      for (int64_t j = -1; j <= 1; j++) {
+        TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondB,  i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondA,  i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+      }
     }
   }
 }
 
-TEST_P(CodegenTest, ComparisonsLong) {
+TEST_F(CodegenTest, ComparisonsLong) {
   // TODO: make MIPS work for long
   if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
     return;
   }
 
-  const InstructionSet target_isa = GetParam();
-  if (target_isa == kMips || target_isa == kMips64) {
-    return;
-  }
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    if (target_isa == kMips || target_isa == kMips64) {
+      continue;
+    }
 
-  for (int64_t i = -1; i <= 1; i++) {
-    for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondB,  i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondA,  i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+    for (int64_t i = -1; i <= 1; i++) {
+      for (int64_t j = -1; j <= 1; j++) {
+        TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondB,  i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondA,  i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+      }
     }
   }
 }
 
-static ::std::vector<InstructionSet> GetTargetISAs() {
-  ::std::vector<InstructionSet> v;
-  // Add all ISAs that are executable on hardware or on simulator.
-  const ::std::vector<InstructionSet> executable_isa_candidates = {
-    kArm,
-    kArm64,
-    kThumb2,
-    kX86,
-    kX86_64,
-    kMips,
-    kMips64
-  };
-
-  for (auto target_isa : executable_isa_candidates) {
-    if (CanExecute(target_isa)) {
-      v.push_back(target_isa);
-    }
-  }
-
-  return v;
-}
-
-INSTANTIATE_TEST_CASE_P(MultipleTargets,
-                        CodegenTest,
-                        ::testing::ValuesIn(GetTargetISAs()));
-
 }  // namespace art
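A minimal sketch of the test-dispatch pattern codegen_test.cc adopts above, using hypothetical stand-ins (Target, CanExecute, RunFor): instead of gtest value-parameterized tests, each TEST_F body loops over whichever ISAs are executable on the current host.

#include <vector>
#include <gtest/gtest.h>

enum class Target { kArm, kArm64, kX86, kX86_64 };

// Placeholder predicate; a real version would check the host ISA or a simulator.
static bool CanExecute(Target target) {
  return target == Target::kX86_64;
}

static std::vector<Target> GetExecutableTargets() {
  const std::vector<Target> candidates = {
      Target::kArm, Target::kArm64, Target::kX86, Target::kX86_64};
  std::vector<Target> result;
  for (Target target : candidates) {
    if (CanExecute(target)) {
      result.push_back(target);
    }
  }
  return result;
}

// Placeholder for "compile the graph for this target and run it".
static int RunFor(Target /*target*/, int input) {
  return input + 7;
}

class ExampleCodegenTest : public ::testing::Test {};

TEST_F(ExampleCodegenTest, RunsOnEveryExecutableTarget) {
  for (Target target : GetExecutableTargets()) {
    EXPECT_EQ(RunFor(target, 0), 7);
  }
}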
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 86a695b..e170e37 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -89,15 +89,18 @@
 }
 
 void HDeadCodeElimination::RemoveDeadBlocks() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not eliminate dead blocks if the graph has irreducible loops. We could
+    // support it, but that would require changes in our loop representation to handle
+    // multiple entry points. We decided it was not worth the complexity.
+    return;
+  }
   // Classify blocks as reachable/unreachable.
   ArenaAllocator* allocator = graph_->GetArena();
   ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false);
 
   MarkReachableBlocks(graph_, &live_blocks);
   bool removed_one_or_more_blocks = false;
-  // If the graph has irreducible loops we need to reset all graph analysis we have done
-  // before: the irreducible loop can be turned into a reducible one.
-  // For simplicity, we do the full computation regardless of the type of the loops.
   bool rerun_dominance_and_loop_analysis = false;
 
   // Remove all dead blocks. Iterate in post order because removal needs the
@@ -105,9 +108,6 @@
   // inside out.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block  = it.Current();
-    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
-      rerun_dominance_and_loop_analysis = true;
-    }
     int id = block->GetBlockId();
     if (!live_blocks.IsBitSet(id)) {
       MaybeRecordDeadBlock(block);
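The early return added above sidesteps the irreducible-loop case entirely; the rest of RemoveDeadBlocks keeps its mark-then-sweep shape. A self-contained sketch of that shape, with illustrative types only:

#include <vector>

struct Block {
  int id;
  std::vector<Block*> successors;
};

// Mark every block reachable from `entry` in the `live` bit set.
static void MarkReachable(const Block* entry, std::vector<bool>* live) {
  if ((*live)[entry->id]) {
    return;
  }
  (*live)[entry->id] = true;
  for (const Block* successor : entry->successors) {
    MarkReachable(successor, live);
  }
}

// Keep only the marked blocks; every unmarked block is dead and dropped.
static std::vector<Block*> SweepDeadBlocks(Block* entry, const std::vector<Block*>& blocks) {
  std::vector<bool> live(blocks.size(), false);
  MarkReachable(entry, &live);
  std::vector<Block*> kept;
  for (Block* block : blocks) {
    if (live[block->id]) {
      kept.push_back(block);
    }
  }
  return kept;
}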
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 91e4a99..feb8b20 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -133,8 +133,9 @@
 
   const uint32_t dominators[] = {
       kInvalidBlockId,
-      0,
-      kInvalidBlockId
+      3,
+      kInvalidBlockId,
+      0
   };
 
   TestCode(data1, dominators, sizeof(dominators) / sizeof(int));
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 9439ba0..3113677 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -484,6 +484,18 @@
         loop_information->GetPreHeader()->GetSuccessors().size()));
   }
 
+  if (loop_information->GetSuspendCheck() == nullptr) {
+    AddError(StringPrintf(
+        "Loop with header %d does not have a suspend check.",
+        loop_header->GetBlockId()));
+  }
+
+  if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
+    AddError(StringPrintf(
+        "Loop header %d does not have the loop suspend check as the first instruction.",
+        loop_header->GetBlockId()));
+  }
+
   // Ensure the loop header has only one incoming branch and the remaining
   // predecessors are back edges.
   size_t num_preds = loop_header->GetPredecessors().size();
@@ -589,6 +601,14 @@
     }
   }
 
+  if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) {
+    AddError(StringPrintf("Instruction %s:%d in block %d requires an environment "
+                          "but does not have one.",
+                          instruction->DebugName(),
+                          instruction->GetId(),
+                          current_block_->GetBlockId()));
+  }
+
   // Ensure an instruction having an environment is dominated by the
   // instructions contained in the environment.
   for (HEnvironment* environment = instruction->GetEnvironment();
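The two checks added above follow the checker's existing convention: a violated invariant appends a formatted message instead of aborting, so one pass over the graph reports every problem. A stripped-down sketch of that convention with stand-in types:

#include <string>
#include <vector>

struct Instruction {
  const char* name;
  bool needs_environment;
  bool has_environment;
};

// Collect an error for every instruction that claims to need an environment but
// was not given one, in the spirit of the AddError() calls above.
static void CheckEnvironments(const std::vector<Instruction>& instructions,
                              int block_id,
                              std::vector<std::string>* errors) {
  for (const Instruction& insn : instructions) {
    if (insn.needs_environment && !insn.has_environment) {
      errors->push_back(std::string(insn.name) + " in block " +
                        std::to_string(block_id) +
                        " requires an environment but does not have one.");
    }
  }
}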
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index d4b9b71..d530564 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -164,7 +164,7 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
   ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
@@ -199,7 +199,7 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
   ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 293282e..2e79df1 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -356,12 +356,12 @@
       compare, invoke_instruction->GetDexPc());
   // TODO: Extend reference type propagation to understand the guard.
   if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(load_class, cursor);
+    bb_cursor->InsertInstructionAfter(field_get, cursor);
   } else {
-    bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+    bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction());
   }
-  bb_cursor->InsertInstructionAfter(field_get, load_class);
-  bb_cursor->InsertInstructionAfter(compare, field_get);
+  bb_cursor->InsertInstructionAfter(load_class, field_get);
+  bb_cursor->InsertInstructionAfter(compare, load_class);
   bb_cursor->InsertInstructionAfter(deoptimize, compare);
   deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
 
@@ -419,7 +419,10 @@
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
     VLOG(compiler) << "Method " << PrettyMethod(method)
-                   << " is too big to inline";
+                   << " is too big to inline: "
+                   << code_item->insns_size_in_code_units_
+                   << " > "
+                   << inline_max_code_units;
     return false;
   }
 
@@ -639,9 +642,12 @@
 
   for (; !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    if (block->IsLoopHeader()) {
+
+    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
+      // Don't inline methods with irreducible loops, as they could prevent some
+      // optimizations from running.
       VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                     << " could not be inlined because it contains a loop";
+                     << " could not be inlined because it contains an irreducible loop";
       return false;
     }
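For readers unfamiliar with the term, an irreducible loop is a loop that control flow can enter at more than one point, so it has no unique header block. A minimal standalone sketch of such control flow (illustrative C++ only, not ART code; the values are made up):

int IrreducibleLoopExample(int n, bool enter_in_middle) {
  int sum = 0;
  if (enter_in_middle) goto middle;  // Second entry point into the loop body.
head:
  sum += 1;
middle:
  sum += n;
  if (sum < 100) goto head;          // Back edge targeting the first entry point.
  return sum;
}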
 
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index c6da9a3..5caf077 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -176,6 +176,16 @@
       }
 
     // Misc data processing.
+    case kIntrinsicBitCount:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerBitCount;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongBitCount;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
     case kIntrinsicNumberOfLeadingZeros:
       switch (GetType(method.d.data, true)) {
         case Primitive::kPrimInt:
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 9f50d18..3bf3f7f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -85,9 +85,9 @@
                             InvokeDexCallingConventionVisitor* calling_convention_visitor) {
     if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
       HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
-      // When we do not run baseline, explicit clinit checks triggered by static
-      // invokes must have been pruned by art::PrepareForRegisterAllocation.
-      DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+      // Explicit clinit checks triggered by static invokes must have been
+      // pruned by art::PrepareForRegisterAllocation.
+      DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
     }
 
     if (invoke->GetNumberOfArguments() == 0) {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index b1fbf28..e72f927 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1577,10 +1577,12 @@
 void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
 UNIMPLEMENTED_INTRINSIC(IntegerReverse)
 UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
 UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 81cab86..8cf2d4f 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -752,21 +752,33 @@
   Register trg = RegisterFrom(trg_loc, type);
   bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
-  MemOperand mem_op(base.X(), offset);
-  if (is_volatile) {
-    if (use_acquire_release) {
-      codegen->LoadAcquire(invoke, trg, mem_op);
-    } else {
-      codegen->Load(type, trg, mem_op);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
+    UseScratchRegisterScope temps(masm);
+    Register temp = temps.AcquireW();
+    codegen->GenerateArrayLoadWithBakerReadBarrier(
+        invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+    if (is_volatile && !use_acquire_release) {
       __ Dmb(InnerShareable, BarrierReads);
     }
   } else {
-    codegen->Load(type, trg, mem_op);
-  }
+    // Other cases.
+    MemOperand mem_op(base.X(), offset);
+    if (is_volatile) {
+      if (use_acquire_release) {
+        codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
+      } else {
+        codegen->Load(type, trg, mem_op);
+        __ Dmb(InnerShareable, BarrierReads);
+      }
+    } else {
+      codegen->Load(type, trg, mem_op);
+    }
 
-  if (type == Primitive::kPrimNot) {
-    DCHECK(trg.IsW());
-    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+    if (type == Primitive::kPrimNot) {
+      DCHECK(trg.IsW());
+      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+    }
   }
 }
 
@@ -1026,10 +1038,15 @@
   vixl::Label loop_head, exit_loop;
   if (use_acquire_release) {
     __ Bind(&loop_head);
-    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-    // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+    // the reference stored in the object before attempting the CAS,
+    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+    // implementation.
+    //
     // Note that this code is not (yet) used when read barriers are
     // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
     __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1447,8 +1464,10 @@
 void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 2e87546..ea38034 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -28,12 +28,14 @@
   V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bc126a2..81112b1 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -935,6 +935,9 @@
 void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
 UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
 UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
 UNIMPLEMENTED_INTRINSIC(MathAbsInt)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8b45ea7..ac969e3 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1724,6 +1724,9 @@
 void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 677f2e9..e48bed5 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2303,6 +2303,81 @@
   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
 }
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support. This results in generating
+    // a call for the intrinsic rather than direct code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  if (is_long) {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  } else {
+    locations->SetInAt(0, Location::Any());
+  }
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    value = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  // Handle the non-constant cases.
+  if (!is_long) {
+    if (src.IsRegister()) {
+      __ popcntl(out, src.AsRegister<Register>());
+    } else {
+      DCHECK(src.IsStackSlot());
+      __ popcntl(out, Address(ESP, src.GetStackIndex()));
+    }
+    return;
+  }
+
+  // The 64-bit case needs to worry about both parts of the register.
+  DCHECK(src.IsRegisterPair());
+  Register src_lo = src.AsRegisterPairLow<Register>();
+  Register src_hi = src.AsRegisterPairHigh<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  __ popcntl(temp, src_lo);
+  __ popcntl(out, src_hi);
+  __ addl(out, temp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
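One detail worth calling out in the long path above: on 32-bit x86 a Java long occupies a register pair, and its bit count is simply the sum of the bit counts of the two 32-bit halves, which is what the popcntl/popcntl/addl sequence computes. A standalone sketch of that identity (illustrative C++ only, not ART code; std::popcount requires C++20):

#include <bit>
#include <cassert>
#include <cstdint>

int BitCountOfLongViaHalves(uint64_t value) {
  uint32_t lo = static_cast<uint32_t>(value);
  uint32_t hi = static_cast<uint32_t>(value >> 32);
  // Mirrors the popcntl(temp, src_lo); popcntl(out, src_hi); addl(out, temp) sequence.
  return std::popcount(lo) + std::popcount(hi);
}

int main() {
  assert(BitCountOfLongViaHalves(0xF0F0F0F00F0F0F0FULL) == 32);
  assert(BitCountOfLongViaHalves(0u) == 0);
  return 0;
}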
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 690cf3d..23a628f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2368,6 +2368,70 @@
   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
 }
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support. This results in generating
+    // a call for the intrinsic rather than direct code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    value = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  if (src.IsRegister()) {
+    if (is_long) {
+      __ popcntq(out, src.AsRegister<CpuRegister>());
+    } else {
+      __ popcntl(out, src.AsRegister<CpuRegister>());
+    }
+  } else if (is_long) {
+    DCHECK(src.IsDoubleStackSlot());
+    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  } else {
+    DCHECK(src.IsStackSlot());
+    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 854d92a..adf8734 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -167,11 +167,7 @@
 void HGraph::ClearLoopInformation() {
   SetHasIrreducibleLoops(false);
   for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-    HBasicBlock* current = it.Current();
-    if (current->IsLoopHeader()) {
-      current->RemoveInstruction(current->GetLoopInformation()->GetSuspendCheck());
-    }
-    current->SetLoopInformation(nullptr);
+    it.Current()->SetLoopInformation(nullptr);
   }
 }
 
@@ -180,6 +176,14 @@
   dominator_ = nullptr;
 }
 
+HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const {
+  HInstruction* instruction = GetFirstInstruction();
+  while (instruction->IsParallelMove()) {
+    instruction = instruction->GetNext();
+  }
+  return instruction;
+}
+
 void HGraph::ComputeDominanceInformation() {
   DCHECK(reverse_post_order_.empty());
   reverse_post_order_.reserve(blocks_.size());
@@ -284,9 +288,10 @@
 
   // Make sure the loop has only one pre header. This simplifies SSA building by having
   // to just look at the pre header to know which locals are initialized at entry of the
-  // loop.
+  // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining
+  // this graph.
   size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges();
-  if (number_of_incomings != 1) {
+  if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) {
     HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
     AddBlock(pre_header);
     pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc()));
@@ -457,6 +462,10 @@
     }
     if (block->IsLoopHeader()) {
       SimplifyLoop(block);
+    } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) {
+      // We are being called by the dead code elimination pass, and what used to be
+      // a loop got dismantled. Just remove the suspend check.
+      block->RemoveInstruction(block->GetFirstInstruction());
     }
   }
 }
@@ -1829,6 +1838,7 @@
     DCHECK(GetBlocks()[0]->IsEntryBlock());
     DCHECK(GetBlocks()[2]->IsExitBlock());
     DCHECK(!body->IsExitBlock());
+    DCHECK(!body->IsInLoop());
     HInstruction* last = body->GetLastInstruction();
 
     invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions());
@@ -1887,7 +1897,7 @@
     // Update the meta information surrounding blocks:
     // (1) the graph they are now in,
     // (2) the reverse post order of that graph,
-    // (3) the potential loop information they are now in,
+    // (3) their potential loop information, inner and outer,
     // (4) try block membership.
     // Note that we do not need to update catch phi inputs because they
     // correspond to the register file of the outer method which the inlinee
@@ -1916,15 +1926,24 @@
     for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
       HBasicBlock* current = it.Current();
       if (current != exit_block_ && current != entry_block_ && current != first) {
-        DCHECK(!current->IsInLoop());
         DCHECK(current->GetTryCatchInformation() == nullptr);
         DCHECK(current->GetGraph() == this);
         current->SetGraph(outer_graph);
         outer_graph->AddBlock(current);
         outer_graph->reverse_post_order_[++index_of_at] = current;
-        if (loop_info != nullptr) {
+        if (!current->IsInLoop()) {
           current->SetLoopInformation(loop_info);
-          for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
+        } else if (current->IsLoopHeader()) {
+          // Clear the information of which blocks are contained in that loop. Since the
+          // information is stored as a bit vector based on block ids, we have to update
+          // it, as those block ids were specific to the callee graph and we are now adding
+          // these blocks to the caller graph.
+          current->GetLoopInformation()->ClearAllBlocks();
+        }
+        if (current->IsInLoop()) {
+          for (HLoopInformationOutwardIterator loop_it(*current);
+               !loop_it.Done();
+               loop_it.Advance()) {
             loop_it.Current()->Add(current);
           }
         }
@@ -1937,7 +1956,9 @@
     outer_graph->AddBlock(to);
     outer_graph->reverse_post_order_[++index_of_at] = to;
     if (loop_info != nullptr) {
-      to->SetLoopInformation(loop_info);
+      if (!to->IsInLoop()) {
+        to->SetLoopInformation(loop_info);
+      }
       for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
         loop_it.Current()->Add(to);
       }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 859d570..5246fd1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -689,6 +689,10 @@
   void Add(HBasicBlock* block);
   void Remove(HBasicBlock* block);
 
+  void ClearAllBlocks() {
+    blocks_.ClearAllBits();
+  }
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -860,6 +864,8 @@
   HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
   const HInstructionList& GetPhis() const { return phis_; }
 
+  HInstruction* GetFirstInstructionDisregardMoves() const;
+
   void AddSuccessor(HBasicBlock* block) {
     successors_.push_back(block);
     block->predecessors_.push_back(this);
@@ -3687,19 +3693,13 @@
     DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
-  HNewInstance* GetThisArgumentOfStringInit() const {
+  HInstruction* GetAndRemoveThisArgumentOfStringInit() {
     DCHECK(IsStringInit());
     size_t index = InputCount() - 1;
-    DCHECK(InputAt(index)->IsNewInstance());
-    return InputAt(index)->AsNewInstance();
-  }
-
-  void RemoveThisArgumentOfStringInit() {
-    DCHECK(IsStringInit());
-    size_t index = InputCount() - 1;
-    DCHECK(InputAt(index)->IsNewInstance());
+    HInstruction* input = InputAt(index);
     RemoveAsUserOfInput(index);
     inputs_.pop_back();
+    return input;
   }
 
   // Is this a call to a static method whose declaring class has an
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bb840ea..fffd005 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -127,7 +127,7 @@
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
         timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
         disasm_info_(graph->GetArena()),
-        visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
+        visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
         visualizer_(visualizer_output, graph, *codegen),
         graph_in_bad_state_(false) {
     if (timing_logger_enabled_ || visualizer_enabled_) {
@@ -305,30 +305,19 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  // Whether we should run any optimization or register allocation. If false, will
-  // just run the code generation after the graph was built.
-  const bool run_optimizations_;
-
   // Create a 'CompiledMethod' for an optimized graph.
-  CompiledMethod* EmitOptimized(ArenaAllocator* arena,
-                                CodeVectorAllocator* code_allocator,
-                                CodeGenerator* codegen,
-                                CompilerDriver* driver) const;
-
-  // Create a 'CompiledMethod' for a non-optimized graph.
-  CompiledMethod* EmitBaseline(ArenaAllocator* arena,
-                               CodeVectorAllocator* code_allocator,
-                               CodeGenerator* codegen,
-                               CompilerDriver* driver) const;
+  CompiledMethod* Emit(ArenaAllocator* arena,
+                       CodeVectorAllocator* code_allocator,
+                       CodeGenerator* codegen,
+                       CompilerDriver* driver) const;
 
   // Try compiling a method and return the code generator used for
   // compiling it.
   // This method:
   // 1) Builds the graph. Returns null if it failed to build it.
-  // 2) If `run_optimizations_` is set:
-  //    2.1) Transform the graph to SSA. Returns null if it failed.
-  //    2.2) Run optimizations on the graph, including register allocator.
-  // 3) Generate code with the `code_allocator` provided.
+  // 2) Transforms the graph to SSA. Returns null if it failed.
+  // 3) Runs optimizations on the graph, including register allocator.
+  // 4) Generates code with the `code_allocator` provided.
   CodeGenerator* TryCompile(ArenaAllocator* arena,
                             CodeVectorAllocator* code_allocator,
                             const DexFile::CodeItem* code_item,
@@ -350,21 +339,19 @@
 static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
 
 OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
-    : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
-      run_optimizations_(
-          driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {}
+    : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {}
 
 void OptimizingCompiler::Init() {
   // Enable C1visualizer output. Must be done in Init() because the compiler
   // driver is not fully initialized when passed to the compiler's constructor.
   CompilerDriver* driver = GetCompilerDriver();
-  const std::string cfg_file_name = driver->GetDumpCfgFileName();
+  const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName();
   if (!cfg_file_name.empty()) {
     CHECK_EQ(driver->GetThreadCount(), 1U)
       << "Graph visualizer requires the compiler to run single-threaded. "
       << "Invoke the compiler with '-j1'.";
     std::ios_base::openmode cfg_file_mode =
-        driver->GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
+        driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
     visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
   }
   if (driver->GetDumpStats()) {
@@ -577,17 +564,6 @@
   AllocateRegisters(graph, codegen, pass_observer);
 }
 
-// The stack map we generate must be 4-byte aligned on ARM. Since existing
-// maps are generated alongside these stack maps, we must also align them.
-static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) {
-  size_t size = vector.size();
-  size_t aligned_size = RoundUp(size, 4);
-  for (; size < aligned_size; ++size) {
-    vector.push_back(0);
-  }
-  return ArrayRef<const uint8_t>(vector);
-}
-
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
   ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
   codegen->EmitLinkerPatches(&linker_patches);
@@ -601,10 +577,10 @@
   return linker_patches;
 }
 
-CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
-                                                  CodeVectorAllocator* code_allocator,
-                                                  CodeGenerator* codegen,
-                                                  CompilerDriver* compiler_driver) const {
+CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
+                                         CodeVectorAllocator* code_allocator,
+                                         CodeGenerator* codegen,
+                                         CompilerDriver* compiler_driver) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
   stack_map.resize(codegen->ComputeStackMapsSize());
@@ -630,39 +606,6 @@
   return compiled_method;
 }
 
-CompiledMethod* OptimizingCompiler::EmitBaseline(
-    ArenaAllocator* arena,
-    CodeVectorAllocator* code_allocator,
-    CodeGenerator* codegen,
-    CompilerDriver* compiler_driver) const {
-  ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
-  ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildMappingTable(&mapping_table);
-  ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildVMapTable(&vmap_table);
-  ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
-
-  CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
-      compiler_driver,
-      codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(code_allocator->GetMemory()),
-      // Follow Quick's behavior and set the frame size to zero if it is
-      // considered "empty" (see the definition of
-      // art::CodeGenerator::HasEmptyFrame).
-      codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
-      codegen->GetCoreSpillMask(),
-      codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(),
-      AlignVectorSize(mapping_table),
-      AlignVectorSize(vmap_table),
-      AlignVectorSize(gc_map),
-      ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
-      ArrayRef<const LinkerPatch>(linker_patches));
-  return compiled_method;
-}
-
 CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
                                               CodeVectorAllocator* code_allocator,
                                               const DexFile::CodeItem* code_item,
@@ -775,41 +718,37 @@
 
   VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
 
-  if (run_optimizations_) {
-    ScopedObjectAccess soa(Thread::Current());
-    StackHandleScopeCollection handles(soa.Self());
-    ScopedThreadSuspension sts(soa.Self(), kNative);
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  ScopedThreadSuspension sts(soa.Self(), kNative);
 
-    {
-      PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
-      GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
-      if (result != kAnalysisSuccess) {
-        switch (result) {
-          case kAnalysisFailThrowCatchLoop:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
-            break;
-          case kAnalysisFailAmbiguousArrayOp:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
-            break;
-          case kAnalysisSuccess:
-            UNREACHABLE();
-        }
-        pass_observer.SetGraphInBadState();
-        return nullptr;
+  {
+    PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
+    GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
+    if (result != kAnalysisSuccess) {
+      switch (result) {
+        case kAnalysisFailThrowCatchLoop:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+          break;
+        case kAnalysisFailAmbiguousArrayOp:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+          break;
+        case kAnalysisSuccess:
+          UNREACHABLE();
       }
+      pass_observer.SetGraphInBadState();
+      return nullptr;
     }
-
-    RunOptimizations(graph,
-                     codegen.get(),
-                     compiler_driver,
-                     compilation_stats_.get(),
-                     dex_compilation_unit,
-                     &pass_observer,
-                     &handles);
-    codegen->CompileOptimized(code_allocator);
-  } else {
-    codegen->CompileBaseline(code_allocator);
   }
+
+  RunOptimizations(graph,
+                   codegen.get(),
+                   compiler_driver,
+                   compilation_stats_.get(),
+                   dex_compilation_unit,
+                   &pass_observer,
+                   &handles);
+  codegen->Compile(code_allocator);
   pass_observer.DumpDisassembly();
 
   if (kArenaAllocatorCountAllocations) {
@@ -861,11 +800,7 @@
                    dex_cache));
     if (codegen.get() != nullptr) {
       MaybeRecordStat(MethodCompilationStat::kCompiled);
-      if (run_optimizations_) {
-        method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
-      } else {
-        method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
-      }
+      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
     }
   } else {
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -928,8 +863,6 @@
   {
     // Go to native so that we don't block GC during compilation.
     ScopedThreadSuspension sts(self, kNative);
-
-    DCHECK(run_optimizations_);
     codegen.reset(
         TryCompile(&arena,
                    &code_allocator,
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 9d136f3..be470cc 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -504,7 +504,7 @@
 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
   // This function is used to reduce the dependencies in the graph after
   // (from -> to) has been performed. Since we ensure there is no move with the same
-  // destination, (to -> X) can not be blocked while (from -> X) might still be
+  // destination, (to -> X) cannot be blocked while (from -> X) might still be
   // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
   // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
   // a dependency between the two. If we update the source location from 1 to 2, we
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 2bae4bc..d77639d 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -72,8 +72,7 @@
   float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
   double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
 
-  static constexpr bool kIsBaseline = false;
-  codegen->SetupBlockedRegisters(kIsBaseline);
+  codegen->SetupBlockedRegisters();
   physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
   physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
   // Always reserve for the current method and the graph's max out registers.
@@ -1735,6 +1734,12 @@
   }
 }
 
+static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
+    HInstruction* instruction) {
+  return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
+         (instruction->IsConstant() || instruction->IsCurrentMethod());
+}
+
 void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
                                              HBasicBlock* from,
                                              HBasicBlock* to) const {
@@ -1751,7 +1756,19 @@
     // Interval was not split.
     return;
   }
-  DCHECK(destination != nullptr && source != nullptr);
+
+  LiveInterval* parent = interval->GetParent();
+  HInstruction* defined_by = parent->GetDefinedBy();
+  if (destination == nullptr) {
+    // Our live_in fixed point calculation has found that the instruction is live
+    // in the `to` block because it will eventually enter an irreducible loop. Our
+    // live interval computation however does not compute a fixed point, and
+    // therefore will not have a location for that instruction for `to`.
+    // Because the instruction is a constant or the ArtMethod, we don't need to
+    // do anything: it will be materialized in the irreducible loop.
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    return;
+  }
 
   if (!destination->HasRegister()) {
     // Values are eagerly spilled. Spill slot already contains appropriate value.
@@ -1762,13 +1779,13 @@
   // we need to put the moves at the entry of `to`.
   if (from->GetNormalSuccessors().size() == 1) {
     InsertParallelMoveAtExitOf(from,
-                               interval->GetParent()->GetDefinedBy(),
+                               defined_by,
                                source->ToLocation(),
                                destination->ToLocation());
   } else {
     DCHECK_EQ(to->GetPredecessors().size(), 1u);
     InsertParallelMoveAtEntryOf(to,
-                                interval->GetParent()->GetDefinedBy(),
+                                defined_by,
                                 source->ToLocation(),
                                 destination->ToLocation());
   }
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 7494e33..165d09d 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -422,6 +422,34 @@
   return true;
 }
 
+void SsaBuilder::RemoveRedundantUninitializedStrings() {
+  if (GetGraph()->IsDebuggable()) {
+    // Do not perform the optimization for consistency with the interpreter
+    // which always allocates an object for new-instance of String.
+    return;
+  }
+
+  for (HNewInstance* new_instance : uninitialized_strings_) {
+    DCHECK(new_instance->IsStringAlloc());
+
+    // Replace NewInstance of String with NullConstant if not used prior to
+    // calling StringFactory. In case of deoptimization, the interpreter is
+    // expected to skip null check on the `this` argument of the StringFactory call.
+    if (!new_instance->HasNonEnvironmentUses()) {
+      new_instance->ReplaceWith(GetGraph()->GetNullConstant());
+      new_instance->GetBlock()->RemoveInstruction(new_instance);
+
+      // Remove LoadClass if not needed any more.
+      HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass();
+      DCHECK(load_class != nullptr);
+      DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible";
+      if (!load_class->HasUses()) {
+        load_class->GetBlock()->RemoveInstruction(load_class);
+      }
+    }
+  }
+}
+
 GraphAnalysisResult SsaBuilder::BuildSsa() {
   // 1) Visit in reverse post order. We need to have all predecessors of a block
   // visited (with the exception of loops) in order to create the right environment
@@ -487,7 +515,15 @@
   // input types.
   dead_phi_elimimation.EliminateDeadPhis();
 
-  // 11) Clear locals.
+  // 11) Step 1) replaced uses of NewInstances of String with the results of
+  // their corresponding StringFactory calls. Unless the String objects are used
+  // before they are initialized, they can be replaced with NullConstant.
+  // Note that this optimization is valid only if unsimplified code does not use
+  // the uninitialized value because we assume execution can be deoptimized at
+  // any safepoint. We must therefore perform it before any other optimizations.
+  RemoveRedundantUninitializedStrings();
+
+  // 12) Clear locals.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
        it.Advance()) {
@@ -891,12 +927,21 @@
   if (invoke->IsStringInit()) {
     // This is a StringFactory call which acts as a String constructor. Its
     // result replaces the empty String pre-allocated by NewInstance.
-    HNewInstance* new_instance = invoke->GetThisArgumentOfStringInit();
-    invoke->RemoveThisArgumentOfStringInit();
+    HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit();
 
-    // Walk over all vregs and replace any occurrence of `new_instance` with `invoke`.
+    // Replacing the NewInstance might render it redundant. Keep a list of these
+    // to be visited once it is clear whether it has remaining uses.
+    if (arg_this->IsNewInstance()) {
+      uninitialized_strings_.push_back(arg_this->AsNewInstance());
+    } else {
+      DCHECK(arg_this->IsPhi());
+      // NewInstance is not the direct input of the StringFactory call. It might
+      // be redundant but optimizing this case is not worth the effort.
+    }
+
+    // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
     for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-      if ((*current_locals_)[vreg] == new_instance) {
+      if ((*current_locals_)[vreg] == arg_this) {
         (*current_locals_)[vreg] = invoke;
       }
     }
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 28eef6a..ccef8ea 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -57,6 +57,7 @@
         loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         locals_for_(graph->GetBlocks().size(),
                     ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
                     graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
@@ -105,6 +106,8 @@
   HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
   HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
 
+  void RemoveRedundantUninitializedStrings();
+
   StackHandleScopeCollection* const handles_;
 
   // True if types of ambiguous ArrayGets have been resolved.
@@ -119,6 +122,7 @@
 
   ArenaVector<HArrayGet*> ambiguous_agets_;
   ArenaVector<HArraySet*> ambiguous_asets_;
+  ArenaVector<HNewInstance*> uninitialized_strings_;
 
   // HEnvironment for each block.
   ArenaVector<ArenaVector<HInstruction*>> locals_for_;
diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc
index 81f2a56..85335ef 100644
--- a/compiler/profile_assistant.cc
+++ b/compiler/profile_assistant.cc
@@ -16,54 +16,154 @@
 
 #include "profile_assistant.h"
 
+#include "base/unix_file/fd_file.h"
+#include "os.h"
+
 namespace art {
 
 // Minimum number of new methods that profiles must contain to enable recompilation.
 static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
 
-bool ProfileAssistant::ProcessProfiles(
-      const std::vector<std::string>& profile_files,
-      const std::vector<std::string>& reference_profile_files,
-      /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+bool ProfileAssistant::ProcessProfilesInternal(
+        const std::vector<ScopedFlock>& profile_files,
+        const std::vector<ScopedFlock>& reference_profile_files,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
   DCHECK(!profile_files.empty());
-  DCHECK(reference_profile_files.empty() ||
+  DCHECK(!reference_profile_files.empty() ||
       (profile_files.size() == reference_profile_files.size()));
 
   std::vector<ProfileCompilationInfo> new_info(profile_files.size());
   bool should_compile = false;
   // Read the main profile files.
-  for (size_t i = 0; i < profile_files.size(); i++) {
-    if (!new_info[i].Load(profile_files[i])) {
-      LOG(WARNING) << "Could not load profile file: " << profile_files[i];
+  for (size_t i = 0; i < new_info.size(); i++) {
+    if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) {
+      LOG(WARNING) << "Could not load profile file at index " << i;
       return false;
     }
     // Do we have enough new profiled methods that will make the compilation worthwhile?
     should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
   }
+
   if (!should_compile) {
-    *profile_compilation_info = nullptr;
     return true;
   }
 
   std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo());
+  // Merge information.
   for (size_t i = 0; i < new_info.size(); i++) {
-    // Merge all data into a single object.
-    result->Load(new_info[i]);
-    // If we have any reference profile information merge their information with
-    // the current profiles and save them back to disk.
     if (!reference_profile_files.empty()) {
-      if (!new_info[i].Load(reference_profile_files[i])) {
-        LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i];
+      if (!new_info[i].Load(reference_profile_files[i].GetFile()->Fd())) {
+        LOG(WARNING) << "Could not load reference profile file at index " << i;
         return false;
       }
-      if (!new_info[i].Save(reference_profile_files[i])) {
-        LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i];
+    }
+    // Merge all data into a single object.
+    if (!result->Load(new_info[i])) {
+      LOG(WARNING) << "Could not merge profile data at index " << i;
+      return false;
+    }
+  }
+  // We were successful in merging all profile information. Update the files.
+  for (size_t i = 0; i < new_info.size(); i++) {
+    if (!reference_profile_files.empty()) {
+      if (!reference_profile_files[i].GetFile()->ClearContent()) {
+        PLOG(WARNING) << "Could not clear reference profile file at index " << i;
+        return false;
+      }
+      if (!new_info[i].Save(reference_profile_files[i].GetFile()->Fd())) {
+        LOG(WARNING) << "Could not save reference profile file at index " << i;
+        return false;
+      }
+      if (!profile_files[i].GetFile()->ClearContent()) {
+        PLOG(WARNING) << "Could not clear profile file at index " << i;
         return false;
       }
     }
   }
+
   *profile_compilation_info = result.release();
   return true;
 }
 
+class ScopedCollectionFlock {
+ public:
+  explicit ScopedCollectionFlock(size_t size) : flocks_(size) {}
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<std::string>& filenames, /* out */ std::string* error) {
+    for (size_t i = 0; i < filenames.size(); i++) {
+      if (!flocks_[i].Init(filenames[i].c_str(), O_RDWR, /* block */ true, error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<uint32_t>& fds, /* out */ std::string* error) {
+    for (size_t i = 0; i < fds.size(); i++) {
+      // We do not own the descriptor, so disable auto-close and don't check usage.
+      File file(fds[i], false);
+      file.DisableAutoClose();
+      if (!flocks_[i].Init(&file, error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  const std::vector<ScopedFlock>& Get() const { return flocks_; }
+
+ private:
+  std::vector<ScopedFlock> flocks_;
+};
+
+bool ProfileAssistant::ProcessProfiles(
+        const std::vector<uint32_t>& profile_files_fd,
+        const std::vector<uint32_t>& reference_profile_files_fd,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+  *profile_compilation_info = nullptr;
+
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files_fd.size());
+  if (!profile_files_flocks.Init(profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return false;
+  }
+  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files_fd.size());
+  if (!reference_profile_files_flocks.Init(reference_profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return false;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_files_flocks.Get(),
+                                 profile_compilation_info);
+}
+
+bool ProfileAssistant::ProcessProfiles(
+        const std::vector<std::string>& profile_files,
+        const std::vector<std::string>& reference_profile_files,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+  *profile_compilation_info = nullptr;
+
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files.size());
+  if (!profile_files_flocks.Init(profile_files, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return false;
+  }
+  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files.size());
+  if (!reference_profile_files_flocks.Init(reference_profile_files, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return false;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_files_flocks.Get(),
+                                 profile_compilation_info);
+}
+
 }  // namespace art
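A minimal usage sketch of the path-based ProcessProfiles entry point (hypothetical caller code, not part of this change; the profile paths are made up and error handling is reduced to a boolean):

#include <memory>
#include <string>
#include <vector>

#include "profile_assistant.h"

bool MergeProfilesExample() {
  // Illustrative paths only.
  std::vector<std::string> profiles = {"/data/misc/profiles/cur/app.prof"};
  std::vector<std::string> reference_profiles = {"/data/misc/profiles/ref/app.prof"};

  art::ProfileCompilationInfo* info = nullptr;
  if (!art::ProfileAssistant::ProcessProfiles(profiles, reference_profiles, &info)) {
    return false;  // Locking, loading, or merging failed.
  }
  // A null result means the new profiles contain too few methods to justify recompilation.
  std::unique_ptr<art::ProfileCompilationInfo> owned(info);
  return owned != nullptr;
}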
diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h
index 088c8bd..ad5e216 100644
--- a/compiler/profile_assistant.h
+++ b/compiler/profile_assistant.h
@@ -20,6 +20,7 @@
 #include <string>
 #include <vector>
 
+#include "base/scoped_flock.h"
 #include "jit/offline_profiling_info.cc"
 
 namespace art {
@@ -52,7 +53,17 @@
       const std::vector<std::string>& reference_profile_files,
       /*out*/ ProfileCompilationInfo** profile_compilation_info);
 
+  static bool ProcessProfiles(
+      const std::vector<uint32_t>& profile_files_fd_,
+      const std::vector<uint32_t>& reference_profile_files_fd_,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
  private:
+  static bool ProcessProfilesInternal(
+      const std::vector<ScopedFlock>& profile_files,
+      const std::vector<ScopedFlock>& reference_profile_files,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
   DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
 };
 
diff --git a/compiler/profile_assistant_test.cc b/compiler/profile_assistant_test.cc
new file mode 100644
index 0000000..58b7513
--- /dev/null
+++ b/compiler/profile_assistant_test.cc
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "common_runtime_test.h"
+#include "compiler/profile_assistant.h"
+#include "jit/offline_profiling_info.h"
+
+namespace art {
+
+class ProfileAssistantTest : public CommonRuntimeTest {
+ protected:
+  void SetupProfile(const std::string& id,
+                    uint32_t checksum,
+                    uint16_t number_of_methods,
+                    const ScratchFile& profile,
+                    ProfileCompilationInfo* info,
+                    uint16_t start_method_index = 0) {
+    std::string dex_location1 = "location1" + id;
+    uint32_t dex_location_checksum1 = checksum;
+    std::string dex_location2 = "location2" + id;
+    uint32_t dex_location_checksum2 = 10 * checksum;
+    for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
+      ASSERT_TRUE(info->AddData(dex_location1, dex_location_checksum1, i));
+      ASSERT_TRUE(info->AddData(dex_location2, dex_location_checksum2, i));
+    }
+    ASSERT_TRUE(info->Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  }
+
+  uint32_t GetFd(const ScratchFile& file) const {
+    return static_cast<uint32_t>(file.GetFd());
+  }
+};
+
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should advise compilation.
+  ProfileCompilationInfo* result;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result != nullptr);
+
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Equals(*result));
+
+  // The information from profiles must be transferred to the reference profiles.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Initial profiles must be cleared.
+  ASSERT_EQ(0, profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  // The new profile info will contain the methods with indices 0-100.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+
+  // The reference profile info will contain the methods with indices 50-150.
+  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+  ProfileCompilationInfo reference_info1;
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile1,
+      &reference_info1, kNumberOfMethodsToEnableCompilation / 2);
+  ProfileCompilationInfo reference_info2;
+  SetupProfile("p2", 2, kNumberOfMethodsAlreadyCompiled, reference_profile2,
+      &reference_info2, kNumberOfMethodsToEnableCompilation / 2);
+
+  // We should advise compilation.
+  ProfileCompilationInfo* result;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result != nullptr);
+
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Load(reference_info1));
+  ASSERT_TRUE(expected.Load(reference_info2));
+  ASSERT_TRUE(expected.Equals(*result));
+
+  // The information from profiles must be transferred to the reference profiles.
+  ProfileCompilationInfo file_info1;
+  ProfileCompilationInfo merge1;
+  ASSERT_TRUE(merge1.Load(info1));
+  ASSERT_TRUE(merge1.Load(reference_info1));
+  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+  ASSERT_TRUE(file_info1.Equals(merge1));
+
+  ProfileCompilationInfo file_info2;
+  ProfileCompilationInfo merge2;
+  ASSERT_TRUE(merge2.Load(info2));
+  ASSERT_TRUE(merge2.Load(reference_info2));
+  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+  ASSERT_TRUE(file_info2.Equals(merge2));
+
+  // Initial profiles must be cleared.
+  ASSERT_EQ(0, profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, DoNotAdviseCompilation) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToSkipCompilation = 1;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2);
+
+  // We should not advise compilation.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must remain empty.
+  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfProfiles) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make merging of the information fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should fail processing.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must still remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must still remain empty.
+  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfReferenceProfiles) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make merging of the information fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info);
+
+  // We should not advise compilation.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must still remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile)));
+  ASSERT_TRUE(file_info2.Equals(reference_info));
+}
+
+}  // namespace art
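For reference, a minimal sketch (not part of this change) of the contract the tests above exercise, assuming that ownership of the out-parameter passes to the caller, as dex2oat's ProcessProfiles() later in this change suggests:

    // Sketch only: how the tests above use ProfileAssistant.
    // ProcessProfiles() returns true when the inputs could be processed; a
    // non-null merged info means compilation is advised, a null info means
    // there was too little new data compared to the reference profiles.
    bool AdviseCompilation(const std::vector<uint32_t>& profile_fds,
                           const std::vector<uint32_t>& reference_profile_fds) {
      ProfileCompilationInfo* result = nullptr;
      if (!ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result)) {
        return false;  // e.g. mismatched dex file checksums across profiles.
      }
      std::unique_ptr<ProfileCompilationInfo> info(result);
      return info != nullptr;
    }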
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index b6a228c..e57a540 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -21,6 +21,7 @@
 #include <set>
 #include <map>
 #include <vector>
+#include <zlib.h>
 
 #include "base/bit_utils.h"
 #include "base/logging.h"
@@ -161,7 +162,6 @@
     uint32_t total_size = data_section_offset + data_section_size;
 
     dex_file_data_.resize(total_size);
-    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
     for (const auto& entry : strings_) {
       CHECK_LT(entry.first.size(), 128u);
@@ -210,7 +210,12 @@
       Write32(raw_offset + 4u, GetStringIdx(entry.first.name));
     }
 
-    // Leave checksum and signature as zeros.
+    // Leave signature as zeros.
+
+    header->file_size_ = dex_file_data_.size();
+    size_t skip = sizeof(header->magic_) + sizeof(header->checksum_);
+    header->checksum_ = adler32(0u, dex_file_data_.data() + skip, dex_file_data_.size() - skip);
+    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
     std::string error_msg;
     std::unique_ptr<const DexFile> dex_file(DexFile::Open(
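The checksum written above follows the DEX format: an Adler-32 over everything after the 8-byte magic and the 4-byte checksum field itself. A standalone sketch of the same computation (assumes only zlib; field sizes per the standard DexFile::Header layout):

    #include <cstdint>
    #include <vector>
    #include <zlib.h>

    // Recompute a DEX-style checksum over raw dex bytes, skipping the
    // 8-byte magic and the 4-byte checksum slot, as the builder does above.
    uint32_t ComputeDexChecksum(const std::vector<uint8_t>& dex_data) {
      constexpr size_t kSkip = 8u /* magic */ + 4u /* checksum */;
      return adler32(0u, dex_data.data() + kSkip, dex_data.size() - kSkip);
    }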
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index d6caa3c..7138a46 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -186,6 +186,22 @@
   EmitOperand(dst, src);
 }
 
+void X86Assembler::popcntl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst, src);
+}
+
+void X86Assembler::popcntl(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst, src);
+}
+
 void X86Assembler::movzxb(Register dst, ByteRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
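The popcntl overloads added above emit the standard POPCNT encoding, F3 0F B8 /r. As a sanity check, a sketch (not part of the patch) of the bytes expected for the register-register form, assuming the usual ModRM layout with the destination in the reg field:

    #include <cstdint>
    #include <vector>

    // Expected machine code for "popcnt eax, ecx": mandatory F3 prefix, the
    // two-byte opcode 0F B8, then ModRM = 0xC0 | (dst << 3) | src = 0xC1.
    std::vector<uint8_t> ExpectedPopcntEaxEcx() {
      return {0xF3, 0x0F, 0xB8, 0xC1};
    }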
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 655af9c..759a41e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -330,11 +330,15 @@
   void movntl(const Address& dst, Register src);
 
   void bswapl(Register dst);
+
   void bsfl(Register dst, Register src);
   void bsfl(Register dst, const Address& src);
   void bsrl(Register dst, Register src);
   void bsrl(Register dst, const Address& src);
 
+  void popcntl(Register dst, Register src);
+  void popcntl(Register dst, const Address& src);
+
   void rorl(Register reg, const Immediate& imm);
   void rorl(Register operand, Register shifter);
   void roll(Register reg, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a9b991c..0fd0982 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -260,6 +260,19 @@
   DriverStr(expected, "bsrl_address");
 }
 
+TEST_F(AssemblerX86Test, Popcntl) {
+  DriverStr(RepeatRR(&x86::X86Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86Test, PopcntlAddress) {
+  GetAssembler()->popcntl(x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%EDI,%EBX,4), %EDI\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
 // Rorl only allows CL as the shift count.
 std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
   std::ostringstream str;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index db07267..10f5a00 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2247,6 +2247,42 @@
   EmitOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::repne_scasw() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 01d28e3..6f0847e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -647,6 +647,11 @@
   void bsrq(CpuRegister dst, CpuRegister src);
   void bsrq(CpuRegister dst, const Address& src);
 
+  void popcntl(CpuRegister dst, CpuRegister src);
+  void popcntl(CpuRegister dst, const Address& src);
+  void popcntq(CpuRegister dst, CpuRegister src);
+  void popcntq(CpuRegister dst, const Address& src);
+
   void rorl(CpuRegister reg, const Immediate& imm);
   void rorl(CpuRegister operand, CpuRegister shifter);
   void roll(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 00bb5ca..8a87fca 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1333,6 +1333,44 @@
   DriverStr(expected, "bsrq_address");
 }
 
+TEST_F(AssemblerX86_64Test, Popcntl) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntlAddress) {
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%RDI,%RBX,4), %R10d\n"
+    "popcntl 0xc(%R10,%RBX,4), %edi\n"
+    "popcntl 0xc(%RDI,%R9,4), %edi\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
+TEST_F(AssemblerX86_64Test, Popcntq) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::popcntq, "popcntq %{reg2}, %{reg1}"), "popcntq");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntqAddress) {
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntq 0xc(%RDI,%RBX,4), %R10\n"
+    "popcntq 0xc(%R10,%RBX,4), %RDI\n"
+    "popcntq 0xc(%RDI,%R9,4), %RDI\n";
+
+  DriverStr(expected, "popcntq_address");
+}
+
 /////////////////
 // Near labels //
 /////////////////
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index ea54239..918a01b 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -340,6 +340,12 @@
   UsageError("      --profile-file will be merged into --reference-profile-file. Valid only when");
   UsageError("      specified together with --profile-file.");
   UsageError("");
+  UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
+  UsageError("      Cannot be used together with --profile-file.");
+  UsageError("");
+  UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
+  UsageError("      accepts a file descriptor. Cannot be used together with");
+  UsageError("      --reference-profile-file.");
   UsageError("  --print-pass-names: print a list of pass names");
   UsageError("");
   UsageError("  --disable-passes=<pass-names>:  disable one or more passes separated by comma.");
@@ -497,12 +503,24 @@
   return dex_files_size >= kMinDexFileCumulativeSizeForSwap;
 }
 
+static void CloseAllFds(const std::vector<uint32_t>& fds, const char* descriptor) {
+  for (size_t i = 0; i < fds.size(); i++) {
+    if (close(fds[i]) < 0) {
+      PLOG(WARNING) << "Failed to close descriptor for " << descriptor << " at index " << i;
+    }
+  }
+}
+
 class Dex2Oat FINAL {
  public:
   explicit Dex2Oat(TimingLogger* timings) :
       compiler_kind_(Compiler::kOptimizing),
       instruction_set_(kRuntimeISA),
       // Take the default set of instruction features from the build.
+      image_file_location_oat_checksum_(0),
+      image_file_location_oat_data_begin_(0),
+      image_patch_delta_(0),
+      key_value_store_(nullptr),
       verification_results_(nullptr),
       method_inliner_map_(),
       runtime_(nullptr),
@@ -522,23 +540,23 @@
       boot_image_(false),
       multi_image_(false),
       is_host_(false),
+      class_loader_(nullptr),
+      elf_writers_(),
+      oat_writers_(),
+      rodata_(),
       image_writer_(nullptr),
       driver_(nullptr),
+      opened_dex_files_maps_(),
+      opened_dex_files_(),
       dump_stats_(false),
       dump_passes_(false),
       dump_timing_(false),
       dump_slow_timing_(kIsDebugBuild),
-      dump_cfg_append_(false),
       swap_fd_(-1),
       app_image_fd_(kInvalidImageFd),
       timings_(timings) {}
 
   ~Dex2Oat() {
-    // Free opened dex files before deleting the runtime_, because ~DexFile
-    // uses MemMap, which is shut down by ~Runtime.
-    class_path_files_.clear();
-    opened_dex_files_.clear();
-
     // Log completion time before deleting the runtime_, because this accesses
     // the runtime.
     LogCompletionTime();
@@ -551,6 +569,9 @@
       for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files_) {
         dex_file.release();
       }
+      for (std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+        map.release();
+      }
       for (std::unique_ptr<File>& oat_file : oat_files_) {
         oat_file.release();
       }
@@ -576,6 +597,14 @@
     ParseUintOption(option, "--oat-fd", &oat_fd_, Usage);
   }
 
+  void ParseFdForCollection(const StringPiece& option,
+                            const char* arg_name,
+                            std::vector<uint32_t>* fds) {
+    uint32_t fd;
+    ParseUintOption(option, arg_name, &fd, Usage);
+    fds->push_back(fd);
+  }
+
   void ParseJ(const StringPiece& option) {
     ParseUintOption(option, "-j", &thread_count_, Usage, /* is_long_option */ false);
   }
@@ -779,11 +808,25 @@
       }
     }
 
+    if (!profile_files_.empty() && !profile_files_fd_.empty()) {
+      Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
+    }
     if (!profile_files_.empty()) {
       if (!reference_profile_files_.empty() &&
           (reference_profile_files_.size() != profile_files_.size())) {
         Usage("If specified, --reference-profile-file should match the number of --profile-file.");
       }
+    } else if (!reference_profile_files_.empty()) {
+      Usage("--reference-profile-file should only be supplied with --profile-file");
+    }
+    if (!profile_files_fd_.empty()) {
+      if (!reference_profile_files_fd_.empty() &&
+          (reference_profile_files_fd_.size() != profile_files_fd_.size())) {
+        Usage("If specified, --reference-profile-file-fd should match the number",
+              " of --profile-file-fd.");
+      }
+    } else if (!reference_profile_files_fd_.empty()) {
+      Usage("--reference-profile-file-fd should only be supplied with --profile-file-fd");
     }
 
     if (!parser_options->oat_symbols.empty()) {
@@ -1077,6 +1120,10 @@
       } else if (option.starts_with("--reference-profile-file=")) {
         reference_profile_files_.push_back(
             option.substr(strlen("--reference-profile-file=")).ToString());
+      } else if (option.starts_with("--profile-file-fd=")) {
+        ParseFdForCollection(option, "--profile-file-fd", &profile_files_fd_);
+      } else if (option.starts_with("--reference-profile-file-fd=")) {
+        ParseFdForCollection(option, "--reference_profile-file-fd", &reference_profile_files_fd_);
       } else if (option == "--no-profile-file") {
         // No profile
       } else if (option == "--host") {
@@ -1093,10 +1140,6 @@
         dump_timing_ = true;
       } else if (option == "--dump-passes") {
         dump_passes_ = true;
-      } else if (option.starts_with("--dump-cfg=")) {
-        dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
-      } else if (option.starts_with("--dump-cfg-append")) {
-        dump_cfg_append_ = true;
       } else if (option == "--dump-stats") {
         dump_stats_ = true;
       } else if (option.starts_with("--swap-file=")) {
@@ -1130,6 +1173,9 @@
   // Check whether the oat output files are writable, and open them for later. Also open a swap
   // file, if a name is given.
   bool OpenFile() {
+    // Prune non-existent dex files now so that we don't create empty oat files for multi-image.
+    PruneNonExistentDexFiles();
+
     // Expand oat and image filenames for multi image.
     if (IsBootImage() && multi_image_) {
       ExpandOatAndImageFilenames();
@@ -1201,9 +1247,6 @@
     }
     // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
 
-    // Organize inputs, handling multi-dex and multiple oat file outputs.
-    CreateDexOatMappings();
-
     return true;
   }
 
@@ -1246,89 +1289,135 @@
       return false;
     }
 
+    CreateOatWriters();
+    if (!AddDexFileSources()) {
+      return false;
+    }
+
+    if (IsBootImage() && image_filenames_.size() > 1) {
+      // If we're compiling the boot image, store the boot classpath into the Key-Value store.
+      // We need this for the multi-image case.
+      key_value_store_->Put(OatHeader::kBootClassPath, GetMultiImageBootClassPath());
+    }
+
+    if (!IsBootImage()) {
+      // When compiling an app, create the runtime early to retrieve
+      // the image location key needed for the oat header.
+      if (!CreateRuntime(std::move(runtime_options))) {
+        return false;
+      }
+
+      {
+        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
+        std::vector<gc::space::ImageSpace*> image_spaces =
+            Runtime::Current()->GetHeap()->GetBootImageSpaces();
+        image_file_location_oat_checksum_ = image_spaces[0]->GetImageHeader().GetOatChecksum();
+        image_file_location_oat_data_begin_ =
+            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
+        image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta();
+        // Store the boot image filename(s).
+        std::vector<std::string> image_filenames;
+        for (const gc::space::ImageSpace* image_space : image_spaces) {
+          image_filenames.push_back(image_space->GetImageFilename());
+        }
+        std::string image_file_location = Join(image_filenames, ':');
+        if (!image_file_location.empty()) {
+          key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
+        }
+      }
+
+      // Open dex files for class path.
+      const std::vector<std::string> class_path_locations =
+          GetClassPathLocations(runtime_->GetClassPathString());
+      OpenClassPathFiles(class_path_locations, &class_path_files_);
+
+      // Store the classpath we have right now.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+      key_value_store_->Put(OatHeader::kClassPathKey,
+                            OatFile::EncodeDexFileDependencies(class_path_files));
+    }
+
+    // Now that we have finalized key_value_store_, start writing the oat file.
     {
-      TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
+      TimingLogger::ScopedTiming t_dex("Writing and opening dex files", timings_);
+      rodata_.reserve(oat_writers_.size());
+      for (size_t i = 0, size = oat_writers_.size(); i != size; ++i) {
+        rodata_.push_back(elf_writers_[i]->StartRoData());
+        // Unzip or copy dex files straight to the oat file.
+        std::unique_ptr<MemMap> opened_dex_files_map;
+        std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+        if (!oat_writers_[i]->WriteAndOpenDexFiles(rodata_.back(),
+                                                   oat_files_[i].get(),
+                                                   instruction_set_,
+                                                   instruction_set_features_.get(),
+                                                   key_value_store_.get(),
+                                                   &opened_dex_files_map,
+                                                   &opened_dex_files)) {
+          return false;
+        }
+        dex_files_per_oat_file_.push_back(MakeNonOwningPointerVector(opened_dex_files));
+        if (opened_dex_files_map != nullptr) {
+          opened_dex_files_maps_.push_back(std::move(opened_dex_files_map));
+          for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+            dex_file_oat_filename_map_.emplace(dex_file.get(), oat_filenames_[i]);
+            opened_dex_files_.push_back(std::move(dex_file));
+          }
+        } else {
+          DCHECK(opened_dex_files.empty());
+        }
+      }
+    }
+
+    dex_files_ = MakeNonOwningPointerVector(opened_dex_files_);
+    if (IsBootImage()) {
+      // For boot image, pass opened dex files to the Runtime::Create().
+      // Note: Runtime acquires ownership of these dex files.
+      runtime_options.Set(RuntimeArgumentMap::BootClassPathDexList, &opened_dex_files_);
       if (!CreateRuntime(std::move(runtime_options))) {
         return false;
       }
     }
 
-    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
-    // Runtime::Start, give it away now so that we don't starve GC.
-    Thread* self = Thread::Current();
-    self->TransitionFromRunnableToSuspended(kNative);
     // If we're doing the image, override the compiler filter to force full compilation. Must be
     // done ahead of WellKnownClasses::Init that causes verification.  Note: doesn't force
     // compilation of class initializers.
     // Whilst we're in native take the opportunity to initialize well known classes.
+    Thread* self = Thread::Current();
     WellKnownClasses::Init(self->GetJniEnv());
 
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    if (boot_image_filename_.empty()) {
-      dex_files_ = class_linker->GetBootClassPath();
-      // Prune invalid dex locations.
-      for (size_t i = 0; i < dex_locations_.size(); i++) {
-        const char* dex_location = dex_locations_[i];
-        bool contains = false;
-        for (const DexFile* dex_file : dex_files_) {
-          if (strcmp(dex_location, dex_file->GetLocation().c_str()) == 0) {
-            contains = true;
-            break;
-          }
-        }
-        if (!contains) {
-          dex_locations_.erase(dex_locations_.begin() + i);
-          i--;
-        }
-      }
-    } else {
-      TimingLogger::ScopedTiming t_dex("Opening dex files", timings_);
-      if (dex_filenames_.empty()) {
-        ATRACE_BEGIN("Opening zip archive from file descriptor");
-        std::string error_msg;
-        std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(zip_fd_,
-                                                                       zip_location_.c_str(),
-                                                                       &error_msg));
-        if (zip_archive.get() == nullptr) {
-          LOG(ERROR) << "Failed to open zip from file descriptor for '" << zip_location_ << "': "
-              << error_msg;
-          return false;
-        }
-        if (!DexFile::OpenFromZip(*zip_archive.get(), zip_location_, &error_msg, &opened_dex_files_)) {
-          LOG(ERROR) << "Failed to open dex from file descriptor for zip file '" << zip_location_
-              << "': " << error_msg;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-        ATRACE_END();
-      } else {
-        size_t failure_count = OpenDexFiles(dex_filenames_, dex_locations_, &opened_dex_files_);
-        if (failure_count > 0) {
-          LOG(ERROR) << "Failed to open some dex files: " << failure_count;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-      }
-
+    if (!IsBootImage()) {
       constexpr bool kSaveDexInput = false;
       if (kSaveDexInput) {
         SaveDexInput();
       }
+
+      // Handle and ClassLoader creation needs to come after Runtime::Create.
+      ScopedObjectAccess soa(self);
+
+      // Classpath: first the class-path given.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+
+      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
+      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
+
+      class_loader_ = class_linker->CreatePathClassLoader(self, class_path_files);
     }
-    // Ensure opened dex files are writable for dex-to-dex transformations. Also ensure that
-    // the dex caches stay live since we don't want class unloading to occur during compilation.
-    for (const auto& dex_file : dex_files_) {
-      if (!dex_file->EnableWrite()) {
-        PLOG(ERROR) << "Failed to make .dex file writeable '" << dex_file->GetLocation() << "'\n";
+
+    // Ensure opened dex files are writable for dex-to-dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Protect(PROT_READ | PROT_WRITE)) {
+        PLOG(ERROR) << "Failed to make .dex files writeable.";
+        return false;
       }
+    }
+
+    // Ensure that the dex caches stay live since we don't want class unloading
+    // to occur during compilation.
+    for (const auto& dex_file : dex_files_) {
       ScopedObjectAccess soa(self);
       dex_caches_.push_back(soa.AddLocalReference<jobject>(
           class_linker->RegisterDexFile(*dex_file, Runtime::Current()->GetLinearAlloc())));
-      dex_file->CreateTypeLookupTable();
     }
 
     /*
@@ -1353,59 +1442,11 @@
     return true;
   }
 
-  void CreateDexOatMappings() {
-    if (oat_files_.size() > 1) {
-      size_t index = 0;
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
-        std::vector<const DexFile*> dex_files;
-        if (index < dex_files_.size()) {
-          dex_files.push_back(dex_files_[index]);
-          dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
-          index++;
-          while (index < dex_files_.size() &&
-              (dex_files_[index]->GetBaseLocation() == dex_files_[index - 1]->GetBaseLocation())) {
-            dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
-            dex_files.push_back(dex_files_[index]);
-            index++;
-          }
-        }
-        dex_files_per_oat_file_.push_back(std::move(dex_files));
-      }
-    } else {
-      dex_files_per_oat_file_.push_back(dex_files_);
-      for (const DexFile* dex_file : dex_files_) {
-        dex_file_oat_filename_map_.emplace(dex_file, oat_filenames_[0]);
-      }
-    }
-  }
-
   // Create and invoke the compiler driver. This will compile all the dex files.
   void Compile() {
     TimingLogger::ScopedTiming t("dex2oat Compile", timings_);
     compiler_phases_timings_.reset(new CumulativeLogger("compilation times"));
 
-    // Handle and ClassLoader creation needs to come after Runtime::Create
-    jobject class_loader = nullptr;
-    Thread* self = Thread::Current();
-
-    if (!boot_image_filename_.empty()) {
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      OpenClassPathFiles(runtime_->GetClassPathString(), dex_files_, &class_path_files_);
-      ScopedObjectAccess soa(self);
-
-      // Classpath: first the class-path given.
-      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
-
-      // Store the classpath we have right now.
-      key_value_store_->Put(OatHeader::kClassPathKey,
-                            OatFile::EncodeDexFileDependencies(class_path_files));
-
-      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
-      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
-
-      class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
-    }
-
     // Find the dex file we should not inline from.
 
     // For now, on the host always have core-oj removed.
@@ -1453,49 +1494,6 @@
       }
     }
 
-    if (IsBootImage() && image_filenames_.size() > 1) {
-      // If we're compiling the boot image, store the boot classpath into the Key-Value store. If
-      // the image filename was adapted (e.g., for our tests), we need to change this here, too, but
-      // need to strip all path components (they will be re-established when loading).
-      // We need this for the multi-image case.
-      std::ostringstream bootcp_oss;
-      bool first_bootcp = true;
-      for (size_t i = 0; i < dex_locations_.size(); ++i) {
-        if (!first_bootcp) {
-          bootcp_oss << ":";
-        }
-
-        std::string dex_loc = dex_locations_[i];
-        std::string image_filename = image_filenames_[i];
-
-        // Use the dex_loc path, but the image_filename name (without path elements).
-        size_t dex_last_slash = dex_loc.rfind('/');
-
-        // npos is max(size_t). That makes this a bit ugly.
-        size_t image_last_slash = image_filename.rfind('/');
-        size_t image_last_at = image_filename.rfind('@');
-        size_t image_last_sep = (image_last_slash == std::string::npos)
-                                    ? image_last_at
-                                    : (image_last_at == std::string::npos)
-                                          ? std::string::npos
-                                          : std::max(image_last_slash, image_last_at);
-        // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
-
-        if (dex_last_slash == std::string::npos) {
-          dex_loc = image_filename.substr(image_last_sep + 1);
-        } else {
-          dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
-              image_filename.substr(image_last_sep + 1);
-        }
-
-        // Image filenames already end with .art, no need to replace.
-
-        bootcp_oss << dex_loc;
-        first_bootcp = false;
-      }
-      key_value_store_->Put(OatHeader::kBootClassPath, bootcp_oss.str());
-    }
-
     driver_.reset(new CompilerDriver(compiler_options_.get(),
                                      verification_results_.get(),
                                      &method_inliner_map_,
@@ -1509,14 +1507,12 @@
                                      thread_count_,
                                      dump_stats_,
                                      dump_passes_,
-                                     dump_cfg_file_name_,
-                                     dump_cfg_append_,
                                      compiler_phases_timings_.get(),
                                      swap_fd_,
                                      &dex_file_oat_filename_map_,
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
-    driver_->CompileAll(class_loader, dex_files_, timings_);
+    driver_->CompileAll(class_loader_, dex_files_, timings_);
   }
 
   // Notes on the interleaving of creating the images and oat files to
@@ -1584,19 +1580,18 @@
   // ImageWriter, if necessary.
   // Note: Flushing (and closing) the file is the caller's responsibility, except for the failure
   //       case (when the file will be explicitly erased).
-  bool CreateOatFiles() {
-    CHECK(key_value_store_.get() != nullptr);
-
+  bool WriteOatFiles() {
     TimingLogger::ScopedTiming t("dex2oat Oat", timings_);
 
-    std::vector<std::unique_ptr<OatWriter>> oat_writers;
-    {
-      TimingLogger::ScopedTiming t2("dex2oat OatWriter", timings_);
-      std::string image_file_location;
-      uint32_t image_file_location_oat_checksum = 0;
-      uintptr_t image_file_location_oat_data_begin = 0;
-      int32_t image_patch_delta = 0;
+    // Sync the data to the file, in case we did dex2dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Sync()) {
+        PLOG(ERROR) << "Failed to Sync() dex2dex output. Map: " << map->GetName();
+        return false;
+      }
+    }
 
+    if (IsImage()) {
       if (app_image_ && image_base_ == 0) {
         std::vector<gc::space::ImageSpace*> image_spaces =
             Runtime::Current()->GetHeap()->GetBootImageSpaces();
@@ -1608,47 +1603,15 @@
         VLOG(compiler) << "App image base=" << reinterpret_cast<void*>(image_base_);
       }
 
-      if (IsImage()) {
-        PrepareImageWriter(image_base_);
-      }
+      image_writer_.reset(new ImageWriter(*driver_,
+                                          image_base_,
+                                          compiler_options_->GetCompilePic(),
+                                          IsAppImage(),
+                                          image_storage_mode_,
+                                          oat_filenames_,
+                                          dex_file_oat_filename_map_));
 
-      if (!IsBootImage()) {
-        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
-        std::vector<gc::space::ImageSpace*> image_spaces =
-            Runtime::Current()->GetHeap()->GetBootImageSpaces();
-        image_file_location_oat_checksum = image_spaces[0]->GetImageHeader().GetOatChecksum();
-        image_file_location_oat_data_begin =
-            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
-        image_patch_delta = image_spaces[0]->GetImageHeader().GetPatchDelta();
-        std::vector<std::string> image_filenames;
-        for (const gc::space::ImageSpace* image_space : image_spaces) {
-          image_filenames.push_back(image_space->GetImageFilename());
-        }
-        image_file_location = Join(image_filenames, ':');
-      }
-
-      if (!image_file_location.empty()) {
-        key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
-      }
-
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
-        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
-        std::unique_ptr<OatWriter> oat_writer(new OatWriter(dex_files,
-                                                            image_file_location_oat_checksum,
-                                                            image_file_location_oat_data_begin,
-                                                            image_patch_delta,
-                                                            driver_.get(),
-                                                            image_writer_.get(),
-                                                            IsBootImage(),
-                                                            timings_,
-                                                            key_value_store_.get()));
-        oat_writers.push_back(std::move(oat_writer));
-      }
-    }
-
-    if (IsImage()) {
-      // The OatWriter constructor has already updated offsets in methods and we need to
-      // prepare method offsets in the image address space for direct method patching.
+      // We need to prepare method offsets in the image address space for direct method patching.
       TimingLogger::ScopedTiming t2("dex2oat Prepare image address space", timings_);
       if (!image_writer_->PrepareImageAddressSpace()) {
         LOG(ERROR) << "Failed to prepare image address space.";
@@ -1658,20 +1621,22 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
+      for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         std::unique_ptr<File>& oat_file = oat_files_[i];
-        std::unique_ptr<OatWriter>& oat_writer = oat_writers[i];
-        std::unique_ptr<ElfWriter> elf_writer =
-            CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file.get());
+        std::unique_ptr<ElfWriter>& elf_writer = elf_writers_[i];
+        std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
 
-        elf_writer->Start();
+        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
+        oat_writer->PrepareLayout(driver_.get(), image_writer_.get(), dex_files);
 
-        OutputStream* rodata = elf_writer->StartRoData();
+        OutputStream*& rodata = rodata_[i];
+        DCHECK(rodata != nullptr);
         if (!oat_writer->WriteRodata(rodata)) {
           LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file->GetPath();
           return false;
         }
         elf_writer->EndRoData(rodata);
+        rodata = nullptr;
 
         OutputStream* text = elf_writer->StartText();
         if (!oat_writer->WriteCode(text)) {
@@ -1680,6 +1645,14 @@
         }
         elf_writer->EndText(text);
 
+        if (!oat_writer->WriteHeader(elf_writer->GetStream(),
+                                     image_file_location_oat_checksum_,
+                                     image_file_location_oat_data_begin_,
+                                     image_patch_delta_)) {
+          LOG(ERROR) << "Failed to write oat header to the ELF file " << oat_file->GetPath();
+          return false;
+        }
+
         elf_writer->SetBssSize(oat_writer->GetBssSize());
         elf_writer->WriteDynamicSection();
         elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
@@ -1705,6 +1678,9 @@
         }
 
         VLOG(compiler) << "Oat file written successfully: " << oat_filenames_[i];
+
+        oat_writer.reset();
+        elf_writer.reset();
       }
     }
 
@@ -1822,17 +1798,27 @@
   }
 
   bool UseProfileGuidedCompilation() const {
-    return !profile_files_.empty();
+    return !profile_files_.empty() || !profile_files_fd_.empty();
   }
 
   bool ProcessProfiles() {
     DCHECK(UseProfileGuidedCompilation());
     ProfileCompilationInfo* info = nullptr;
-    if (ProfileAssistant::ProcessProfiles(profile_files_, reference_profile_files_, &info)) {
-      profile_compilation_info_.reset(info);
-      return true;
+    bool result = false;
+    if (profile_files_.empty()) {
+      DCHECK(!profile_files_fd_.empty());
+      result = ProfileAssistant::ProcessProfiles(
+          profile_files_fd_, reference_profile_files_fd_, &info);
+      CloseAllFds(profile_files_fd_, "profile_files_fd_");
+      CloseAllFds(reference_profile_files_fd_, "reference_profile_files_fd_");
+    } else {
+      result = ProfileAssistant::ProcessProfiles(
+          profile_files_, reference_profile_files_, &info);
     }
-    return false;
+
+    profile_compilation_info_.reset(info);
+
+    return result;
   }
 
   bool ShouldCompileBasedOnProfiles() const {
@@ -1852,65 +1838,78 @@
     return result;
   }
 
-  static size_t OpenDexFiles(std::vector<const char*>& dex_filenames,
-                             std::vector<const char*>& dex_locations,
-                             std::vector<std::unique_ptr<const DexFile>>* dex_files) {
-    DCHECK(dex_files != nullptr) << "OpenDexFiles out-param is nullptr";
-    size_t failure_count = 0;
-    for (size_t i = 0; i < dex_filenames.size(); i++) {
-      const char* dex_filename = dex_filenames[i];
-      const char* dex_location = dex_locations[i];
-      ATRACE_BEGIN(StringPrintf("Opening dex file '%s'", dex_filenames[i]).c_str());
-      std::string error_msg;
-      if (!OS::FileExists(dex_filename)) {
-        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
-        dex_filenames.erase(dex_filenames.begin() + i);
-        dex_locations.erase(dex_locations.begin() + i);
-        i--;
-        continue;
+  std::string GetMultiImageBootClassPath() {
+    DCHECK(IsBootImage());
+    DCHECK_GT(oat_filenames_.size(), 1u);
+    // If the image filename was adapted (e.g., for our tests), we need to change this here,
+    // too, but need to strip all path components (they will be re-established when loading).
+    std::ostringstream bootcp_oss;
+    bool first_bootcp = true;
+    for (size_t i = 0; i < dex_locations_.size(); ++i) {
+      if (!first_bootcp) {
+        bootcp_oss << ":";
       }
-      if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
-        LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
-        ++failure_count;
+
+      std::string dex_loc = dex_locations_[i];
+      std::string image_filename = image_filenames_[i];
+
+      // Use the dex_loc path, but the image_filename name (without path elements).
+      size_t dex_last_slash = dex_loc.rfind('/');
+
+      // npos is max(size_t). That makes this a bit ugly.
+      size_t image_last_slash = image_filename.rfind('/');
+      size_t image_last_at = image_filename.rfind('@');
+      size_t image_last_sep = (image_last_slash == std::string::npos)
+                                  ? image_last_at
+                                  : (image_last_at == std::string::npos)
+                                        ? std::string::npos
+                                        : std::max(image_last_slash, image_last_at);
+      // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
+
+      if (dex_last_slash == std::string::npos) {
+        dex_loc = image_filename.substr(image_last_sep + 1);
+      } else {
+        dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
+            image_filename.substr(image_last_sep + 1);
       }
-      ATRACE_END();
+
+      // Image filenames already end with .art, no need to replace.
+
+      bootcp_oss << dex_loc;
+      first_bootcp = false;
     }
-    return failure_count;
+    return bootcp_oss.str();
   }
 
-  // Returns true if dex_files has a dex with the named location. We compare canonical locations,
-  // so that relative and absolute paths will match. Not caching for the dex_files isn't very
-  // efficient, but under normal circumstances the list is neither large nor is this part too
-  // sensitive.
-  static bool DexFilesContains(const std::vector<const DexFile*>& dex_files,
-                               const std::string& location) {
-    std::string canonical_location(DexFile::GetDexCanonicalLocation(location.c_str()));
-    for (size_t i = 0; i < dex_files.size(); ++i) {
-      if (DexFile::GetDexCanonicalLocation(dex_files[i]->GetLocation().c_str()) ==
-          canonical_location) {
-        return true;
-      }
+  std::vector<std::string> GetClassPathLocations(const std::string& class_path) {
+    // This function is used only for apps and for an app we have exactly one oat file.
+    DCHECK(!IsBootImage());
+    DCHECK_EQ(oat_writers_.size(), 1u);
+    std::vector<std::string> dex_files_canonical_locations;
+    for (const char* location : oat_writers_[0]->GetSourceLocations()) {
+      dex_files_canonical_locations.push_back(DexFile::GetDexCanonicalLocation(location));
     }
-    return false;
-  }
 
-  // Appends to opened_dex_files any elements of class_path that dex_files
-  // doesn't already contain. This will open those dex files as necessary.
-  static void OpenClassPathFiles(const std::string& class_path,
-                                 std::vector<const DexFile*> dex_files,
-                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
-    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
     std::vector<std::string> parsed;
     Split(class_path, ':', &parsed);
-    // Take Locks::mutator_lock_ so that lock ordering on the ClassLinker::dex_lock_ is maintained.
-    ScopedObjectAccess soa(Thread::Current());
-    for (size_t i = 0; i < parsed.size(); ++i) {
-      if (DexFilesContains(dex_files, parsed[i])) {
-        continue;
-      }
+    auto kept_it = std::remove_if(parsed.begin(),
+                                  parsed.end(),
+                                  [dex_files_canonical_locations](const std::string& location) {
+      return ContainsElement(dex_files_canonical_locations,
+                             DexFile::GetDexCanonicalLocation(location.c_str()));
+    });
+    parsed.erase(kept_it, parsed.end());
+    return parsed;
+  }
+
+  // Opens requested class path files and appends them to opened_dex_files.
+  static void OpenClassPathFiles(const std::vector<std::string>& class_path_locations,
+                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
+    for (const std::string& location : class_path_locations) {
       std::string error_msg;
-      if (!DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg, opened_dex_files)) {
-        LOG(WARNING) << "Failed to open dex file '" << parsed[i] << "': " << error_msg;
+      if (!DexFile::Open(location.c_str(), location.c_str(), &error_msg, opened_dex_files)) {
+        LOG(WARNING) << "Failed to open dex file '" << location << "': " << error_msg;
       }
     }
   }
@@ -1985,6 +1984,63 @@
     return true;
   }
 
+  void PruneNonExistentDexFiles() {
+    DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+    size_t kept = 0u;
+    for (size_t i = 0, size = dex_filenames_.size(); i != size; ++i) {
+      if (!OS::FileExists(dex_filenames_[i])) {
+        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filenames_[i] << "'";
+      } else {
+        dex_filenames_[kept] = dex_filenames_[i];
+        dex_locations_[kept] = dex_locations_[i];
+        ++kept;
+      }
+    }
+    dex_filenames_.resize(kept);
+    dex_locations_.resize(kept);
+  }
+
+  bool AddDexFileSources() {
+    TimingLogger::ScopedTiming t2("AddDexFileSources", timings_);
+    if (zip_fd_ != -1) {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      if (!oat_writers_[0]->AddZippedDexFilesSource(ScopedFd(zip_fd_), zip_location_.c_str())) {
+        return false;
+      }
+    } else if (oat_writers_.size() > 1u) {
+      // Multi-image.
+      DCHECK_EQ(oat_writers_.size(), dex_filenames_.size());
+      DCHECK_EQ(oat_writers_.size(), dex_locations_.size());
+      for (size_t i = 0, size = oat_writers_.size(); i != size; ++i) {
+        if (!oat_writers_[i]->AddDexFileSource(dex_filenames_[i], dex_locations_[i])) {
+          return false;
+        }
+      }
+    } else {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+      DCHECK_NE(dex_filenames_.size(), 0u);
+      for (size_t i = 0; i != dex_filenames_.size(); ++i) {
+        if (!oat_writers_[0]->AddDexFileSource(dex_filenames_[i], dex_locations_[i])) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  void CreateOatWriters() {
+    TimingLogger::ScopedTiming t2("CreateOatWriters", timings_);
+    elf_writers_.reserve(oat_files_.size());
+    oat_writers_.reserve(oat_files_.size());
+    for (const std::unique_ptr<File>& oat_file : oat_files_) {
+      elf_writers_.emplace_back(
+          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file.get()));
+      elf_writers_.back()->Start();
+      oat_writers_.emplace_back(new OatWriter(IsBootImage(), timings_));
+    }
+  }
+
   void SaveDexInput() {
     for (size_t i = 0; i < dex_files_.size(); ++i) {
       const DexFile* dex_file = dex_files_[i];
@@ -2035,6 +2091,10 @@
     // Disable libsigchain. We don't don't need it during compilation and it prevents us
     // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
     raw_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
+    // Disable HSpace compaction to save heap virtual address space.
+    // We only need to disable HSpace compaction for OOM because the background
+    // collector equals the foreground collector by default for dex2oat.
+    raw_options.push_back(std::make_pair("-XX:DisableHSpaceCompactForOOM", nullptr));
 
     if (!Runtime::ParseOptions(raw_options, false, runtime_options)) {
       LOG(ERROR) << "Failed to parse runtime options";
@@ -2044,8 +2104,8 @@
   }
 
   // Create a runtime necessary for compilation.
-  bool CreateRuntime(RuntimeArgumentMap&& runtime_options)
-      SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
+  bool CreateRuntime(RuntimeArgumentMap&& runtime_options) {
+    TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
     if (!Runtime::Create(std::move(runtime_options))) {
       LOG(ERROR) << "Failed to create runtime";
       return false;
@@ -2066,18 +2126,12 @@
 
     runtime_->GetClassLinker()->RunRootClinits();
 
-    return true;
-  }
+    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
+    // Runtime::Start, give it away now so that we don't starve GC.
+    Thread* self = Thread::Current();
+    self->TransitionFromRunnableToSuspended(kNative);
 
-  void PrepareImageWriter(uintptr_t image_base) {
-    DCHECK(IsImage());
-    image_writer_.reset(new ImageWriter(*driver_,
-                                        image_base,
-                                        compiler_options_->GetCompilePic(),
-                                        IsAppImage(),
-                                        image_storage_mode_,
-                                        oat_filenames_,
-                                        dex_file_oat_filename_map_));
+    return true;
   }
 
   // Let the ImageWriter write the image files. If we do not compile PIC, also fix up the oat files.
@@ -2256,6 +2310,9 @@
   InstructionSet instruction_set_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
 
+  uint32_t image_file_location_oat_checksum_;
+  uintptr_t image_file_location_oat_data_begin_;
+  int32_t image_patch_delta_;
   std::unique_ptr<SafeMap<std::string, std::string> > key_value_store_;
 
   std::unique_ptr<VerificationResults> verification_results_;
@@ -2263,11 +2320,11 @@
   DexFileToMethodInlinerMap method_inliner_map_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 
+  std::unique_ptr<Runtime> runtime_;
+
   // Ownership for the class path files.
   std::vector<std::unique_ptr<const DexFile>> class_path_files_;
 
-  std::unique_ptr<Runtime> runtime_;
-
   size_t thread_count_;
   uint64_t start_ns_;
   std::unique_ptr<WatchDog> watchdog_;
@@ -2302,24 +2359,30 @@
   std::vector<const DexFile*> dex_files_;
   std::string no_inline_from_string_;
   std::vector<jobject> dex_caches_;
-  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+  jobject class_loader_;
 
+  std::vector<std::unique_ptr<ElfWriter>> elf_writers_;
+  std::vector<std::unique_ptr<OatWriter>> oat_writers_;
+  std::vector<OutputStream*> rodata_;
   std::unique_ptr<ImageWriter> image_writer_;
   std::unique_ptr<CompilerDriver> driver_;
 
+  std::vector<std::unique_ptr<MemMap>> opened_dex_files_maps_;
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+
   std::vector<std::string> verbose_methods_;
   bool dump_stats_;
   bool dump_passes_;
   bool dump_timing_;
   bool dump_slow_timing_;
-  std::string dump_cfg_file_name_;
-  bool dump_cfg_append_;
   std::string swap_file_name_;
   int swap_fd_;
   std::string app_image_file_name_;
   int app_image_fd_;
   std::vector<std::string> profile_files_;
   std::vector<std::string> reference_profile_files_;
+  std::vector<uint32_t> profile_files_fd_;
+  std::vector<uint32_t> reference_profile_files_fd_;
   std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
   TimingLogger* timings_;
   std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
@@ -2355,7 +2418,7 @@
 static int CompileImage(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  if (!dex2oat.CreateOatFiles()) {
+  if (!dex2oat.WriteOatFiles()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
@@ -2394,7 +2457,7 @@
 static int CompileApp(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  if (!dex2oat.CreateOatFiles()) {
+  if (!dex2oat.WriteOatFiles()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
@@ -2451,6 +2514,11 @@
     }
   }
 
+  // Check early that the result of compilation can be written.
+  if (!dex2oat.OpenFile()) {
+    return EXIT_FAILURE;
+  }
+
   // Print the complete line when any of the following is true:
   //   1) Debug build
   //   2) Compiling an image
@@ -2464,11 +2532,6 @@
   }
 
   if (!dex2oat.Setup()) {
-    return EXIT_FAILURE;
-  }
-
-  // Check early that the result of compilation can be written
-  if (!dex2oat.OpenFile()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
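PruneNonExistentDexFiles(), added earlier in this file, replaces the erase-while-iterating loop from the removed OpenDexFiles() with an in-place compaction. A generic sketch of that pattern (hypothetical helper, not part of the patch):

    #include <cstddef>
    #include <string>
    #include <utility>
    #include <vector>

    // Keep only the elements satisfying pred, preserving order and avoiding
    // repeated erase() calls; mirrors the structure of PruneNonExistentDexFiles().
    template <typename Pred>
    void KeepIf(std::vector<std::string>* items, Pred pred) {
      size_t kept = 0u;
      for (size_t i = 0, size = items->size(); i != size; ++i) {
        if (pred((*items)[i])) {
          if (kept != i) {
            (*items)[kept] = std::move((*items)[i]);
          }
          ++kept;
        }
      }
      items->resize(kept);
    }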
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index d4bef0f..1f74c93 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -938,6 +938,11 @@
         has_modrm = true;
         load = true;
         break;
+      case 0xB8:
+        opcode1 = "popcnt";
+        has_modrm = true;
+        load = true;
+        break;
       case 0xBE:
         opcode1 = "movsxb";
         has_modrm = true;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 04645d1..7bf6d21 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -274,7 +274,6 @@
   arch/arm64/fault_handler_arm64.cc
 
 LIBART_SRC_FILES_x86 := \
-  interpreter/mterp/mterp_stub.cc \
   arch/x86/context_x86.cc \
   arch/x86/entrypoints_init_x86.cc \
   arch/x86/jni_entrypoints_x86.S \
@@ -286,6 +285,20 @@
 LIBART_TARGET_SRC_FILES_x86 := \
   $(LIBART_SRC_FILES_x86)
 
+# Darwin uses non-standard x86 assembly syntax. Don't build the x86 mterp for the Darwin host.
+ifeq ($(HOST_OS),darwin)
+  LIBART_SRC_FILES_x86 += \
+    interpreter/mterp/mterp_stub.cc
+else
+  LIBART_SRC_FILES_x86 += \
+    interpreter/mterp/mterp.cc \
+    interpreter/mterp/out/mterp_x86.S
+endif
+# But do build the x86 mterp for target builds regardless of the host platform.
+LIBART_TARGET_SRC_FILES_x86 += \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_x86.S
+
 # Note that the fault_handler_x86.cc is not a mistake.  This file is
 # shared between the x86 and x86_64 architectures.
 LIBART_SRC_FILES_x86_64 := \
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 42f5df4..b97a8db 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -50,6 +50,10 @@
     "silvermont",
 };
 
+static constexpr const char* x86_variants_with_popcnt[] = {
+    "silvermont",
+};
+
 const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
     bool x86_64) {
@@ -69,6 +73,11 @@
                                                arraysize(x86_variants_prefer_locked_add_sync),
                                                variant);
 
+  bool has_POPCNT = FindVariantInArray(x86_variants_with_popcnt,
+                                       arraysize(x86_variants_with_popcnt),
+                                       variant);
+
+  // Verify that variant is known.
   bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
                                           variant);
   if (!known_variant && variant != "default") {
@@ -77,10 +86,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -93,12 +102,15 @@
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
   bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
+  bool has_POPCNT = (bitmap & kPopCntBitfield) != 0;
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                                has_AVX, has_AVX2, prefers_locked_add);
+                                            has_AVX, has_AVX2, prefers_locked_add,
+                                            has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                             has_AVX, has_AVX2, prefers_locked_add);
+                                         has_AVX, has_AVX2, prefers_locked_add,
+                                         has_POPCNT);
   }
 }
 
@@ -138,12 +150,18 @@
   // No #define for memory synchronization preference.
   const bool prefers_locked_add = false;
 
+#ifndef __POPCNT__
+  const bool has_POPCNT = false;
+#else
+  const bool has_POPCNT = true;
+#endif
+
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -158,6 +176,7 @@
   bool has_AVX2 = false;
   // No cpuinfo for memory synchronization preference.
   const bool prefers_locked_add = false;
+  bool has_POPCNT = false;
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
@@ -183,6 +202,9 @@
           if (line.find("avx2") != std::string::npos) {
             has_AVX2 = true;
           }
+          if (line.find("popcnt") != std::string::npos) {
+            has_POPCNT = true;
+          }
         } else if (line.find("processor") != std::string::npos &&
             line.find(": 1") != std::string::npos) {
           smp = true;
@@ -195,10 +217,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -223,7 +245,8 @@
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
       (has_AVX2_ == other_as_x86->has_AVX2_) &&
-      (prefers_locked_add_ == other_as_x86->prefers_locked_add_);
+      (prefers_locked_add_ == other_as_x86->prefers_locked_add_) &&
+      (has_POPCNT_ == other_as_x86->has_POPCNT_);
 }
 
 uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -233,7 +256,8 @@
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
       (has_AVX2_ ? kAvx2Bitfield : 0) |
-      (prefers_locked_add_ ? kPrefersLockedAdd : 0);
+      (prefers_locked_add_ ? kPrefersLockedAdd : 0) |
+      (has_POPCNT_ ? kPopCntBitfield : 0);
 }
 
 std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -273,6 +297,11 @@
   } else {
     result += ",-lock_add";
   }
+  if (has_POPCNT_) {
+    result += ",popcnt";
+  } else {
+    result += ",-popcnt";
+  }
   return result;
 }
 
@@ -285,6 +314,7 @@
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
   bool prefers_locked_add = prefers_locked_add_;
+  bool has_POPCNT = has_POPCNT_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
     if (feature == "ssse3") {
@@ -311,6 +341,10 @@
       prefers_locked_add = true;
     } else if (feature == "-lock_add") {
       prefers_locked_add = false;
+    } else if (feature == "popcnt") {
+      has_POPCNT = true;
+    } else if (feature == "-popcnt") {
+      has_POPCNT = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
@@ -318,10 +352,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 2b845f8..1819654 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -62,6 +62,8 @@
 
   bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
 
+  bool HasPopCnt() const { return has_POPCNT_; }
+
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
   virtual const InstructionSetFeatures*
@@ -75,10 +77,17 @@
                                  bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2, bool prefers_locked_add)
-      : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
-        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2),
-        prefers_locked_add_(prefers_locked_add) {
+                            bool has_AVX, bool has_AVX2,
+                            bool prefers_locked_add,
+                            bool has_POPCNT)
+      : InstructionSetFeatures(smp),
+        has_SSSE3_(has_SSSE3),
+        has_SSE4_1_(has_SSE4_1),
+        has_SSE4_2_(has_SSE4_2),
+        has_AVX_(has_AVX),
+        has_AVX2_(has_AVX2),
+        prefers_locked_add_(prefers_locked_add),
+        has_POPCNT_(has_POPCNT) {
   }
 
  private:
@@ -91,6 +100,7 @@
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
     kPrefersLockedAdd = 64,
+    kPopCntBitfield = 128,
   };
 
   const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
@@ -99,6 +109,7 @@
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
   const bool prefers_locked_add_;  // x86 use locked add for memory synchronization.
+  const bool has_POPCNT_;  // x86 population count (POPCNT instruction).
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
 };
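
With the new constructor parameter and accessor in place, a caller can query the flag through the usual feature plumbing. A short sketch, with the variant name taken from the tests below; FromVariant and AsX86InstructionSetFeatures are assumed from the surrounding runtime code:

  std::string error_msg;
  std::unique_ptr<const InstructionSetFeatures> features(
      InstructionSetFeatures::FromVariant(kX86, "silvermont", &error_msg));
  if (features->AsX86InstructionSetFeatures()->HasPopCnt()) {
    // The backend may emit the POPCNT instruction.
  }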
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index e8d01e6..a062c12 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
@@ -40,7 +40,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 67U);
 
@@ -50,7 +50,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -60,7 +60,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
 
@@ -77,9 +77,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
                x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 79U);
+  EXPECT_EQ(x86_features->AsBitmap(), 207U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -87,7 +87,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -97,9 +97,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 207U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
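
The new bitmap expectations follow directly from the bitfield values: for the silvermont variant the bits are smp (1) + ssse3 (2) + sse4.1 (4) + sse4.2 (8) + lock_add (kPrefersLockedAdd = 64) + popcnt (kPopCntBitfield = 128) = 207, where the old expectation of 79 simply lacked the new 128 bit. The 67U expectations elsewhere in the file (1 + 2 + 64) are unchanged because those feature strings do not include popcnt.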
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index b8000d0..aba7234 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,9 +74,10 @@
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2, bool prefers_locked_add)
+                               bool has_AVX, bool has_AVX2, bool prefers_locked_add,
+                               bool has_POPCNT)
       : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                  has_AVX2, prefers_locked_add) {
+                                  has_AVX2, prefers_locked_add, has_POPCNT) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 4562c64..78aeacf 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index 71e0590..814cbd0 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -26,16 +26,25 @@
 namespace art {
 
 bool ScopedFlock::Init(const char* filename, std::string* error_msg) {
+  return Init(filename, O_CREAT | O_RDWR, true, error_msg);
+}
+
+bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* error_msg) {
   while (true) {
     if (file_.get() != nullptr) {
       UNUSED(file_->FlushCloseOrErase());  // Ignore result.
     }
-    file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
+    file_.reset(OS::OpenFileWithFlags(filename, flags));
     if (file_.get() == nullptr) {
       *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
       return false;
     }
-    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));
+    int operation = block ? LOCK_EX : (LOCK_EX | LOCK_NB);
+    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), operation));
+    if (flock_result == -1 && errno == EWOULDBLOCK) {
+      // File is locked by someone else and we are required not to block.
+      return false;
+    }
     if (flock_result != 0) {
       *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno));
       return false;
@@ -51,11 +60,23 @@
     if (stat_result != 0) {
       PLOG(WARNING) << "Failed to stat, will retry: " << filename;
       // ENOENT can happen if someone racing with us unlinks the file we created so just retry.
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // Note that in theory we could race with someone here for a long time and end up retrying
+        // over and over again. This potential behavior does not fit well with the non-blocking
+        // semantics. Thus, if we are not required to block, return failure when racing.
+        return false;
+      }
     }
     if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) {
       LOG(WARNING) << "File changed while locking, will retry: " << filename;
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // See comment above.
+        return false;
+      }
     }
     return true;
   }
@@ -78,7 +99,7 @@
   return true;
 }
 
-File* ScopedFlock::GetFile() {
+File* ScopedFlock::GetFile() const {
   CHECK(file_.get() != nullptr);
   return file_.get();
 }
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
index 08612e3..cc22056 100644
--- a/runtime/base/scoped_flock.h
+++ b/runtime/base/scoped_flock.h
@@ -32,10 +32,15 @@
   // Attempts to acquire an exclusive file lock (see flock(2)) on the file
   // at filename, and blocks until it can do so.
   //
-  // Returns true if the lock could be acquired, or false if an error
-  // occurred. It is an error if the file does not exist, or if its inode
-  // changed (usually due to a new file being created at the same path)
-  // between attempts to lock it.
+  // Returns true if the lock could be acquired, or false if an error occurred.
+  // It is an error if the file's inode changed (usually due to a new file being
+  // created at the same path) between attempts to lock it. In blocking mode,
+  // locking will be retried if the file changed. In non-blocking mode, false
+  // is returned and no attempt is made to re-acquire the lock.
+  //
+  // The file is opened with the provided flags.
+  bool Init(const char* filename, int flags, bool block, std::string* error_msg);
+  // Calls Init(filename, O_CREAT | O_RDWR, true, error_msg).
   bool Init(const char* filename, std::string* error_msg);
   // Attempt to acquire an exclusive file lock (see flock(2)) on 'file'.
   // Returns true if the lock could be acquired or false if an error
@@ -43,7 +48,7 @@
   bool Init(File* file, std::string* error_msg);
 
   // Returns the (locked) file associated with this instance.
-  File* GetFile();
+  File* GetFile() const;
 
   // Returns whether a file is held.
   bool HasFile();
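
A brief usage sketch of the new non-blocking variant, assuming a default-constructed ScopedFlock and a caller-known profile_path (both illustrative names, not from the patch); on contention Init() returns false without blocking:

  ScopedFlock flock;
  std::string error_msg;
  if (!flock.Init(profile_path.c_str(), O_RDWR, /* block */ false, &error_msg)) {
    // The file could not be opened, or another process holds the lock; skip the update.
    return false;
  }
  File* file = flock.GetFile();  // exclusive access while flock stays in scope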
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 78bc3d5..e17bebb 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -316,4 +316,21 @@
   guard_state_ = GuardState::kNoCheck;
 }
 
+bool FdFile::ClearContent() {
+  if (SetLength(0) < 0) {
+    PLOG(art::ERROR) << "Failed to reset the length";
+    return false;
+  }
+  return ResetOffset();
+}
+
+bool FdFile::ResetOffset() {
+  off_t rc = TEMP_FAILURE_RETRY(lseek(fd_, 0, SEEK_SET));
+  if (rc == static_cast<off_t>(-1)) {
+    PLOG(art::ERROR) << "Failed to reset the offset";
+    return false;
+  }
+  return true;
+}
+
 }  // namespace unix_file
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 231a1ab..1e2d8af 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -79,6 +79,11 @@
 
   // Copy data from another file.
   bool Copy(FdFile* input_file, int64_t offset, int64_t size);
+  // Clears the file content and resets the file offset to 0.
+  // Returns true upon success, false otherwise.
+  bool ClearContent();
+  // Resets the file offset to the beginning of the file.
+  bool ResetOffset();
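
A small sketch of how the new helpers are meant to be used; RewriteAll and its arguments are illustrative names only, while WriteFully is the existing FdFile write helper:

  bool RewriteAll(unix_file::FdFile* file, const uint8_t* data, size_t size) {
    if (!file->ClearContent()) {   // truncate to zero length and seek back to offset 0
      return false;
    }
    return file->WriteFully(data, size);
  }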
 
   // This enum is public so that we can define the << operator over it.
   enum class GuardState {
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index e89c5af..3df9101 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -232,7 +232,7 @@
   }
 
   if (founddir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory.";
+    ADD_FAILURE() << "Cannot find Android tools directory.";
   }
   return founddir;
 }
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6e11cf8..a0f875d 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -230,11 +230,11 @@
     Dbg::PostException(exception_object);
   }
 
-  // We only care about how many backward branches were executed in the Jit.
-  void BackwardBranch(Thread* /*thread*/, ArtMethod* method, int32_t dex_pc_offset)
+  // We only care about branches in the Jit.
+  void Branch(Thread* /*thread*/, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    LOG(ERROR) << "Unexpected backward branch event in debugger " << PrettyMethod(method)
-               << " " << dex_pc_offset;
+    LOG(ERROR) << "Unexpected branch event in debugger " << PrettyMethod(method)
+               << " " << dex_pc << ", " << dex_pc_offset;
   }
 
   // We only care about invokes in the Jit.
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index bc8ba97..9b93c13 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -687,8 +687,8 @@
   return nullptr;
 }
 
-void DexFile::CreateTypeLookupTable() const {
-  lookup_table_.reset(TypeLookupTable::Create(*this));
+void DexFile::CreateTypeLookupTable(uint8_t* storage) const {
+  lookup_table_.reset(TypeLookupTable::Create(*this, storage));
 }
 
 // Given a signature place the type ids into the given vector
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 8a3db6c..968b37b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -1094,11 +1094,11 @@
   int32_t GetLineNumFromPC(ArtMethod* method, uint32_t rel_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Returns false if there is no debugging information or if it can not be decoded.
+  // Returns false if there is no debugging information or if it cannot be decoded.
   bool DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
                             DexDebugNewLocalCb local_cb, void* context) const;
 
-  // Returns false if there is no debugging information or if it can not be decoded.
+  // Returns false if there is no debugging information or if it cannot be decoded.
   bool DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
                                void* context) const;
 
@@ -1157,7 +1157,7 @@
     return lookup_table_.get();
   }
 
-  void CreateTypeLookupTable() const;
+  void CreateTypeLookupTable(uint8_t* storage = nullptr) const;
 
  private:
   // Opens a .dex file
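
The defaulted storage parameter lets a caller back the lookup table with memory it already owns (for example, space reserved in an oat file), while existing callers keep allocating internally. A minimal sketch, where oat_reserved_bytes is a hypothetical caller-owned buffer:

  dex_file->CreateTypeLookupTable();                    // table allocates its own storage
  dex_file->CreateTypeLookupTable(oat_reserved_bytes);  // table is built into caller-provided memory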
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 272249c..b67af53 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -686,31 +686,6 @@
 // Set of dex files for interface method tests. As it's not as easy to mutate method names, it's
 // just easier to break up bad cases.
 
-// Interface with an instance constructor.
-//
-// .class public interface LInterfaceMethodFlags;
-// .super Ljava/lang/Object;
-//
-// .method public static constructor <clinit>()V
-// .registers 1
-//     return-void
-// .end method
-//
-// .method public constructor <init>()V
-// .registers 1
-//     return-void
-// .end method
-static const char kMethodFlagsInterfaceWithInit[] =
-    "ZGV4CjAzNQDRNt+hZ6X3I+xe66iVlCW7h9I38HmN4SvUAQAAcAAAAHhWNBIAAAAAAAAAAEwBAAAF"
-    "AAAAcAAAAAMAAACEAAAAAQAAAJAAAAAAAAAAAAAAAAIAAACcAAAAAQAAAKwAAAAIAQAAzAAAAMwA"
-    "AADWAAAA3gAAAPYAAAAKAQAAAgAAAAMAAAAEAAAABAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAQAA"
-    "AAAAAAABAgAAAQAAAAAAAAD/////AAAAADoBAAAAAAAACDxjbGluaXQ+AAY8aW5pdD4AFkxJbnRl"
-    "cmZhY2VNZXRob2RGbGFnczsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgAAAAAAAAAAAQAAAAAAAAAA"
-    "AAAAAQAAAA4AAAABAAEAAAAAAAAAAAABAAAADgAAAAIAAImABJQCAYGABKgCAAALAAAAAAAAAAEA"
-    "AAAAAAAAAQAAAAUAAABwAAAAAgAAAAMAAACEAAAAAwAAAAEAAACQAAAABQAAAAIAAACcAAAABgAA"
-    "AAEAAACsAAAAAiAAAAUAAADMAAAAAxAAAAEAAAAQAQAAASAAAAIAAAAUAQAAACAAAAEAAAA6AQAA"
-    "ABAAAAEAAABMAQAA";
-
 // Standard interface. Use declared-synchronized again for 3B encoding.
 //
 // .class public interface LInterfaceMethodFlags;
@@ -751,13 +726,6 @@
 }
 
 TEST_F(DexFileVerifierTest, MethodAccessFlagsInterfaces) {
-  // Reject interface with <init>.
-  VerifyModification(
-      kMethodFlagsInterfaceWithInit,
-      "method_flags_interface_with_init",
-      [](DexFile* dex_file ATTRIBUTE_UNUSED) {},
-      "Non-clinit interface method 1 should not have code");
-
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_ok",
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 9a9f42b..0663b7e 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -193,10 +193,10 @@
       return nullptr;
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    // Pass in false since the object can not be finalizable.
+    // Pass in false since the object cannot be finalizable.
     return klass->Alloc<kInstrumented, false>(self, heap->GetCurrentAllocator());
   }
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -207,7 +207,7 @@
                                                       Thread* self,
                                                       gc::AllocatorType allocator_type) {
   DCHECK(klass != nullptr);
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -410,10 +410,19 @@
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
   } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
-    // Maintain interpreter-like semantics where NullPointerException is thrown
-    // after potential NoSuchMethodError from class linker.
-    ThrowNullPointerExceptionForMethodAccess(method_idx, type);
-    return nullptr;  // Failure.
+    if (UNLIKELY(resolved_method->GetDeclaringClass()->IsStringClass() &&
+                 resolved_method->IsConstructor())) {
+      // Hack for String init:
+      //
+      // We assume that the input of String.<init> in verified code is always
+      // an uninitialized reference. If it is a null constant, it must have been
+      // optimized out by the compiler. Do not throw NullPointerException.
+    } else {
+      // Maintain interpreter-like semantics where NullPointerException is thrown
+      // after potential NoSuchMethodError from class linker.
+      ThrowNullPointerExceptionForMethodAccess(method_idx, type);
+      return nullptr;  // Failure.
+    }
   } else if (access_check) {
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
     bool can_access_resolved_method =
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index ce6467a..7727b2d 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -180,7 +180,7 @@
   t.NewTiming("ProcessCards");
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), false, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 99e98bb..2784693 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -227,7 +227,7 @@
   BindBitmaps();
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 416510d..c8e913c 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -34,7 +34,7 @@
   kCollectorTypeSS,
   // A generational variant of kCollectorTypeSS.
   kCollectorTypeGSS,
-  // Mark compact colector.
+  // Mark compact collector.
   kCollectorTypeMC,
   // Heap trimming collector, doesn't do any actual collecting.
   kCollectorTypeHeapTrim,
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e7ea983..7b531ba 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -89,7 +89,6 @@
   class RegionSpace;
   class RosAllocSpace;
   class Space;
-  class SpaceTest;
   class ZygoteSpace;
 }  // namespace space
 
@@ -1335,7 +1334,6 @@
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
-  friend class space::SpaceTest;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 39ba743..5e7f1a2 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -86,7 +86,7 @@
     // it to the mutator as long as the GC is not preserving references.
     if (LIKELY(collector_ != nullptr)) {
       // If it's null it means not marked, but it could become marked if the referent is reachable
-      // by finalizer referents. So we can not return in this case and must block. Otherwise, we
+      // by finalizer referents. So we cannot return in this case and must block. Otherwise, we
       // can return it to the mutator as long as the GC is not preserving references, in which
       // case only black nodes can be safely returned. If the GC is preserving references, the
       // mutator could take a white field from a grey or white node and move it somewhere else
diff --git a/runtime/gc/space/dlmalloc_space_base_test.cc b/runtime/gc/space/dlmalloc_space_base_test.cc
deleted file mode 100644
index 93fe155..0000000
--- a/runtime/gc/space/dlmalloc_space_base_test.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-#include "dlmalloc_space.h"
-#include "scoped_thread_state_change.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(DlMallocSpace, CreateDlMallocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 2798b21..e70fe21 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -521,7 +521,7 @@
   num_bytes_allocated_ += allocation_size;
   total_bytes_allocated_ += allocation_size;
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(GetAddressForAllocationInfo(new_info));
-  // We always put our object at the start of the free block, there can not be another free block
+  // We always put our object at the start of the free block, there cannot be another free block
   // before it.
   if (kIsDebugBuild) {
     mprotect(obj, allocation_size, PROT_READ | PROT_WRITE);
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 05b484a..ad38724 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -22,7 +22,7 @@
 namespace gc {
 namespace space {
 
-class LargeObjectSpaceTest : public SpaceTest {
+class LargeObjectSpaceTest : public SpaceTest<CommonRuntimeTest> {
  public:
   void LargeObjectTest();
 
diff --git a/runtime/gc/space/rosalloc_space_base_test.cc b/runtime/gc/space/rosalloc_space_base_test.cc
deleted file mode 100644
index 0c5be03..0000000
--- a/runtime/gc/space/rosalloc_space_base_test.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(RosAllocSpace, CreateRosAllocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/space_create_test.cc b/runtime/gc/space/space_create_test.cc
new file mode 100644
index 0000000..aea2d9f
--- /dev/null
+++ b/runtime/gc/space/space_create_test.cc
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "space_test.h"
+
+#include "dlmalloc_space.h"
+#include "rosalloc_space.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+enum MallocSpaceType {
+  kMallocSpaceDlMalloc,
+  kMallocSpaceRosAlloc,
+};
+
+class SpaceCreateTest : public SpaceTest<CommonRuntimeTestWithParam<MallocSpaceType>> {
+ public:
+  MallocSpace* CreateSpace(const std::string& name,
+                           size_t initial_size,
+                           size_t growth_limit,
+                           size_t capacity,
+                           uint8_t* requested_begin) {
+    const MallocSpaceType type = GetParam();
+    if (type == kMallocSpaceDlMalloc) {
+      return DlMallocSpace::Create(name,
+                                   initial_size,
+                                   growth_limit,
+                                   capacity,
+                                   requested_begin,
+                                   false);
+    }
+    DCHECK_EQ(static_cast<uint32_t>(type), static_cast<uint32_t>(kMallocSpaceRosAlloc));
+    return RosAllocSpace::Create(name,
+                                 initial_size,
+                                 growth_limit,
+                                 capacity,
+                                 requested_begin,
+                                 Runtime::Current()->GetHeap()->IsLowMemoryMode(),
+                                 false);
+  }
+};
+
+TEST_P(SpaceCreateTest, InitTestBody) {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
+  {
+    // Init < max == growth
+    std::unique_ptr<Space> space(CreateSpace("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init == max == growth
+    space.reset(CreateSpace("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init > max == growth
+    space.reset(CreateSpace("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+    // Growth == init < max
+    space.reset(CreateSpace("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Growth < init < max
+    space.reset(CreateSpace("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+    // Init < growth < max
+    space.reset(CreateSpace("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init < max < growth
+    space.reset(CreateSpace("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+  }
+}
+
+// TODO: This test is not very good, we should improve it.
+// The test should do more allocations before the creation of the ZygoteSpace, and then do
+// allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
+// the GC works with the ZygoteSpace.
+TEST_P(SpaceCreateTest, ZygoteSpaceTestBody) {
+  size_t dummy;
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make space findable to the heap, will also delete space when runtime is cleaned up
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+
+  // Make sure that the zygote space isn't directly at the start of the space.
+  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  space::Space* old_space = space;
+  heap->RemoveSpace(old_space);
+  heap->RevokeAllThreadLocalBuffers();
+  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
+                                                              heap->IsLowMemoryMode(),
+                                                              &space);
+  delete old_space;
+  // Add the zygote space.
+  AddSpace(zygote_space, false);
+
+  // Make space findable to the heap, will also delete space when runtime is cleaned up
+  AddSpace(space, false);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  ptr1.Assign(Alloc(space,
+                    self,
+                    1 * MB,
+                    &ptr1_bytes_allocated,
+                    &ptr1_usable_size,
+                    &ptr1_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  ptr3.Assign(AllocWithGrowth(space,
+                              self,
+                              2 * MB,
+                              &ptr3_bytes_allocated,
+                              &ptr3_usable_size,
+                              &ptr3_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(2U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+
+  // Final clean up.
+  free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeTestBody) {
+  size_t dummy = 0;
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Make space findable to the heap, will also delete space when runtime is cleaned up
+  AddSpace(space);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeListTestBody) {
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make space findable to the heap, will also delete space when runtime is cleaned up
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the max allowed footprint.
+  mirror::Object* lots_of_objects[1024];
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
+    lots_of_objects[i] = Alloc(space,
+                               self,
+                               size_of_zero_length_byte_array,
+                               &allocation_size,
+                               &usable_size,
+                               &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release memory.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+
+  // Succeeds, fits by adjusting the max allowed footprint.
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    lots_of_objects[i] = AllocWithGrowth(space,
+                                         self,
+                                         1024,
+                                         &allocation_size,
+                                         &usable_size,
+                                         &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release memory.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+}
+
+INSTANTIATE_TEST_CASE_P(CreateRosAllocSpace,
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceRosAlloc));
+INSTANTIATE_TEST_CASE_P(CreateDlMallocSpace,
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceDlMalloc));
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 4d2db11..e588eb3 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -33,12 +33,10 @@
 namespace gc {
 namespace space {
 
-class SpaceTest : public CommonRuntimeTest {
+template <class Super>
+class SpaceTest : public Super {
  public:
-  jobject byte_array_class_;
-
-  SpaceTest() : byte_array_class_(nullptr) {
-  }
+  jobject byte_array_class_ = nullptr;
 
   void AddSpace(ContinuousSpace* space, bool revoke = true) {
     Heap* heap = Runtime::Current()->GetHeap();
@@ -62,13 +60,19 @@
     return reinterpret_cast<mirror::Class*>(self->DecodeJObject(byte_array_class_));
   }
 
-  mirror::Object* Alloc(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                        size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* Alloc(space::MallocSpace* alloc_space,
+                        Thread* self,
+                        size_t bytes,
+                        size_t* bytes_allocated,
+                        size_t* usable_size,
                         size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
     Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self)));
-    mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size,
+    mirror::Object* obj = alloc_space->Alloc(self,
+                                             bytes,
+                                             bytes_allocated,
+                                             usable_size,
                                              bytes_tl_bulk_allocated);
     if (obj != nullptr) {
       InstallClass(obj, byte_array_class.Get(), bytes);
@@ -76,8 +80,11 @@
     return obj;
   }
 
-  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                                  size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space,
+                                  Thread* self,
+                                  size_t bytes,
+                                  size_t* bytes_allocated,
+                                  size_t* usable_size,
                                   size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
@@ -117,10 +124,6 @@
 
   typedef MallocSpace* (*CreateSpaceFn)(const std::string& name, size_t initial_size, size_t growth_limit,
                                         size_t capacity, uint8_t* requested_begin);
-  void InitTestBody(CreateSpaceFn create_space);
-  void ZygoteSpaceTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeListTestBody(CreateSpaceFn create_space);
 
   void SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
                                            int round, size_t growth_limit);
@@ -132,278 +135,11 @@
   return *seed;
 }
 
-void SpaceTest::InitTestBody(CreateSpaceFn create_space) {
-  // This will lead to error messages in the log.
-  ScopedLogSeverity sls(LogSeverity::FATAL);
-
-  {
-    // Init < max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init == max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init > max == growth
-    std::unique_ptr<Space> space(create_space("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Growth == init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Growth < init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Init < growth < max
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init < max < growth
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-}
-
-// TODO: This test is not very good, we should improve it.
-// The test should do more allocations before the creation of the ZygoteSpace, and then do
-// allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
-// the GC works with the ZygoteSpace.
-void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) {
-  size_t dummy;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-
-  // Make sure that the zygote space isn't directly at the start of the space.
-  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  space::Space* old_space = space;
-  heap->RemoveSpace(old_space);
-  heap->RevokeAllThreadLocalBuffers();
-  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
-                                                              heap->IsLowMemoryMode(),
-                                                              &space);
-  delete old_space;
-  // Add the zygote space.
-  AddSpace(zygote_space, false);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space, false);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                    &ptr1_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                              &ptr3_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(2U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-
-  // Final clean up.
-  free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) {
-  size_t dummy = 0;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) {
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the max allowed footprint.
-  mirror::Object* lots_of_objects[1024];
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
-    lots_of_objects[i] = Alloc(space, self, size_of_zero_length_byte_array, &allocation_size,
-                               &usable_size, &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-
-  // Succeeds, fits by adjusting the max allowed footprint.
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size,
-                                         &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-}
-
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
-                                                    int round, size_t growth_limit) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space,
+                                                           intptr_t object_size,
+                                                           int round,
+                                                           size_t growth_limit) {
   if (((object_size > 0 && object_size >= static_cast<intptr_t>(growth_limit))) ||
       ((object_size < 0 && -object_size >= static_cast<intptr_t>(growth_limit)))) {
     // No allocation can succeed
@@ -576,7 +312,9 @@
   EXPECT_LE(space->Size(), growth_limit);
 }
 
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size, CreateSpaceFn create_space) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size,
+                                                             CreateSpaceFn create_space) {
   if (object_size < SizeOfZeroLengthByteArray()) {
     // Too small for the object layout/model.
     return;
@@ -614,25 +352,8 @@
     SizeFootPrintGrowthLimitAndTrimDriver(-size, spaceFn); \
   }
 
-#define TEST_SPACE_CREATE_FN_BASE(spaceName, spaceFn) \
-  class spaceName##BaseTest : public SpaceTest { \
-  }; \
-  \
-  TEST_F(spaceName##BaseTest, Init) { \
-    InitTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, ZygoteSpace) { \
-    ZygoteSpaceTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFree) { \
-    AllocAndFreeTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFreeList) { \
-    AllocAndFreeListTestBody(spaceFn); \
-  }
-
 #define TEST_SPACE_CREATE_FN_STATIC(spaceName, spaceFn) \
-  class spaceName##StaticTest : public SpaceTest { \
+  class spaceName##StaticTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(12B, spaceName, spaceFn, 12) \
@@ -648,7 +369,7 @@
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(8MB, spaceName, spaceFn, 8 * MB)
 
 #define TEST_SPACE_CREATE_FN_RANDOM(spaceName, spaceFn) \
-  class spaceName##RandomTest : public SpaceTest { \
+  class spaceName##RandomTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimRandom(16B, spaceName, spaceFn, 16) \
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index dfc1f5f..bb35ec7 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -419,18 +419,13 @@
   Hprof(const char* output_filename, int fd, bool direct_to_ddms)
       : filename_(output_filename),
         fd_(fd),
-        direct_to_ddms_(direct_to_ddms),
-        start_ns_(NanoTime()),
-        output_(nullptr),
-        current_heap_(HPROF_HEAP_DEFAULT),
-        objects_in_segment_(0),
-        next_string_id_(0x400000),
-        next_class_serial_number_(1) {
+        direct_to_ddms_(direct_to_ddms) {
     LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting...";
   }
 
   void Dump()
-    REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
+    REQUIRES(Locks::mutator_lock_)
+    REQUIRES(!Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
     {
       MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
       if (Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) {
@@ -462,10 +457,11 @@
     }
 
     if (okay) {
-      uint64_t duration = NanoTime() - start_ns_;
-      LOG(INFO) << "hprof: heap dump completed ("
-          << PrettySize(RoundUp(overall_size, 1024))
-          << ") in " << PrettyDuration(duration);
+      const uint64_t duration = NanoTime() - start_ns_;
+      LOG(INFO) << "hprof: heap dump completed (" << PrettySize(RoundUp(overall_size, KB))
+                << ") in " << PrettyDuration(duration)
+                << " objects " << total_objects_
+                << " objects with stack traces " << total_objects_with_stack_trace_;
     }
   }
 
@@ -855,7 +851,7 @@
     }
     CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1);
     CHECK_EQ(frames_.size(), next_frame_id);
-    VLOG(heap) << "hprof: found " << count << " objects with allocation stack traces";
+    total_objects_with_stack_trace_ = count;
   }
 
   // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored.
@@ -865,16 +861,19 @@
   int fd_;
   bool direct_to_ddms_;
 
-  uint64_t start_ns_;
+  uint64_t start_ns_ = NanoTime();
 
-  EndianOutput* output_;
+  EndianOutput* output_ = nullptr;
 
-  HprofHeapId current_heap_;  // Which heap we're currently dumping.
-  size_t objects_in_segment_;
+  HprofHeapId current_heap_ = HPROF_HEAP_DEFAULT;  // Which heap we're currently dumping.
+  size_t objects_in_segment_ = 0;
 
-  HprofStringId next_string_id_;
+  size_t total_objects_ = 0u;
+  size_t total_objects_with_stack_trace_ = 0u;
+
+  HprofStringId next_string_id_ = 0x400000;
   SafeMap<std::string, HprofStringId> strings_;
-  HprofClassSerialNumber next_class_serial_number_;
+  HprofClassSerialNumber next_class_serial_number_ = 1;
   SafeMap<mirror::Class*, HprofClassSerialNumber> classes_;
 
   std::unordered_map<const gc::AllocRecordStackTrace*, HprofStackTraceSerialNumber,
@@ -1064,6 +1063,8 @@
     return;
   }
 
+  ++total_objects_;
+
   GcRootVisitor visitor(this);
   obj->VisitReferences(visitor, VoidFunctor());
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 7d60264..c57b1bb 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -78,7 +78,7 @@
       have_field_read_listeners_(false),
       have_field_write_listeners_(false),
       have_exception_caught_listeners_(false),
-      have_backward_branch_listeners_(false),
+      have_branch_listeners_(false),
       have_invoke_virtual_or_interface_listeners_(false),
       deoptimized_methods_lock_("deoptimized methods lock"),
       deoptimization_enabled_(false),
@@ -431,11 +431,11 @@
                            method_unwind_listeners_,
                            listener,
                            &have_method_unwind_listeners_);
-  PotentiallyAddListenerTo(kBackwardBranch,
+  PotentiallyAddListenerTo(kBranch,
                            events,
-                           backward_branch_listeners_,
+                           branch_listeners_,
                            listener,
-                           &have_backward_branch_listeners_);
+                           &have_branch_listeners_);
   PotentiallyAddListenerTo(kInvokeVirtualOrInterface,
                            events,
                            invoke_virtual_or_interface_listeners_,
@@ -508,11 +508,11 @@
                                 method_unwind_listeners_,
                                 listener,
                                 &have_method_unwind_listeners_);
-  PotentiallyRemoveListenerFrom(kBackwardBranch,
+  PotentiallyRemoveListenerFrom(kBranch,
                                 events,
-                                backward_branch_listeners_,
+                                branch_listeners_,
                                 listener,
-                                &have_backward_branch_listeners_);
+                                &have_branch_listeners_);
   PotentiallyRemoveListenerFrom(kInvokeVirtualOrInterface,
                                 events,
                                 invoke_virtual_or_interface_listeners_,
@@ -917,11 +917,13 @@
   }
 }
 
-void Instrumentation::BackwardBranchImpl(Thread* thread, ArtMethod* method,
-                                         int32_t offset) const {
-  for (InstrumentationListener* listener : backward_branch_listeners_) {
+void Instrumentation::BranchImpl(Thread* thread,
+                                 ArtMethod* method,
+                                 uint32_t dex_pc,
+                                 int32_t offset) const {
+  for (InstrumentationListener* listener : branch_listeners_) {
     if (listener != nullptr) {
-      listener->BackwardBranch(thread, method, offset);
+      listener->Branch(thread, method, dex_pc, offset);
     }
   }
 }
@@ -931,7 +933,7 @@
                                                    ArtMethod* caller,
                                                    uint32_t dex_pc,
                                                    ArtMethod* callee) const {
-  // We can not have thread suspension since that would cause the this_object parameter to
+  // We cannot have thread suspension since that would cause the this_object parameter to
   // potentially become a dangling pointer. An alternative could be to put it in a handle instead.
   ScopedAssertNoThreadSuspension ants(thread, __FUNCTION__);
   for (InstrumentationListener* listener : invoke_virtual_or_interface_listeners_) {
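The handle-based alternative the comment mentions would follow the same StackHandleScope pattern used in the space tests removed above; this is a sketch of that idea only, since the current code deliberately asserts no suspension instead:

    // Not what the code does today -- shown only to illustrate the alternative.
    // Wrapping this_object in a Handle would keep it valid across a suspension
    // point, at the cost of a handle scope per reported event.
    StackHandleScope<1> hs(thread);
    Handle<mirror::Object> h_this(hs.NewHandle(this_object));
    // Listeners would then be given h_this.Get() instead of the raw pointer.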
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index b29245f..56aeefc 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -94,8 +94,11 @@
   virtual void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
-  // Call-back for when we get a backward branch.
-  virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  // Call-back for when we execute a branch.
+  virtual void Branch(Thread* thread,
+                      ArtMethod* method,
+                      uint32_t dex_pc,
+                      int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
   // Call-back for when we get an invokevirtual or an invokeinterface.
@@ -122,7 +125,7 @@
     kFieldRead = 0x10,
     kFieldWritten = 0x20,
     kExceptionCaught = 0x40,
-    kBackwardBranch = 0x80,
+    kBranch = 0x80,
     kInvokeVirtualOrInterface = 0x100,
   };
 
@@ -276,8 +279,8 @@
     return have_exception_caught_listeners_;
   }
 
-  bool HasBackwardBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return have_backward_branch_listeners_;
+  bool HasBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_branch_listeners_;
   }
 
   bool HasInvokeVirtualOrInterfaceListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -324,11 +327,11 @@
     }
   }
 
-  // Inform listeners that a backward branch has been taken (only supported by the interpreter).
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t offset) const
+  // Inform listeners that a branch has been taken (only supported by the interpreter).
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (UNLIKELY(HasBackwardBranchListeners())) {
-      BackwardBranchImpl(thread, method, offset);
+    if (UNLIKELY(HasBranchListeners())) {
+      BranchImpl(thread, method, dex_pc, offset);
     }
   }
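For context, a listener only has to override the widened callback to consume the new event. A minimal sketch, assuming the ART instrumentation headers are available and inventing a BranchCounter class (the remaining pure-virtual InstrumentationListener callbacks are omitted for brevity):

    // Hypothetical listener, sketch only -- not part of this change.
    class BranchCounter : public art::instrumentation::InstrumentationListener {
     public:
      void Branch(art::Thread* thread ATTRIBUTE_UNUSED,
                  art::ArtMethod* method ATTRIBUTE_UNUSED,
                  uint32_t dex_pc ATTRIBUTE_UNUSED,
                  int32_t dex_pc_offset)
          OVERRIDE SHARED_REQUIRES(art::Locks::mutator_lock_) {
        // The callback now fires on every branch; a non-positive offset is the
        // case the old BackwardBranch event used to report.
        if (dex_pc_offset <= 0) {
          ++backward_branches_;
        }
        ++total_branches_;
      }

     private:
      size_t total_branches_ = 0;
      size_t backward_branches_ = 0;
    };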
 
@@ -442,7 +445,7 @@
   void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                            ArtMethod* method, uint32_t dex_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void BackwardBranchImpl(Thread* thread, ArtMethod* method, int32_t offset) const
+  void BranchImpl(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   void InvokeVirtualOrInterfaceImpl(Thread* thread,
                                     mirror::Object* this_object,
@@ -513,8 +516,8 @@
   // Do we have any exception caught listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
-  // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_.
-  bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  // Do we have any branch listeners? Short-cut to avoid taking the instrumentation_lock_.
+  bool have_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
   // Do we have any invoke listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_invoke_virtual_or_interface_listeners_ GUARDED_BY(Locks::mutator_lock_);
@@ -537,7 +540,7 @@
   std::list<InstrumentationListener*> method_entry_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
-  std::list<InstrumentationListener*> backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> invoke_virtual_or_interface_listeners_
       GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index e4688a2..56e3bc5 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -37,7 +37,7 @@
     : received_method_enter_event(false), received_method_exit_event(false),
       received_method_unwind_event(false), received_dex_pc_moved_event(false),
       received_field_read_event(false), received_field_written_event(false),
-      received_exception_caught_event(false), received_backward_branch_event(false),
+      received_exception_caught_event(false), received_branch_event(false),
       received_invoke_virtual_or_interface_event(false) {}
 
   virtual ~TestInstrumentationListener() {}
@@ -100,11 +100,12 @@
     received_exception_caught_event = true;
   }
 
-  void BackwardBranch(Thread* thread ATTRIBUTE_UNUSED,
-                      ArtMethod* method ATTRIBUTE_UNUSED,
-                      int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+  void Branch(Thread* thread ATTRIBUTE_UNUSED,
+              ArtMethod* method ATTRIBUTE_UNUSED,
+              uint32_t dex_pc ATTRIBUTE_UNUSED,
+              int32_t dex_pc_offset ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    received_backward_branch_event = true;
+    received_branch_event = true;
   }
 
   void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
@@ -124,7 +125,7 @@
     received_field_read_event = false;
     received_field_written_event = false;
     received_exception_caught_event = false;
-    received_backward_branch_event = false;
+    received_branch_event = false;
     received_invoke_virtual_or_interface_event = false;
   }
 
@@ -135,7 +136,7 @@
   bool received_field_read_event;
   bool received_field_written_event;
   bool received_exception_caught_event;
-  bool received_backward_branch_event;
+  bool received_branch_event;
   bool received_invoke_virtual_or_interface_event;
 
  private:
@@ -305,8 +306,8 @@
         return instr->HasFieldWriteListeners();
       case instrumentation::Instrumentation::kExceptionCaught:
         return instr->HasExceptionCaughtListeners();
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return instr->HasBackwardBranchListeners();
+      case instrumentation::Instrumentation::kBranch:
+        return instr->HasBranchListeners();
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return instr->HasInvokeVirtualOrInterfaceListeners();
       default:
@@ -349,8 +350,8 @@
         self->ClearException();
         break;
       }
-      case instrumentation::Instrumentation::kBackwardBranch:
-        instr->BackwardBranch(self, method, dex_pc);
+      case instrumentation::Instrumentation::kBranch:
+        instr->Branch(self, method, dex_pc, -1);
         break;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         instr->InvokeVirtualOrInterface(self, obj, method, dex_pc, method);
@@ -378,8 +379,8 @@
         return listener.received_field_written_event;
       case instrumentation::Instrumentation::kExceptionCaught:
         return listener.received_exception_caught_event;
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return listener.received_backward_branch_event;
+      case instrumentation::Instrumentation::kBranch:
+        return listener.received_branch_event;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return listener.received_invoke_virtual_or_interface_event;
       default:
@@ -441,8 +442,8 @@
   TestEvent(instrumentation::Instrumentation::kExceptionCaught);
 }
 
-TEST_F(InstrumentationTest, BackwardBranchEvent) {
-  TestEvent(instrumentation::Instrumentation::kBackwardBranch);
+TEST_F(InstrumentationTest, BranchEvent) {
+  TestEvent(instrumentation::Instrumentation::kBranch);
 }
 
 TEST_F(InstrumentationTest, InvokeVirtualOrInterfaceEvent) {
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 8f715a3..2b2176e 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -61,7 +61,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // Only used by image writer. Special version that may not cause thread suspension since the GC
-  // can not be running while we are doing image writing. Maybe be called while while holding a
+  // cannot be running while we are doing image writing. May be called while holding a
   // lock since there will not be thread suspension.
   mirror::String* InternStrongImageString(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_);
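A hypothetical call site makes the constraint concrete: because this variant never suspends, the image writer can intern while already holding one of its own locks, which the regular interning path (which may suspend) cannot guarantee. Sketch under those assumptions:

    // Illustrative only; InternForImage is an invented name.
    art::mirror::String* InternForImage(art::InternTable* table, art::mirror::String* s)
        SHARED_REQUIRES(art::Locks::mutator_lock_) {
      // Safe even while holding an image-writer lock: no thread suspension occurs.
      return table->InternStrongImageString(s);
    }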
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 6b5218d..ec63fdf 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -239,7 +239,7 @@
 }
 
 #if !defined(__clang__)
-#if defined(__arm__)
+#if (defined(__arm__) || defined(__i386__))
 // TODO: remove when all targets implemented.
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
@@ -247,7 +247,7 @@
 #endif
 #else
 // Clang 3.4 fails to build the goto interpreter implementation.
-#if defined(__arm__)
+#if (defined(__arm__) || defined(__i386__))
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
 static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImplKind;
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 18fb0d8..ecd4de9 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -592,6 +592,10 @@
   //
   // (at this point the ArtMethod has already been replaced,
   // so we just need to fix-up the arguments)
+  //
+  // Note that FindMethodFromCode in entrypoint_utils-inl.h was also special-cased
+  // to handle the compiler optimization of replacing `this` with null without
+  // throwing NullPointerException.
   uint32_t string_init_vreg_this = is_range ? vregC : arg[0];
   if (UNLIKELY(string_init)) {
     DCHECK_GT(num_regs, 0u);  // As the method is an instance method, there should be at least 1.
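The fix-up the comment refers to amounts to redirecting every register that still holds the uninitialized receiver to the string the factory call produced. A minimal sketch with a hypothetical helper name (the real logic lives right below in this function), assuming ShadowFrame's usual vreg accessors:

    // Illustrative helper, not the actual ART code.
    void FixUpStringInitVRegs(art::ShadowFrame* frame,
                              art::mirror::Object* uninitialized_this,
                              art::mirror::String* new_string)
        SHARED_REQUIRES(art::Locks::mutator_lock_) {
      for (uint32_t i = 0; i < frame->NumberOfVRegs(); ++i) {
        // Redirect every alias of the placeholder receiver to the real string.
        if (frame->GetVRegReference(i) == uninitialized_this) {
          frame->SetVRegReference(i, new_string);
        }
      }
    }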
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 9766299..ca00621 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -63,10 +63,10 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+#define BRANCH_INSTRUMENTATION(offset) \
   do { \
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
   } while (false)
 
 #define UNREACHABLE_CODE_CHECK()                \
@@ -633,8 +633,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO) {
     int8_t offset = inst->VRegA_10t(inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -646,8 +646,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO_16) {
     int16_t offset = inst->VRegA_20t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -659,8 +659,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO_32) {
     int32_t offset = inst->VRegA_30t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -672,8 +672,8 @@
 
   HANDLE_INSTRUCTION_START(PACKED_SWITCH) {
     int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -685,8 +685,8 @@
 
   HANDLE_INSTRUCTION_START(SPARSE_SWITCH) {
     int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -788,8 +788,8 @@
   HANDLE_INSTRUCTION_START(IF_EQ) {
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) == shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -806,8 +806,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -824,8 +824,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -842,8 +842,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -860,8 +860,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
     shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -878,8 +878,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -895,8 +895,8 @@
   HANDLE_INSTRUCTION_START(IF_EQZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -912,8 +912,8 @@
   HANDLE_INSTRUCTION_START(IF_NEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -929,8 +929,8 @@
   HANDLE_INSTRUCTION_START(IF_LTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -946,8 +946,8 @@
   HANDLE_INSTRUCTION_START(IF_GEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -963,8 +963,8 @@
   HANDLE_INSTRUCTION_START(IF_GTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -980,8 +980,8 @@
   HANDLE_INSTRUCTION_START(IF_LEZ)  {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bab0d40..c3b75b2 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -69,9 +69,9 @@
     }                                                                                           \
   } while (false)
 
-#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+#define BRANCH_INSTRUMENTATION(offset) \
   do { \
-    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
   } while (false)
 
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
@@ -565,8 +565,8 @@
       case Instruction::GOTO: {
         PREAMBLE();
         int8_t offset = inst->VRegA_10t(inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -575,8 +575,8 @@
       case Instruction::GOTO_16: {
         PREAMBLE();
         int16_t offset = inst->VRegA_20t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -585,8 +585,8 @@
       case Instruction::GOTO_32: {
         PREAMBLE();
         int32_t offset = inst->VRegA_30t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -595,8 +595,8 @@
       case Instruction::PACKED_SWITCH: {
         PREAMBLE();
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -605,8 +605,8 @@
       case Instruction::SPARSE_SWITCH: {
         PREAMBLE();
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -709,8 +709,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) ==
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -724,8 +724,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -739,8 +739,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -754,8 +754,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -769,8 +769,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -784,8 +784,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -798,8 +798,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -812,8 +812,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -826,8 +826,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -840,8 +840,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -854,8 +854,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -868,8 +868,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
index 277817d..5fab379 100644
--- a/runtime/interpreter/mterp/config_x86
+++ b/runtime/interpreter/mterp/config_x86
@@ -36,262 +36,262 @@
     # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
     # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
 
-    op op_nop FALLBACK
-    op op_move FALLBACK
-    op op_move_from16 FALLBACK
-    op op_move_16 FALLBACK
-    op op_move_wide FALLBACK
-    op op_move_wide_from16 FALLBACK
-    op op_move_wide_16 FALLBACK
-    op op_move_object FALLBACK
-    op op_move_object_from16 FALLBACK
-    op op_move_object_16 FALLBACK
-    op op_move_result FALLBACK
-    op op_move_result_wide FALLBACK
-    op op_move_result_object FALLBACK
-    op op_move_exception FALLBACK
-    op op_return_void FALLBACK
-    op op_return FALLBACK
-    op op_return_wide FALLBACK
-    op op_return_object FALLBACK
-    op op_const_4 FALLBACK
-    op op_const_16 FALLBACK
-    op op_const FALLBACK
-    op op_const_high16 FALLBACK
-    op op_const_wide_16 FALLBACK
-    op op_const_wide_32 FALLBACK
-    op op_const_wide FALLBACK
-    op op_const_wide_high16 FALLBACK
-    op op_const_string FALLBACK
-    op op_const_string_jumbo FALLBACK
-    op op_const_class FALLBACK
-    op op_monitor_enter FALLBACK
-    op op_monitor_exit FALLBACK
-    op op_check_cast FALLBACK
-    op op_instance_of FALLBACK
-    op op_array_length FALLBACK
-    op op_new_instance FALLBACK
-    op op_new_array FALLBACK
-    op op_filled_new_array FALLBACK
-    op op_filled_new_array_range FALLBACK
-    op op_fill_array_data FALLBACK
-    op op_throw FALLBACK
-    op op_goto FALLBACK
-    op op_goto_16 FALLBACK
-    op op_goto_32 FALLBACK
-    op op_packed_switch FALLBACK
-    op op_sparse_switch FALLBACK
-    op op_cmpl_float FALLBACK
-    op op_cmpg_float FALLBACK
-    op op_cmpl_double FALLBACK
-    op op_cmpg_double FALLBACK
-    op op_cmp_long FALLBACK
-    op op_if_eq FALLBACK
-    op op_if_ne FALLBACK
-    op op_if_lt FALLBACK
-    op op_if_ge FALLBACK
-    op op_if_gt FALLBACK
-    op op_if_le FALLBACK
-    op op_if_eqz FALLBACK
-    op op_if_nez FALLBACK
-    op op_if_ltz FALLBACK
-    op op_if_gez FALLBACK
-    op op_if_gtz FALLBACK
-    op op_if_lez FALLBACK
-    op_unused_3e FALLBACK
-    op_unused_3f FALLBACK
-    op_unused_40 FALLBACK
-    op_unused_41 FALLBACK
-    op_unused_42 FALLBACK
-    op_unused_43 FALLBACK
-    op op_aget FALLBACK
-    op op_aget_wide FALLBACK
-    op op_aget_object FALLBACK
-    op op_aget_boolean FALLBACK
-    op op_aget_byte FALLBACK
-    op op_aget_char FALLBACK
-    op op_aget_short FALLBACK
-    op op_aput FALLBACK
-    op op_aput_wide FALLBACK
-    op op_aput_object FALLBACK
-    op op_aput_boolean FALLBACK
-    op op_aput_byte FALLBACK
-    op op_aput_char FALLBACK
-    op op_aput_short FALLBACK
-    op op_iget FALLBACK
-    op op_iget_wide FALLBACK
-    op op_iget_object FALLBACK
-    op op_iget_boolean FALLBACK
-    op op_iget_byte FALLBACK
-    op op_iget_char FALLBACK
-    op op_iget_short FALLBACK
-    op op_iput FALLBACK
-    op op_iput_wide FALLBACK
-    op op_iput_object FALLBACK
-    op op_iput_boolean FALLBACK
-    op op_iput_byte FALLBACK
-    op op_iput_char FALLBACK
-    op op_iput_short FALLBACK
-    op op_sget FALLBACK
-    op op_sget_wide FALLBACK
-    op op_sget_object FALLBACK
-    op op_sget_boolean FALLBACK
-    op op_sget_byte FALLBACK
-    op op_sget_char FALLBACK
-    op op_sget_short FALLBACK
-    op op_sput FALLBACK
-    op op_sput_wide FALLBACK
-    op op_sput_object FALLBACK
-    op op_sput_boolean FALLBACK
-    op op_sput_byte FALLBACK
-    op op_sput_char FALLBACK
-    op op_sput_short FALLBACK
-    op op_invoke_virtual FALLBACK
-    op op_invoke_super FALLBACK
-    op op_invoke_direct FALLBACK
-    op op_invoke_static FALLBACK
-    op op_invoke_interface FALLBACK
-    op op_return_void_no_barrier FALLBACK
-    op op_invoke_virtual_range FALLBACK
-    op op_invoke_super_range FALLBACK
-    op op_invoke_direct_range FALLBACK
-    op op_invoke_static_range FALLBACK
-    op op_invoke_interface_range FALLBACK
-    op_unused_79 FALLBACK
-    op_unused_7a FALLBACK
-    op op_neg_int FALLBACK
-    op op_not_int FALLBACK
-    op op_neg_long FALLBACK
-    op op_not_long FALLBACK
-    op op_neg_float FALLBACK
-    op op_neg_double FALLBACK
-    op op_int_to_long FALLBACK
-    op op_int_to_float FALLBACK
-    op op_int_to_double FALLBACK
-    op op_long_to_int FALLBACK
-    op op_long_to_float FALLBACK
-    op op_long_to_double FALLBACK
-    op op_float_to_int FALLBACK
-    op op_float_to_long FALLBACK
-    op op_float_to_double FALLBACK
-    op op_double_to_int FALLBACK
-    op op_double_to_long FALLBACK
-    op op_double_to_float FALLBACK
-    op op_int_to_byte FALLBACK
-    op op_int_to_char FALLBACK
-    op op_int_to_short FALLBACK
-    op op_add_int FALLBACK
-    op op_sub_int FALLBACK
-    op op_mul_int FALLBACK
-    op op_div_int FALLBACK
-    op op_rem_int FALLBACK
-    op op_and_int FALLBACK
-    op op_or_int FALLBACK
-    op op_xor_int FALLBACK
-    op op_shl_int FALLBACK
-    op op_shr_int FALLBACK
-    op op_ushr_int FALLBACK
-    op op_add_long FALLBACK
-    op op_sub_long FALLBACK
-    op op_mul_long FALLBACK
-    op op_div_long FALLBACK
-    op op_rem_long FALLBACK
-    op op_and_long FALLBACK
-    op op_or_long FALLBACK
-    op op_xor_long FALLBACK
-    op op_shl_long FALLBACK
-    op op_shr_long FALLBACK
-    op op_ushr_long FALLBACK
-    op op_add_float FALLBACK
-    op op_sub_float FALLBACK
-    op op_mul_float FALLBACK
-    op op_div_float FALLBACK
-    op op_rem_float FALLBACK
-    op op_add_double FALLBACK
-    op op_sub_double FALLBACK
-    op op_mul_double FALLBACK
-    op op_div_double FALLBACK
-    op op_rem_double FALLBACK
-    op op_add_int_2addr FALLBACK
-    op op_sub_int_2addr FALLBACK
-    op op_mul_int_2addr FALLBACK
-    op op_div_int_2addr FALLBACK
-    op op_rem_int_2addr FALLBACK
-    op op_and_int_2addr FALLBACK
-    op op_or_int_2addr FALLBACK
-    op op_xor_int_2addr FALLBACK
-    op op_shl_int_2addr FALLBACK
-    op op_shr_int_2addr FALLBACK
-    op op_ushr_int_2addr FALLBACK
-    op op_add_long_2addr FALLBACK
-    op op_sub_long_2addr FALLBACK
-    op op_mul_long_2addr FALLBACK
-    op op_div_long_2addr FALLBACK
-    op op_rem_long_2addr FALLBACK
-    op op_and_long_2addr FALLBACK
-    op op_or_long_2addr FALLBACK
-    op op_xor_long_2addr FALLBACK
-    op op_shl_long_2addr FALLBACK
-    op op_shr_long_2addr FALLBACK
-    op op_ushr_long_2addr FALLBACK
-    op op_add_float_2addr FALLBACK
-    op op_sub_float_2addr FALLBACK
-    op op_mul_float_2addr FALLBACK
-    op op_div_float_2addr FALLBACK
-    op op_rem_float_2addr FALLBACK
-    op op_add_double_2addr FALLBACK
-    op op_sub_double_2addr FALLBACK
-    op op_mul_double_2addr FALLBACK
-    op op_div_double_2addr FALLBACK
-    op op_rem_double_2addr FALLBACK
-    op op_add_int_lit16 FALLBACK
-    op op_rsub_int FALLBACK
-    op op_mul_int_lit16 FALLBACK
-    op op_div_int_lit16 FALLBACK
-    op op_rem_int_lit16 FALLBACK
-    op op_and_int_lit16 FALLBACK
-    op op_or_int_lit16 FALLBACK
-    op op_xor_int_lit16 FALLBACK
-    op op_add_int_lit8 FALLBACK
-    op op_rsub_int_lit8 FALLBACK
-    op op_mul_int_lit8 FALLBACK
-    op op_div_int_lit8 FALLBACK
-    op op_rem_int_lit8 FALLBACK
-    op op_and_int_lit8 FALLBACK
-    op op_or_int_lit8 FALLBACK
-    op op_xor_int_lit8 FALLBACK
-    op op_shl_int_lit8 FALLBACK
-    op op_shr_int_lit8 FALLBACK
-    op op_ushr_int_lit8 FALLBACK
-    op op_iget_quick FALLBACK
-    op op_iget_wide_quick FALLBACK
-    op op_iget_object_quick FALLBACK
-    op op_iput_quick FALLBACK
-    op op_iput_wide_quick FALLBACK
-    op op_iput_object_quick FALLBACK
-    op op_invoke_virtual_quick FALLBACK
-    op op_invoke_virtual_range_quick FALLBACK
-    op op_iput_boolean_quick FALLBACK
-    op op_iput_byte_quick FALLBACK
-    op op_iput_char_quick FALLBACK
-    op op_iput_short_quick FALLBACK
-    op op_iget_boolean_quick FALLBACK
-    op op_iget_byte_quick FALLBACK
-    op op_iget_char_quick FALLBACK
-    op op_iget_short_quick FALLBACK
-    op_unused_f3 FALLBACK
-    op_unused_f4 FALLBACK
-    op_unused_f5 FALLBACK
-    op_unused_f6 FALLBACK
-    op_unused_f7 FALLBACK
-    op_unused_f8 FALLBACK
-    op_unused_f9 FALLBACK
-    op_unused_fa FALLBACK
-    op_unused_fb FALLBACK
-    op_unused_fc FALLBACK
-    op_unused_fd FALLBACK
-    op_unused_fe FALLBACK
-    op_unused_ff FALLBACK
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    op op_invoke_lambda FALLBACK
+    # op op_unused_f4 FALLBACK
+    op op_capture_variable FALLBACK
+    op op_create_lambda FALLBACK
+    op op_liberate_variable FALLBACK
+    op op_box_lambda FALLBACK
+    op op_unbox_lambda FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
 op-end
 
 # common subroutines for asm
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
new file mode 100644
index 0000000..923d502
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -0,0 +1,12945 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'x86'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: x86/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing ExecuteXXXImpl() style body (doesn't
+  handle invoke, allows higher-level code to create frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+x86 ABI general notes:
+
+Caller save set:
+   eax, edx, ecx, st(0)-st(7)
+Callee save set:
+   ebx, esi, edi, ebp
+Return regs:
+   32-bit in eax
+   64-bit in edx:eax (low-order 32 in eax)
+   fp on top of fp stack st(0)
+
+Parameters passed on stack, pushed right-to-left.  On entry to target, first
+parm is at 4(%esp).  Traditional entry code is:
+
+functEntry:
+    push    %ebp             # save old frame pointer
+    mov     %esp,%ebp        # establish new frame pointer
+    sub     FrameSize,%esp   # Allocate storage for spill, locals & outs
+
+Once past the prologue, arguments are referenced at ((argno + 2)*4)(%ebp)
+
+Stack must be 16-byte aligned to support SSE in native code.
+
+If we're not doing variable stack allocation (alloca), the frame pointer can be
+eliminated and all arg references adjusted to be esp relative.
+*/
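+
+/*
+ * Worked example of the formula above (purely illustrative): with the
+ * traditional prologue in place, argument 0 is at ((0 + 2) * 4)(%ebp) = 8(%ebp)
+ * and argument 1 at 12(%ebp), e.g.:
+ *
+ *     movl    8(%ebp), %eax        # load first argument
+ */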
+
+/*
+Mterp and x86 notes:
+
+Some key interpreter variables will be assigned to registers.
+
+  nick     reg   purpose
+  rPC      esi   interpreted program counter, used for fetching instructions
+  rFP      edi   interpreted frame pointer, used for accessing locals and args
+  rINSTw   bx    first 16-bit code unit of current instruction
+  rINSTbl  bl    opcode portion of instruction word
+  rINSTbh  bh    high byte of inst word, usually contains src/tgt reg names
+  rIBASE   edx   base of instruction handler table
+  rREFS    ebp   base of object references in shadow frame.
+
+Notes:
+   o High order 16 bits of ebx must be zero on entry to handler
+   o rPC, rFP, rINSTw/rINSTbl valid on handler entry and exit
+   o eax and ecx are scratch, rINSTw/ebx sometimes scratch
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
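+
+/*
+ * Sketch of these conventions in use (mirrors the "op vA, vB" handlers below):
+ * both register numbers are decoded from rINST without touching rPC or rFP:
+ *
+ *     movzbl  rINSTbl, %eax        # eax <- BA byte of the instruction word
+ *     andb    $0xf, %al            # eax <- A (low nibble)
+ *     shrl    $4, rINST            # rINST <- B (high nibble)
+ */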
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* Frame size must be 16-byte aligned.
+ * Remember the 4 bytes already used by the return address.
+ */
+#define FRAME_SIZE     44
+
+/* Frame diagram while executing ExecuteMterpImpl, high to low addresses */
+#define IN_ARG3        (FRAME_SIZE + 16)
+#define IN_ARG2        (FRAME_SIZE + 12)
+#define IN_ARG1        (FRAME_SIZE +  8)
+#define IN_ARG0        (FRAME_SIZE +  4)
+#define CALLER_RP      (FRAME_SIZE +  0)
+/* Spill offsets relative to %esp */
+#define EBP_SPILL      (FRAME_SIZE -  4)
+#define EDI_SPILL      (FRAME_SIZE -  8)
+#define ESI_SPILL      (FRAME_SIZE - 12)
+#define EBX_SPILL      (FRAME_SIZE - 16)
+#define LOCAL0         (FRAME_SIZE - 20)
+#define LOCAL1         (FRAME_SIZE - 24)
+#define LOCAL2         (FRAME_SIZE - 28)
+/* Out Arg offsets, relative to %esp */
+#define OUT_ARG3       ( 12)
+#define OUT_ARG2       (  8)
+#define OUT_ARG1       (  4)
+#define OUT_ARG0       (  0)  /* <- ExecuteMterpImpl esp + 0 */
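+
+/*
+ * Frame arithmetic (follows from the values above): the 4-byte return address
+ * plus FRAME_SIZE gives 4 + 44 = 48 bytes, a multiple of 16, so %esp remains
+ * 16-byte aligned at every outgoing call.  After the prologue the return
+ * address sits at CALLER_RP = 44(%esp), the incoming arguments at
+ * 48(%esp)..60(%esp), and outgoing arguments are written at 0(%esp)..12(%esp).
+ */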
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rSELF    IN_ARG0(%esp)
+#define rPC      %esi
+#define rFP      %edi
+#define rINST    %ebx
+#define rINSTw   %bx
+#define rINSTbh  %bh
+#define rINSTbl  %bl
+#define rIBASE   %edx
+#define rREFS    %ebp
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
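+
+/*
+ * Example of the backwards-offset scheme (values follow from the macros above):
+ * rFP points at the vregs array, so a ShadowFrame field at offset "a" lives at
+ * the negative displacement a - SHADOWFRAME_VREGS_OFFSET from rFP.  The current
+ * ArtMethod*, for instance, is loaded with:
+ *
+ *     movl    OFF_FP_METHOD(rFP), %eax
+ */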
+
+/*
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
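+
+/*
+ * Typical use (a sketch; see the handlers below): EXPORT_PC is issued before
+ * any call that can throw, e.g.
+ *
+ *     EXPORT_PC
+ *     ...set up outgoing args...
+ *     call    MterpConstString         # may throw; GetDexPC can now recover the dex pc
+ */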
+
+/*
+ * Refresh handler table.
+ * rIBASE lives in a caller-save register (%edx), so it must be restored after each call.
+ * %edx is also clobbered as the result register of some 64-bit operations (like imul),
+ * so rIBASE must be restored in those cases as well.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro REFRESH_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * If rSELF has already been loaded into a register, refresh rIBASE directly from that register.
+ */
+.macro REFRESH_IBASE_FROM_SELF _reg
+    movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
+.endm
+
+/*
+ * Refresh rINST.
+ * On entry to a handler rINST does not contain the opcode number.
+ * However, some utilities require the full instruction word, so this
+ * macro restores the opcode number.
+ */
+.macro REFRESH_INST _opnum
+    movb    rINSTbl, rINSTbh
+    movb    $\_opnum, rINSTbl
+.endm
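+
+/*
+ * Worked example (as used by the new-instance handler below): on entry to a
+ * handler rINSTbl holds the AA byte left there by GOTO_NEXT.  REFRESH_INST 34
+ * copies that byte into rINSTbh and writes 34 (0x22, new-instance) into
+ * rINSTbl, so rINSTw once again holds the full first code unit (AA << 8) | 0x22.
+ */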
+
+/*
+ * Fetch the next instruction from rPC into rINSTw.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    movzwl  (rPC), rINST
+.endm
+
+/*
+ * Remove opcode from rINST, compute the address of handler and jump to it.
+ */
+.macro GOTO_NEXT
+    movzx   rINSTbl,%eax
+    movzbl  rINSTbh,rINST
+    shll    $7, %eax
+    addl    rIBASE, %eax
+    jmp     *%eax
+.endm
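+
+/*
+ * Dispatch arithmetic (opcode chosen for illustration): each handler below is
+ * aligned to 128 bytes (.balign 128), so shll $7 turns the opcode into its
+ * offset in the handler table.  For const/string (0x1a) the target address is
+ * rIBASE + (0x1a << 7) = rIBASE + 0xd00.
+ */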
+
+/*
+ * Advance rPC by instruction count.
+ */
+.macro ADVANCE_PC _count
+    leal    2*\_count(rPC), rPC
+.endm
+
+/*
+ * Advance rPC by instruction count, fetch instruction and jump to handler.
+ */
+.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count
+    ADVANCE_PC \_count
+    FETCH_INST
+    GOTO_NEXT
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define VREG_ADDRESS(_vreg) (rFP,_vreg,4)
+#define VREG_HIGH_ADDRESS(_vreg) 4(rFP,_vreg,4)
+#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4)
+#define VREG_REF_HIGH_ADDRESS(_vreg) 4(rREFS,_vreg,4)
+
+.macro GET_VREG _reg _vreg
+    movl    (rFP,\_vreg,4), \_reg
+.endm
+
+/* Read wide value to xmm. */
+.macro GET_WIDE_FP_VREG _reg _vreg
+    movq    (rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    $0, (rREFS,\_vreg,4)
+.endm
+
+/* Write wide value from xmm. xmm is clobbered. */
+.macro SET_WIDE_FP_VREG _reg _vreg
+    movq    \_reg, (rFP,\_vreg,4)
+    pxor    \_reg, \_reg
+    movq    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro SET_VREG_OBJECT _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro GET_VREG_HIGH _reg _vreg
+    movl    4(rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG_HIGH _reg _vreg
+    movl    \_reg, 4(rFP,\_vreg,4)
+    movl    $0, 4(rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_REF _vreg
+    movl    $0,  (rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_WIDE_REF _vreg
+    movl    $0,  (rREFS,\_vreg,4)
+    movl    $0, 4(rREFS,\_vreg,4)
+.endm
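+
+/*
+ * Addressing example (vreg number chosen for illustration): with 4-byte vregs,
+ * v5 lives at 20(rFP) and its reference slot at 20(rREFS).  SET_VREG stores the
+ * value and zeroes the reference slot, while SET_VREG_OBJECT writes the value
+ * to both, keeping the GC-visible copy in sync:
+ *
+ *     SET_VREG %eax %ecx               # v[ecx] <- eax, refs[ecx] <- 0
+ *     SET_VREG_OBJECT %eax %ecx        # v[ecx] <- eax, refs[ecx] <- eax
+ */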
+
+/* File: x86/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  0  Thread* self
+ *  1  code_item
+ *  2  ShadowFrame
+ *  3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    /* Allocate frame */
+    subl    $FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset FRAME_SIZE
+
+    /* Spill callee save regs */
+    movl    %ebp, EBP_SPILL(%esp)
+    movl    %edi, EDI_SPILL(%esp)
+    movl    %esi, ESI_SPILL(%esp)
+    movl    %ebx, EBX_SPILL(%esp)
+
+    /* Load ShadowFrame pointer */
+    movl    IN_ARG2(%esp), %edx
+
+    /* Remember the return register */
+    movl    IN_ARG3(%esp), %eax
+    movl    %eax, SHADOWFRAME_RESULT_REGISTER_OFFSET(%edx)
+
+    /* Remember the code_item */
+    movl    IN_ARG1(%esp), %ecx
+    movl    %ecx, SHADOWFRAME_CODE_ITEM_OFFSET(%edx)
+
+    /* set up "named" registers */
+    movl    SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(%edx), %eax
+    leal    SHADOWFRAME_VREGS_OFFSET(%edx), rFP
+    leal    (rFP, %eax, 4), rREFS
+    movl    SHADOWFRAME_DEX_PC_OFFSET(%edx), %eax
+    lea     CODEITEM_INSNS_OFFSET(%ecx), rPC
+    lea     (rPC, %eax, 2), rPC
+    EXPORT_PC
+
+    /* Starting ibase */
+    REFRESH_IBASE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: x86/op_nop.S */
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: x86/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG rINST rINST
+    .if 0
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: x86/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzx   rINSTbl, %eax                   # eax <- AA
+    movw    2(rPC), rINSTw                  # rINSTw <- BBBB
+    GET_VREG rINST rINST                    # rINST <- fp[BBBB]
+    .if 0
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: x86/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwl  4(rPC), %ecx                    # ecx <- BBBB
+    movzwl  2(rPC), %eax                    # eax <- AAAA
+    GET_VREG rINST %ecx
+    .if 0
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: x86/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %ecx             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: x86/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  2(rPC), %ecx                    # ecx <- BBBB
+    movzbl  rINSTbl, %eax                   # eax <- AA
+    GET_WIDE_FP_VREG %xmm0 %ecx             # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %eax             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: x86/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  4(rPC), %ecx                    # ecx<- BBBB
+    movzwl  2(rPC), %eax                    # eax<- AAAA
+    GET_WIDE_FP_VREG %xmm0 %ecx             # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %eax             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: x86/op_move_object.S */
+/* File: x86/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG rINST rINST
+    .if 1
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: x86/op_move_object_from16.S */
+/* File: x86/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzx   rINSTbl, %eax                   # eax <- AA
+    movw    2(rPC), rINSTw                  # rINSTw <- BBBB
+    GET_VREG rINST rINST                    # rINST <- fp[BBBB]
+    .if 1
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: x86/op_move_object_16.S */
+/* File: x86/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwl  4(rPC), %ecx                    # ecx <- BBBB
+    movzwl  2(rPC), %eax                    # eax <- AAAA
+    GET_VREG rINST %ecx
+    .if 1
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: x86/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    (%eax), %eax                    # r0 <- result.i.
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- fp[B]
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: x86/op_move_result_wide.S */
+    /* move-result-wide vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    4(%eax), %ecx                   # Get high
+    movl    (%eax), %eax                    # Get low
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[AA+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: x86/op_move_result_object.S */
+/* File: x86/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    (%eax), %eax                    # r0 <- result.i.
+    .if 1
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- fp[B]
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: x86/op_move_exception.S */
+    /* move-exception vAA */
+    movl    rSELF, %ecx
+    movl    THREAD_EXCEPTION_OFFSET(%ecx), %eax
+    SET_VREG_OBJECT %eax rINST              # fp[AA] <- exception object
+    movl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: x86/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: x86/op_return.S */
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    GET_VREG %eax rINST                     # eax <- vAA
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: x86/op_return_wide.S */
+/*
+ * Return a 64-bit value.
+ */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    GET_VREG_HIGH %ecx rINST                # ecx <- v[AA+1]
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: x86/op_return_object.S */
+/* File: x86/op_return.S */
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    GET_VREG %eax rINST                     # eax <- vAA
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: x86/op_const_4.S */
+    /* const/4 vA, #+B */
+    movsx   rINSTbl, %eax                   # eax <-ssssssBx
+    movl    $0xf, rINST
+    andl    %eax, rINST                     # rINST <- A
+    sarl    $4, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: x86/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    movswl  2(rPC), %ecx                    # ecx <- ssssBBBB
+    SET_VREG %ecx rINST                     # vAA <- ssssBBBB
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: x86/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # grab all 32 bits at once
+    SET_VREG %eax rINST                     # vAA<- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: x86/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $16, %eax                      # eax <- BBBB0000
+    SET_VREG %eax rINST                     # vAA <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: x86/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    movswl  2(rPC), %eax                    # eax <- ssssBBBB
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssssssBBBB
+    SET_VREG_HIGH rIBASE rINST              # store msw
+    SET_VREG %eax rINST                     # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: x86/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- BBBBbbbb
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssBBBBbbbb
+    SET_VREG_HIGH rIBASE rINST              # store msw
+    SET_VREG %eax rINST                     # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: x86/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- lsw
+    movzbl  rINSTbl, %ecx                   # ecx <- AA
+    movl    6(rPC), rINST                   # rINST <- msw
+    SET_VREG %eax %ecx
+    SET_VREG_HIGH  rINST %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 5
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: x86/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $16, %eax                      # eax <- BBBB0000
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    xorl    %eax, %eax
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: x86/op_const_string.S */
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstString                # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: x86/op_const_string_jumbo.S */
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %eax                    # eax <- BBBBBBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstString                # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: x86/op_const_class.S */
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstClass                 # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: x86/op_monitor_enter.S */
+/*
+ * Synchronize on an object.
+ */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    artLockObjectFromCode           # (object, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: x86/op_monitor_exit.S */
+/*
+ * Unlock an object.
+ *
+ * Exceptions that occur when unlocking a monitor need to appear as
+ * if they happened at the following instruction.  See the Dalvik
+ * instruction spec.
+ */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    artUnlockObjectFromCode         # (object, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: x86/op_check_cast.S */
+/*
+ * Check to see if a cast from one class to another is allowed.
+ */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpCheckCast                  # (index, obj, method, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: x86/op_instance_of.S */
+/*
+ * Check to see if an object reference is an instance of a class.
+ *
+ * Most common situation is a non-null object, being compared against
+ * an already-resolved class.
+ */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %ecx %eax                      # Get object
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpInstanceOf                 # (index, obj, method, self)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    andb    $0xf, rINSTbl                  # rINSTbl <- A
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: x86/op_array_length.S */
+/*
+ * Return the length of an array.
+ */
+    mov     rINST, %eax                     # eax <- BA
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %ecx rINST                     # ecx <- vB (object ref)
+    testl   %ecx, %ecx                      # is null?
+    je      common_errNullObject
+    andb    $0xf, %al                      # eax <- A
+    movl    MIRROR_ARRAY_LENGTH_OFFSET(%ecx), rINST
+    SET_VREG rINST %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: x86/op_new_instance.S */
+/*
+ * Create a new instance of a class.
+ */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    REFRESH_INST 34
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpNewInstance
+    REFRESH_IBASE
+    testl   %eax, %eax                 # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: x86/op_new_array.S */
+/*
+ * Allocate an array of objects, specified with the array class
+ * and a count.
+ *
+ * The verifier guarantees that this is an array class, so we don't
+ * check for it here.
+ */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 35
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpNewArray
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: x86/op_filled_new_array.S */
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)
+    call    MterpFilledNewArray
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: x86/op_filled_new_array_range.S */
+/* File: x86/op_filled_new_array.S */
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)
+    call    MterpFilledNewArrayRange
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: x86/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    GET_VREG %eax rINST                     # eax <- vAA (array object)
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpFillArrayData              # (obj, payload)
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: x86/op_throw.S */
+/*
+ * Throw an exception object in the current thread.
+ */
+    /* throw vAA */
+    EXPORT_PC
+    GET_VREG %eax rINST                     # eax<- vAA (exception object)
+    testl   %eax, %eax
+    jz      common_errNullObject
+    movl    rSELF,%ecx
+    movl    %eax, THREAD_EXCEPTION_OFFSET(%ecx)
+    jmp     MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: x86/op_goto.S */
+/*
+ * Unconditional branch, 8-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto +AA */
+    movsbl  rINSTbl, %eax                   # eax <- ssssssAA
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: x86/op_goto_16.S */
+/*
+ * Unconditional branch, 16-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto/16 +AAAA */
+    movswl  2(rPC), %eax                    # eax <- ssssAAAA
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: x86/op_goto_32.S */
+/*
+ * Unconditional branch, 32-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ *
+ * Unlike most opcodes, this one is allowed to branch to itself, so
+ * our "backward branch" test must be "<=0" instead of "<0".  Because
+ * we need the V bit set, we'll use an adds to convert from Dalvik
+ * offset to byte offset.
+ */
+    /* goto/32 +AAAAAAAA */
+    movl    2(rPC), %eax                    # eax <- AAAAAAAA
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: x86/op_packed_switch.S */
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    GET_VREG %eax rINST                     # eax <- vAA
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
+    movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
+    call    MterpDoPackedSwitch
+    addl    %eax, %eax
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    REFRESH_IBASE
+    jg      1f
+#if MTERP_SUSPEND
+    #     REFRESH_IBASE - we did it above.
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: x86/op_sparse_switch.S */
+/* File: x86/op_packed_switch.S */
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    GET_VREG %eax rINST                     # eax <- vAA
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
+    movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
+    call    MterpDoSparseSwitch
+    addl    %eax, %eax
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    REFRESH_IBASE
+    jg      1f
+#if MTERP_SUSPEND
+    #     REFRESH_IBASE - we did it above.
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: x86/op_cmpl_float.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movss VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomiss VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpl_float_nan_is_neg
+    je      .Lop_cmpl_float_finish
+    jb      .Lop_cmpl_float_less
+.Lop_cmpl_float_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpl_float_finish
+.Lop_cmpl_float_nan_is_neg:
+.Lop_cmpl_float_less:
+    decl    %eax
+.Lop_cmpl_float_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: x86/op_cmpg_float.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movss VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomiss VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpg_float_nan_is_pos
+    je      .Lop_cmpg_float_finish
+    jb      .Lop_cmpg_float_less
+.Lop_cmpg_float_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpg_float_finish
+.Lop_cmpg_float_nan_is_neg:
+.Lop_cmpg_float_less:
+    decl    %eax
+.Lop_cmpg_float_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: x86/op_cmpl_double.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movsd VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomisd VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpl_double_nan_is_neg
+    je      .Lop_cmpl_double_finish
+    jb      .Lop_cmpl_double_less
+.Lop_cmpl_double_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpl_double_finish
+.Lop_cmpl_double_nan_is_neg:
+.Lop_cmpl_double_less:
+    decl    %eax
+.Lop_cmpl_double_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: x86/op_cmpg_double.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movsd VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomisd VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpg_double_nan_is_pos
+    je      .Lop_cmpg_double_finish
+    jb      .Lop_cmpg_double_less
+.Lop_cmpg_double_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpg_double_finish
+.Lop_cmpg_double_nan_is_neg:
+.Lop_cmpg_double_less:
+    decl    %eax
+.Lop_cmpg_double_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: x86/op_cmp_long.S */
+/*
+ * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+ * register based on the results of the comparison.
+ */
+    /* cmp-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1], BB is clobbered
+    cmpl    VREG_HIGH_ADDRESS(%ecx), %eax
+    jl      .Lop_cmp_long_smaller
+    jg      .Lop_cmp_long_bigger
+    movzbl  2(rPC), %eax                    # eax <- BB, restore BB
+    GET_VREG %eax %eax                      # eax <- v[BB]
+    sub     VREG_ADDRESS(%ecx), %eax
+    ja      .Lop_cmp_long_bigger
+    jb      .Lop_cmp_long_smaller
+.Lop_cmp_long_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_cmp_long_bigger:
+    movl    $1, %eax
+    jmp     .Lop_cmp_long_finish
+
+.Lop_cmp_long_smaller:
+    movl    $-1, %eax
+    jmp     .Lop_cmp_long_finish
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: x86/op_if_eq.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jne   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: x86/op_if_ne.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    je   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: x86/op_if_lt.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jge   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: x86/op_if_ge.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jl   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: x86/op_if_gt.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jle   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: x86/op_if_le.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jg   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: x86/op_if_eqz.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jne   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: x86/op_if_nez.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    je   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: x86/op_if_ltz.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jge   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: x86/op_if_gez.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jl   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: x86/op_if_gtz.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jle   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: x86/op_if_lez.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jg   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: x86/op_unused_3e.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: x86/op_unused_3f.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: x86/op_unused_40.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: x86/op_unused_41.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: x86/op_unused_42.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: x86/op_unused_43.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movl   MIRROR_INT_ARRAY_DATA_OFFSET(%eax,%ecx,4), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: x86/op_aget_wide.S */
+/*
+ * Array get, 64 bits.  vAA <- vBB[vCC].
+ */
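+/*
+ * The 64-bit element is copied through %xmm0 so both halves of vAA are
+ * written with a single 64-bit load/store pair instead of two 32-bit moves.
+ */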
+    /* aget-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    movq    (%eax), %xmm0                   # xmm0 <- vBB[vCC]
+    SET_WIDE_FP_VREG %xmm0 rINST            # vAA <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: x86/op_aget_object.S */
+/*
+ * Array object get.  vAA <- vBB[vCC].
+ *
+ * for: aget-object
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    artAGetObjectFromMterp          # (array, index)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG_OBJECT %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: x86/op_aget_boolean.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movzbl   MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: x86/op_aget_byte.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movsbl   MIRROR_BYTE_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: x86/op_aget_char.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movzwl   MIRROR_CHAR_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: x86/op_aget_short.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movswl   MIRROR_SHORT_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_INT_ARRAY_DATA_OFFSET(%eax,%ecx,4), %eax
+    GET_VREG rINST rINST
+    movl  rINST, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: x86/op_aput_wide.S */
+/*
+ * Array put, 64 bits.  vBB[vCC] <- vAA.
+ *
+ */
+    /* aput-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0 <- vAA
+    movq    %xmm0, (%eax)                   # vBB[vCC] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: x86/op_aput_object.S */
+/*
+ * Store an object into an array.  vBB[vCC] <- vAA.
+ */
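+/*
+ * Object stores need an array store (component type) check and a GC write
+ * barrier, so the whole operation is handed off to the MterpAputObject
+ * helper rather than being inlined here.
+ */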
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 77
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpAputObject            # (shadow_frame, pc, inst)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: x86/op_aput_boolean.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    GET_VREG rINST rINST
+    movb  rINSTbl, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: x86/op_aput_byte.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_BYTE_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    GET_VREG rINST rINST
+    movb  rINSTbl, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: x86/op_aput_char.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_CHAR_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    GET_VREG rINST rINST
+    movw  rINSTw, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: x86/op_aput_short.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_SHORT_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    GET_VREG rINST rINST
+    movw  rINSTw, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
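+/*
+ * The ".if 0 / .else / .endif" below is the generation-time expansion of
+ * the template's is_object flag: the object flavor (op_iget_object, where
+ * the flag expands to 1) uses SET_VREG_OBJECT so the shadow frame's
+ * reference slot is updated along with the value.
+ */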
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGet32InstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: x86/op_iget_wide.S */
+/*
+ * 64-bit instance field get.
+ *
+ * for: iget-wide
+ */
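+/*
+ * The 64-bit result comes back in %edx:%eax.  %edx doubles as rIBASE, which
+ * is why SET_VREG_HIGH consumes it right away and rIBASE is refreshed from
+ * rSELF before the next fetch.
+ */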
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGet64InstanceFromCode
+    mov     rSELF, %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    SET_VREG %eax rINST
+    SET_VREG_HIGH %edx rINST
+    REFRESH_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: x86/op_iget_object.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetObjInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 1
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: x86/op_iget_boolean.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetBooleanInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: x86/op_iget_byte.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetByteInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: x86/op_iget_char.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetCharInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: x86/op_iget_short.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetShortInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet32InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: x86/op_iput_wide.S */
+    /* iput-wide vA, vB, field@CCCC */
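+/*
+ * Unlike the 32-bit iput above, the wide value is not loaded into a
+ * register: the address of fp[A] is passed so artSet64InstanceFromMterp
+ * can read both halves itself.
+ */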
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl,%ecx                    # ecx <- BA
+    sarl    $4,%ecx                        # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf,rINSTbl                   # rINST <- A
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet64InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: x86/op_iput_object.S */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 91
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpIputObject
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: x86/op_iput_boolean.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet8InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: x86/op_iput_byte.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet8InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: x86/op_iput_char.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet16InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: x86/op_iput_short.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet16InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGet32StaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: x86/op_sget_wide.S */
+/*
+ * SGET_WIDE handler wrapper.
+ *
+ */
+    /* sget-wide vAA, field@BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGet64StaticFromCode
+    movl    rSELF, %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG %eax rINST                     # fp[A]<- low part
+    SET_VREG_HIGH %edx rINST                # fp[A+1]<- high part
+    REFRESH_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: x86/op_sget_object.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetObjStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 1
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: x86/op_sget_boolean.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetBooleanStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: x86/op_sget_byte.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetByteStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: x86/op_sget_char.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetCharStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: x86/op_sget_short.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetShortStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet32StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet32StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: x86/op_sput_wide.S */
+/*
+ * SPUT_WIDE handler wrapper.
+ *
+ */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet64IndirectStaticFromMterp
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: x86/op_sput_object.S */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 105
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpSputObject
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: x86/op_sput_boolean.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet8StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: x86/op_sput_byte.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet8StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: x86/op_sput_char.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet16StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: x86/op_sput_short.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet16StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: x86/op_invoke_virtual.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
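+/*
+ * Argument copying, frame setup and the actual dispatch all happen in the
+ * MterpInvoke* C++ helper; a zero return value means an exception is
+ * pending, hence the "jz MterpException" below.  The PC then advances by
+ * 3 code units, the width of these invoke instructions.
+ */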
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 110
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeVirtual
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/*
+ * Handle a virtual method call.
+ *
+ * for: invoke-virtual, invoke-virtual/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: x86/op_invoke_super.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 111
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeSuper
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/*
+ * Handle a "super" method call.
+ *
+ * for: invoke-super, invoke-super/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: x86/op_invoke_direct.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 112
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeDirect
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: x86/op_invoke_static.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 113
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeStatic
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: x86/op_invoke_interface.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 114
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeInterface
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/*
+ * Handle an interface method call.
+ *
+ * for: invoke-interface, invoke-interface/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: x86/op_return_void_no_barrier.S */
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: x86/op_invoke_virtual_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 116
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeVirtualRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: x86/op_invoke_super_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 117
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeSuperRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: x86/op_invoke_direct_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 118
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeDirectRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: x86/op_invoke_static_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 119
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeStaticRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: x86/op_invoke_interface_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 120
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeInterfaceRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: x86/op_unused_79.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: x86/op_unused_7a.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: x86/op_neg_int.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    negl    %eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: x86/op_not_int.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    notl %eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: x86/op_neg_long.S */
+    /* unop vA, vB */
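+    /*
+     * 64-bit two's-complement negation: negl sets the carry flag when the
+     * low word is non-zero, adcl folds that borrow into the high word, and
+     * the second negl finishes the job without needing a temporary register.
+     */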
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax %ecx                      # eax <- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- v[B+1]
+    negl    %eax
+    adcl    $0, %ecx
+    negl    %ecx
+    SET_VREG %eax rINST                     # v[A+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: x86/op_not_long.S */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax %ecx                      # eax <- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- v[B+1]
+    notl    %eax
+    notl    %ecx
+    SET_VREG %eax rINST                     # v[A+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: x86/op_neg_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    fchs
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: x86/op_neg_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    fchs
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: x86/op_int_to_long.S */
+    /* int to long vA, vB */
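+    /*
+     * cltd sign-extends %eax into %edx, but %edx doubles as rIBASE, so the
+     * current rIBASE is parked in %ecx and restored once the wide result
+     * has been stored.
+     */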
+    movzbl  rINSTbl, %eax                   # eax <- +A
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movl    rIBASE, %ecx                    # cltd trashes rIBASE/edx
+    cltd                                    # rINST:eax<- sssssssBBBBBBBB
+    SET_VREG_HIGH rIBASE rINST              # v[A+1] <- rIBASE
+    SET_VREG %eax rINST                     # v[A+0] <- %eax
+    movl    %ecx, rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: x86/op_int_to_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
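+/*
+ * The blank line between the "andb" and the "fstps" below is the template's
+ * optional arithmetic slot; it is empty here because the fildl/fstps pair
+ * by itself performs the int-to-float conversion (compare op_neg_float
+ * above, where the slot holds "fchs").
+ */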
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: x86/op_int_to_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: x86/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: x86/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG rINST rINST
+    .if 0
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: x86/op_long_to_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildll   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: x86/op_long_to_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildll   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: x86/op_float_to_int.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 0
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 0
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 0
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_float_to_int_special_case # fix up result
+
+.Lop_float_to_int_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 0
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_float_to_int_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_float_to_int_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 0
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+   jmp      .Lop_float_to_int_finish
+.Lop_float_to_int_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 0
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_int_finish
+
+
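+/*
+ * A minimal C sketch of the Java float-to-int rules the handler above
+ * implements (java_f2i is an illustrative name, not part of the runtime):
+ *
+ *     #include <stdint.h>
+ *     int32_t java_f2i(float f) {
+ *         if (f != f)              return 0;          // NaN -> zero
+ *         if (f >= 2147483648.0f)  return INT32_MAX;  // clamp to maxint
+ *         if (f <= -2147483648.0f) return INT32_MIN;  // clamp to minint
+ *         return (int32_t)f;                          // truncate toward zero
+ *     }
+ */
+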
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: x86/op_float_to_long.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 0
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 1
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 1
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_float_to_long_special_case # fix up result
+
+.Lop_float_to_long_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 1
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_float_to_long_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_float_to_long_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 1
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+   jmp      .Lop_float_to_long_finish
+.Lop_float_to_long_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 1
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_long_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: x86/op_float_to_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: x86/op_double_to_int.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 1
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 0
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 0
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_double_to_int_special_case # fix up result
+
+.Lop_double_to_int_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 0
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_double_to_int_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_double_to_int_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 0
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+   jmp      .Lop_double_to_int_finish
+.Lop_double_to_int_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 0
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_int_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: x86/op_double_to_long.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 1
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 1
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 1
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_double_to_long_special_case # fix up result
+
+.Lop_double_to_long_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 1
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_double_to_long_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_double_to_long_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 1
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+   jmp      .Lop_double_to_long_finish
+.Lop_double_to_long_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 1
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_long_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: x86/op_double_to_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: x86/op_int_to_byte.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    movsbl  %al, %eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: x86/op_int_to_char.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    movzwl  %ax,%eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: x86/op_int_to_short.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    movswl %ax, %eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: x86/op_add_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    addl    (rFP,%ecx,4), %eax                                  # ex: addl    (rFP,%ecx,4),%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: x86/op_sub_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    subl    (rFP,%ecx,4), %eax                                  # ex: addl    (rFP,%ecx,4),%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: x86/op_mul_int.S */
+    /*
+     * 32-bit binary multiplication.
+     */
+    /* mul vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: x86/op_div_int.S */
+/* File: x86/bindiv.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    mov     rIBASE, LOCAL0(%esp)
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    movl    %eax, %edx
+    orl     %ecx, %edx
+    test    $0xFFFFFF00, %edx              # If both arguments are less
+                                            #   than 8-bit and +ve
+    jz      .Lop_div_int_8                   # Do 8-bit divide
+    test    $0xFFFF0000, %edx              # If both arguments are less
+                                            #   than 16-bit and +ve
+    jz      .Lop_div_int_16                  # Do 16-bit divide
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_32
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_32
+    movl    $0x80000000, %eax
+    jmp     .Lop_div_int_finish
+.Lop_div_int_32:
+    cltd
+    idivl   %ecx
+    jmp     .Lop_div_int_finish
+.Lop_div_int_8:
+    div     %cl                             # 8-bit divide otherwise.
+                                            # Remainder in %ah, quotient in %al
+    .if 0
+    movl    %eax, %edx
+    shr     $8, %edx
+    .else
+    andl    $0x000000FF, %eax
+    .endif
+    jmp     .Lop_div_int_finish
+.Lop_div_int_16:
+    xorl    %edx, %edx                      # Clear %edx before divide
+    div     %cx
+.Lop_div_int_finish:
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
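+/*
+ * Why the explicit minint/-1 check above: idivl raises #DE on
+ * INT_MIN / -1 (the quotient +2^31 does not fit in 32 bits), while Java
+ * defines the result.  A minimal C sketch of the required semantics
+ * (java_idiv is an illustrative name only):
+ *
+ *     #include <stdint.h>
+ *     int32_t java_idiv(int32_t a, int32_t b) {
+ *         // b == 0 throws ArithmeticException (common_errDivideByZero above)
+ *         if (a == INT32_MIN && b == -1) return INT32_MIN;  // overflow case
+ *         return a / b;                                     // truncating divide
+ *     }
+ */
+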
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: x86/op_rem_int.S */
+/* File: x86/bindiv.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    mov     rIBASE, LOCAL0(%esp)
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    movl    %eax, %edx
+    orl     %ecx, %edx
+    test    $0xFFFFFF00, %edx              # If both arguments are less
+                                            #   than 8-bit and +ve
+    jz      .Lop_rem_int_8                   # Do 8-bit divide
+    test    $0xFFFF0000, %edx              # If both arguments are less
+                                            #   than 16-bit and +ve
+    jz      .Lop_rem_int_16                  # Do 16-bit divide
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_32
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_32
+    movl    $0, rIBASE
+    jmp     .Lop_rem_int_finish
+.Lop_rem_int_32:
+    cltd
+    idivl   %ecx
+    jmp     .Lop_rem_int_finish
+.Lop_rem_int_8:
+    div     %cl                             # 8-bit divide otherwise.
+                                            # Remainder in %ah, quotient in %al
+    .if 1
+    movl    %eax, %edx
+    shr     $8, %edx
+    .else
+    andl    $0x000000FF, %eax
+    .endif
+    jmp     .Lop_rem_int_finish
+.Lop_rem_int_16:
+    xorl    %edx, %edx                      # Clear %edx before divide
+    div     %cx
+.Lop_rem_int_finish:
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: x86/op_and_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    andl    (rFP,%ecx,4), %eax                                  # ex: addl    (rFP,%ecx,4),%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: x86/op_or_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    orl     (rFP,%ecx,4), %eax                                  # ex: addl    (rFP,%ecx,4),%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: x86/op_xor_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    xorl    (rFP,%ecx,4), %eax                                  # ex: addl    (rFP,%ecx,4),%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: x86/op_shl_int.S */
+/* File: x86/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands are loaded
+ * into registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    sall    %cl, %eax                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: x86/op_shr_int.S */
+/* File: x86/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands are loaded
+ * into registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    sarl    %cl, %eax                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: x86/op_ushr_int.S */
+/* File: x86/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands are loaded
+ * into registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    shrl    %cl, %eax                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: x86/op_add_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    addl    (rFP,%ecx,4), rIBASE                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    adcl    4(rFP,%ecx,4), %eax                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
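+/*
+ * The addl/adcl pair above is the usual way to add two 64-bit values with
+ * 32-bit instructions: add the low words, then add the high words plus the
+ * carry out of the low add.  A C sketch of that decomposition (names are
+ * illustrative only):
+ *
+ *     #include <stdint.h>
+ *     void add64(uint32_t a_lo, uint32_t a_hi, uint32_t b_lo, uint32_t b_hi,
+ *                uint32_t* r_lo, uint32_t* r_hi) {
+ *         uint32_t lo = a_lo + b_lo;
+ *         uint32_t carry = lo < a_lo;      // carry out of the low-word add
+ *         *r_lo = lo;
+ *         *r_hi = a_hi + b_hi + carry;     // what adcl folds in
+ *     }
+ */
+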
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: x86/op_sub_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    subl    (rFP,%ecx,4), rIBASE                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    sbbl    4(rFP,%ecx,4), %eax                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: x86/op_mul_long.S */
+/*
+ * Signed 64-bit integer multiply.
+ *
+ * We could definitely use more free registers for
+ * this code.  We spill rINST (ebx),
+ * giving us eax, ebx, ecx and edx as computational
+ * temps.  On top of that, we'll spill esi (rPC)
+ * for use as the vB pointer and edi (rFP) for use
+ * as the vC pointer.  Yuck.
+ *
+ */
+    /* mul-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[B]
+    leal    (rFP,%ecx,4), rFP               # rFP <- &v[C]
+    movl    4(%esi), %ecx                   # ecx <- Bmsw
+    imull   (rFP), %ecx                     # ecx <- (Bmsw*Clsw)
+    movl    4(rFP), %eax                    # eax <- Cmsw
+    imull   (%esi), %eax                    # eax <- (Cmsw*Blsw)
+    addl    %eax, %ecx                      # ecx <- (Bmsw*Clsw)+(Cmsw*Blsw)
+    movl    (rFP), %eax                     # eax <- Clsw
+    mull    (%esi)                          # eax <- (Clsw*Alsw)
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL1(%esp), rFP               # restore FP
+    leal    (%ecx,rIBASE), rIBASE           # full result now in rIBASE:%eax
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
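+/*
+ * The three multiplies above build the low 64 bits of the product from
+ * 32-bit halves: the cross terms Bmsw*Clsw and Cmsw*Blsw only affect the
+ * high word, and mull supplies Blsw*Clsw as a full 64-bit value.  A C
+ * sketch of the same decomposition (names are illustrative only):
+ *
+ *     #include <stdint.h>
+ *     uint64_t mul_long(uint64_t b, uint64_t c) {
+ *         uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
+ *         uint32_t c_lo = (uint32_t)c, c_hi = (uint32_t)(c >> 32);
+ *         uint64_t low  = (uint64_t)b_lo * c_lo;        // mull
+ *         uint32_t high = b_hi * c_lo + c_hi * b_lo;    // two imull + addl
+ *         return low + ((uint64_t)high << 32);          // folded by the leal
+ *     }
+ */
+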
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: x86/op_div_long.S */
+/* art_quick_* methods have the quick ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* div vAA, vBB, vCC */
+    .extern art_quick_ldiv
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movzbl  3(rPC), %eax                    # eax <- CC
+    GET_VREG %ecx %eax
+    GET_VREG_HIGH %ebx %eax
+    movl    %ecx, %edx
+    orl     %ebx, %ecx
+    jz      common_errDivideByZero
+    movzbl  2(rPC), %eax                    # eax <- BB
+    GET_VREG_HIGH %ecx %eax
+    GET_VREG %eax %eax
+    call    art_quick_ldiv
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: x86/op_rem_long.S */
+/* File: x86/op_div_long.S */
+/* art_quick_* methods have the quick ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* div vAA, vBB, vCC */
+    .extern art_quick_lmod
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movzbl  3(rPC), %eax                    # eax <- CC
+    GET_VREG %ecx %eax
+    GET_VREG_HIGH %ebx %eax
+    movl    %ecx, %edx
+    orl     %ebx, %ecx
+    jz      common_errDivideByZero
+    movzbl  2(rPC), %eax                    # eax <- BB
+    GET_VREG_HIGH %ecx %eax
+    GET_VREG %eax %eax
+    call    art_quick_lmod
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: x86/op_and_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    andl    (rFP,%ecx,4), rIBASE                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    andl    4(rFP,%ecx,4), %eax                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: x86/op_or_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    orl     (rFP,%ecx,4), rIBASE                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    orl     4(rFP,%ecx,4), %eax                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: x86/op_xor_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    xorl    (rFP,%ecx,4), rIBASE                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    xorl    4(rFP,%ecx,4), %eax                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: x86/op_shl_long.S */
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shl-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE <- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shldl   %eax,rIBASE
+    sall    %cl, %eax
+    testb   $32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
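+/*
+ * The testb $32 fixup above exists because Dalvik masks the shift count to
+ * 6 bits (0..63) while 32-bit shld/sal only see the count modulo 32.  A C
+ * sketch of the intended semantics (shl_long is an illustrative name only):
+ *
+ *     #include <stdint.h>
+ *     uint64_t shl_long(uint64_t value, int32_t shift) {
+ *         // counts 32..63 move the low word into the high word and zero
+ *         // the low word, which is exactly the special case handled above
+ *         return value << (shift & 63);
+ *     }
+ */
+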
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: x86/op_shr_long.S */
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE<- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: x86/op_ushr_long.S */
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* ushr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE <- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: x86/op_add_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    addss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: x86/op_sub_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    subss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: x86/op_mul_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    mulss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: x86/op_div_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    divss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: x86/op_rem_float.S */
+    /* rem_float vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    flds    VREG_ADDRESS(%ecx)              # vCC (divisor) to fp stack
+    flds    VREG_ADDRESS(%eax)              # vBB (dividend) to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(rINST)             # %st to vAA
+    CLEAR_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
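+/*
+ * The fprem/fstsw/jp loop above repeats until C2 clears, since fprem may
+ * deliver only a partial reduction when the exponents are far apart, and
+ * fprem (rather than fprem1) is used because Java's % on floating point
+ * truncates like C fmod instead of rounding like IEEE remainder.  A C
+ * sketch of the intended result (java_frem is an illustrative name only):
+ *
+ *     #include <math.h>
+ *     float java_frem(float dividend, float divisor) {
+ *         return fmodf(dividend, divisor);   // result has the dividend's sign
+ *     }
+ */
+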
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: x86/op_add_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    addsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: x86/op_sub_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    subsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: x86/op_mul_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    mulsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: x86/op_div_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    divsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: x86/op_rem_double.S */
+    /* rem_double vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    fldl    VREG_ADDRESS(%ecx)              # %st1 <- fp[vCC] (divisor)
+    fldl    VREG_ADDRESS(%eax)              # %st0 <- fp[vBB] (dividend)
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(rINST)             # fp[vAA] <- %st
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: x86/op_add_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    addl    %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: x86/op_sub_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    subl    %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: x86/op_mul_int_2addr.S */
+    /* mul vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: x86/op_div_int_2addr.S */
+/* File: x86/bindiv2addr.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    mov     rIBASE, LOCAL0(%esp)
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_2addr_continue_div2addr
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_2addr_continue_div2addr
+    movl    $0x80000000, %eax
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_div_int_2addr_continue_div2addr:
+    cltd
+    idivl   %ecx
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: x86/op_rem_int_2addr.S */
+/* File: x86/bindiv2addr.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    mov     rIBASE, LOCAL0(%esp)
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_2addr_continue_div2addr
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_2addr_continue_div2addr
+    movl    $0, rIBASE
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_rem_int_2addr_continue_div2addr:
+    cltd
+    idivl   %ecx
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: x86/op_and_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    andl    %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: x86/op_or_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    orl     %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: x86/op_xor_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    xorl    %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: x86/op_shl_int_2addr.S */
+/* File: x86/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    sall    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: x86/op_shr_int_2addr.S */
+/* File: x86/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    sarl    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: x86/op_ushr_int_2addr.S */
+/* File: x86/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    shrl    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: x86/op_add_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    addl    %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    adcl    %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: x86/op_sub_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    subl    %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    sbbl    %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: x86/op_mul_long_2addr.S */
+/*
+ * Signed 64-bit integer multiply, 2-addr version
+ *
+ * We could definitely use more free registers for
+ * this code.  We must spill %edx (rIBASE) because it
+ * is used by imul.  We'll also spill rINST (ebx),
+ * giving us eax, ebx, ecx and rIBASE as computational
+ * temps.  On top of that, we'll spill rPC (esi)
+ * for use as the vA pointer and rFP (edi) for use
+ * as the vB pointer.  Yuck.
+ */
+    /* mul-long/2addr vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    CLEAR_WIDE_REF %eax                     # clear refs in advance
+    sarl    $4, rINST                      # rINST <- B
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[A]
+    leal    (rFP,rINST,4), rFP              # rFP <- &v[B]
+    movl    4(%esi), %ecx                   # ecx <- Amsw
+    imull   (rFP), %ecx                     # ecx <- (Amsw*Blsw)
+    movl    4(rFP), %eax                    # eax <- Bmsw
+    imull   (%esi), %eax                    # eax <- (Bmsw*Alsw)
+    addl    %eax, %ecx                      # ecx <- (Amsw*Blsw)+(Bmsw*Alsw)
+    movl    (rFP), %eax                     # eax <- Blsw
+    mull    (%esi)                          # eax <- (Blsw*Alsw)
+    leal    (%ecx,rIBASE), rIBASE           # full result now in %edx:%eax
+    movl    rIBASE, 4(%esi)                 # v[A+1] <- rIBASE
+    movl    %eax, (%esi)                    # v[A] <- %eax
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    mov     LOCAL1(%esp), rFP               # restore FP
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: x86/op_div_long_2addr.S */
+/* art_quick_* methods have the quick ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* div/2addr vA, vB */
+    .extern   art_quick_ldiv
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    movzbl  rINSTbl, %eax
+    shrl    $4, %eax                       # eax <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movl    %ebx, %ecx
+    GET_VREG %edx %eax
+    GET_VREG_HIGH %ebx %eax
+    movl    %edx, %eax
+    orl     %ebx, %eax
+    jz      common_errDivideByZero
+    GET_VREG %eax %ecx
+    GET_VREG_HIGH %ecx %ecx
+    call    art_quick_ldiv
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: x86/op_rem_long_2addr.S */
+/* File: x86/op_div_long_2addr.S */
+/* art_quick_* methods have the quick ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* rem/2addr vA, vB */
+    .extern   art_quick_lmod
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    movzbl  rINSTbl, %eax
+    shrl    $4, %eax                       # eax <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movl    %ebx, %ecx
+    GET_VREG %edx %eax
+    GET_VREG_HIGH %ebx %eax
+    movl    %edx, %eax
+    orl     %ebx, %eax
+    jz      common_errDivideByZero
+    GET_VREG %eax %ecx
+    GET_VREG_HIGH %ecx %ecx
+    call    art_quick_lmod
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: x86/op_and_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
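+/*
+ * In effect (pseudocode): v[A] op= vB.lo; v[A+1] op= vB.hi, where the
+ * high-word instruction is chosen to propagate carry when the op needs it
+ * (e.g. addl/adcl, subl/sbbl); the logical ops below are carry-free, so the
+ * same instruction is used for both halves.
+ */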
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    andl    %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    andl    %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: x86/op_or_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    orl     %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    orl     %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: x86/op_xor_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    xorl    %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    xorl    %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: x86/op_shl_long_2addr.S */
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
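+/*
+ * Note: x86 masks 32-bit shift counts to the low 5 bits, so shldl/sall alone
+ * only cover distances 0-31.  The "testb $32" fix-up below moves the shifted
+ * low word into the high word and zeroes the low word when bit 5 of the
+ * count is set, giving the full 0-63 shift range Dalvik expects.
+ */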
+    /* shl-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vBB
+    shldl   %eax, rIBASE
+    sall    %cl, %eax
+    testb   $32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: x86/op_shr_long_2addr.S */
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vBB
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: x86/op_ushr_long_2addr.S */
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* ushr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vBB
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: x86/op_add_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    addss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: x86/op_sub_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    subss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: x86/op_mul_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    mulss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: x86/op_div_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    divss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: x86/op_rem_float_2addr.S */
+    /* rem_float/2addr vA, vB */
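+    /*
+     * fprem computes only a partial remainder and sets C2 in the FPU status
+     * word while more iterations are needed; fstsw/sahf map C2 onto PF, so
+     * the "jp 1b" loop below spins until the remainder is exact.
+     */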
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $0xf, %cl                      # ecx <- A
+    flds    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: x86/op_add_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    addsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: x86/op_sub_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    subsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: x86/op_mul_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    mulsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: x86/op_div_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    divsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: x86/op_rem_double_2addr.S */
+    /* rem_double/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $0xf, %cl                      # ecx <- A
+    fldl    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_WIDE_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: x86/op_add_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
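+/*
+ * Encoding sketch (Dex format 22s, for illustration): the first code unit is
+ * (B << 12) | (A << 8) | opcode and the second is the signed 16-bit literal,
+ * e.g. "add-int/lit16 v3, v5, #-2" -> 0x53d0, 0xfffe.  That is why the code
+ * below splits rINSTbl into B (high nibble) and A (low nibble) and
+ * sign-extends the halfword at rPC+2.
+ */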
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    addl    %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: x86/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    subl    %eax, %ecx                                  # for example: addl %ecx, %eax
+    SET_VREG %ecx rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: x86/op_mul_int_lit16.S */
+    /* mul/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    mov     rIBASE, LOCAL0(%esp)
+    imull   %ecx, %eax                      # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: x86/op_div_int_lit16.S */
+/* File: x86/bindivLit16.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
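+/*
+ * Per the Dalvik/Java semantics, MIN_INT / -1 yields MIN_INT and
+ * MIN_INT % -1 yields 0; idivl would raise #DE on that operand pair, so the
+ * early-out paths below store the constant result instead of dividing.
+ */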
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_lit16_continue_div
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_lit16_continue_div
+    movl    $0x80000000, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_div_int_lit16_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: x86/op_rem_int_lit16.S */
+/* File: x86/bindivLit16.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_lit16_continue_div
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_lit16_continue_div
+    movl    $0, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_rem_int_lit16_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: x86/op_and_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    andl    %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: x86/op_or_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    orl     %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: x86/op_xor_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    xorl    %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: x86/op_add_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    addl    %ecx, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: x86/op_rsub_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    subl    %eax, %ecx                                  # ex: addl %ecx,%eax
+    SET_VREG %ecx rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: x86/op_mul_int_lit8.S */
+    /* mul/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax  %eax                    # eax <- rBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   %ecx, %eax                      # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: x86/op_div_int_lit8.S */
+/* File: x86/bindivLit8.S */
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax %eax                     # eax <- rBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_lit8_continue_div
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_lit8_continue_div
+    movl    $0x80000000, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_div_int_lit8_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: x86/op_rem_int_lit8.S */
+/* File: x86/bindivLit8.S */
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax %eax                     # eax <- rBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_lit8_continue_div
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_lit8_continue_div
+    movl    $0, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_rem_int_lit8_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: x86/op_and_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    andl    %ecx, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: x86/op_or_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    orl     %ecx, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: x86/op_xor_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    xorl    %ecx, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: x86/op_shl_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    sall    %cl, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: x86/op_shr_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    sarl    %cl, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: x86/op_ushr_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    shrl    %cl, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
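+    /*
+     * Quickened form: CCCC is already a resolved byte offset into the object,
+     * so aside from the null check this is, in pseudocode:
+     *     fp[A] = *(int32_t *)((char *)vB + CCCC)
+     */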
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: x86/op_iget_wide_quick.S */
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movq    (%ecx,%eax,1), %xmm0
+    andb    $0xf, rINSTbl                  # rINST <- A
+    SET_WIDE_FP_VREG %xmm0 rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: x86/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    %eax, OUT_ARG1(%esp)
+    EXPORT_PC
+    call    artIGetObjectFromMterp          # (obj, offset)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movl    rINST, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: x86/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    movzbl    rINSTbl, %ecx                 # ecx<- BA
+    sarl      $4, %ecx                     # ecx<- B
+    GET_VREG  %ecx %ecx                     # vB (object we're operating on)
+    testl     %ecx, %ecx                    # is object null?
+    je        common_errNullObject
+    movzwl    2(rPC), %eax                  # eax<- field byte offset
+    leal      (%ecx,%eax,1), %ecx           # ecx<- Address of 64-bit target
+    andb      $0xf, rINSTbl                # rINST<- A
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0<- fp[A]/fp[A+1]
+    movq      %xmm0, (%ecx)                 # obj.field<- r0/r1
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: x86/op_iput_object_quick.S */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 232
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpIputObjectQuick
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: x86/op_invoke_virtual_quick.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 233
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeVirtualQuick
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: x86/op_invoke_virtual_range_quick.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 234
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeVirtualQuickRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: x86/op_iput_boolean_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movb    rINSTbl, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: x86/op_iput_byte_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movb    rINSTbl, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: x86/op_iput_char_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movw    rINSTw, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: x86/op_iput_short_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movw    rINSTw, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: x86/op_iget_boolean_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movsbl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: x86/op_iget_byte_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movsbl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: x86/op_iget_char_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movzwl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: x86/op_iget_short_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movswl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_lambda: /* 0xf3 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: x86/op_unused_f4.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_capture_variable: /* 0xf5 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_create_lambda: /* 0xf6 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_liberate_variable: /* 0xf7 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_box_lambda: /* 0xf8 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unbox_lambda: /* 0xf9 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: x86/op_unused_fa.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: x86/op_unused_fb.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: x86/op_unused_fc.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: x86/op_unused_fd.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: x86/op_unused_fe.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: x86/op_unused_ff.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
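+/*
+ * Handler layout note: every handler is .balign 128, so the jump target
+ * below is computed as .L_op_nop + (opcode * 128), i.e. the real handler's
+ * slot within artMterpAsmInstructionStart.
+ */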
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(0*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(1*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(2*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(3*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(4*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(5*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(6*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(7*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(8*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(9*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(10*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(11*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(12*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(13*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(14*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(15*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(16*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(17*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(18*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(19*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(20*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(21*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(22*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(23*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(24*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(25*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(26*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(27*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(28*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(29*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(30*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(31*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(32*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(33*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(34*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(35*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(36*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(37*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(38*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(39*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(40*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(41*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(42*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(43*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(44*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(45*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(46*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(47*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(48*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(49*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(50*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(51*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(52*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(53*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(54*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(55*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(56*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(57*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(58*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(59*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(60*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(61*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(62*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(63*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(64*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(65*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(66*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(67*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(68*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(69*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(70*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(71*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(72*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(73*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(74*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(75*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(76*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(77*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(78*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(79*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(80*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(81*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(82*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(83*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(84*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(85*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(86*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(87*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(88*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(89*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(90*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(91*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(92*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(93*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(94*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(95*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(96*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(97*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(98*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(99*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(100*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(101*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(102*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(103*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(104*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(105*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(106*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(107*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(108*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(109*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(110*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(111*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(112*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(113*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(114*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(115*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(116*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(117*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(118*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(119*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(120*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(121*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(122*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(123*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(124*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(125*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(126*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(127*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(128*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(129*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(130*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(131*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(132*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(133*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(134*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(135*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(136*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(137*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(138*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(139*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(140*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(141*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(142*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(143*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(144*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(145*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(146*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(147*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(148*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(149*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(150*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(151*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(152*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(153*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(154*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(155*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(156*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(157*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(158*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(159*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(160*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(161*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(162*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(163*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(164*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(165*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(166*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(167*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(168*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(169*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(170*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(171*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(172*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(173*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(174*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(175*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(176*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(177*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(178*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(179*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(180*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(181*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(182*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(183*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(184*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(185*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(186*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(187*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(188*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(189*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(190*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(191*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(192*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(193*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(194*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(195*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(196*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(197*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(198*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(199*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(200*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(201*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(202*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(203*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(204*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(205*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(206*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(207*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(208*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(209*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(210*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(211*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(212*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(213*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(214*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(215*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(216*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(217*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(218*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(219*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(220*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(221*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(222*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(223*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(224*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(225*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(226*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(227*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(228*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(229*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(230*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(231*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(232*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(233*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(234*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(235*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(236*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(237*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(238*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(239*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(240*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(241*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(242*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_lambda: /* 0xf3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(243*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(244*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_capture_variable: /* 0xf5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(245*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_create_lambda: /* 0xf6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(246*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_liberate_variable: /* 0xf7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(247*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_box_lambda: /* 0xf8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(248*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unbox_lambda: /* 0xf9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(249*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(250*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(251*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(252*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(253*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(254*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(255*128)
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
+/* File: x86/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogDivideByZeroException
+#endif
+    jmp     MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogArrayIndexException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNegativeArraySizeException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNoSuchMethodException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNullObjectException
+#endif
+    jmp     MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogExceptionThrownException
+#endif
+    jmp     MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    THREAD_FLAGS_OFFSET(%eax), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    MterpLogSuspendFallback
+#endif
+    jmp     MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    movl    rSELF, %eax
+    testl   $-1, THREAD_EXCEPTION_OFFSET(%eax)
+    jz      MterpFallback
+    /* intentional fallthrough - handle pending exception. */
+
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpHandleException
+    testl   %eax, %eax
+    jz      MterpExceptionReturn
+    REFRESH_IBASE
+    movl    OFF_FP_CODE_ITEM(rFP), %eax
+    movl    OFF_FP_DEX_PC(rFP), %ecx
+    lea     CODEITEM_INSNS_OFFSET(%eax), rPC
+    lea     (rPC, %ecx, 2), rPC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* resume execution at catch block */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
+ * still needs to get the opcode and branch to it; the thread flags are tested via rSELF.
+ */
+MterpCheckSuspendAndContinue:
+    movl    rSELF, %eax
+    EXPORT_PC
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+    REFRESH_IBASE
+1:
+    GOTO_NEXT
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogFallback
+#endif
+MterpCommonFallback:
+    xor     %eax, %eax
+    jmp     MterpDone
+
+/*
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    movl    $1, %eax
+    jmp     MterpDone
+MterpReturn:
+    movl    OFF_FP_RESULT_REGISTER(rFP), %edx
+    movl    %eax, (%edx)
+    movl    %ecx, 4(%edx)
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    mov     $1, %eax
+MterpDone:
+    /* Restore callee save register */
+    movl    EBP_SPILL(%esp), %ebp
+    movl    EDI_SPILL(%esp), %edi
+    movl    ESI_SPILL(%esp), %esi
+    movl    EBX_SPILL(%esp), %ebx
+
+    /* pop up frame */
+    addl    $FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset -FRAME_SIZE
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
diff --git a/runtime/interpreter/mterp/rebuild.sh b/runtime/interpreter/mterp/rebuild.sh
index a325fff..8b26976 100755
--- a/runtime/interpreter/mterp/rebuild.sh
+++ b/runtime/interpreter/mterp/rebuild.sh
@@ -21,4 +21,4 @@
 set -e
 
 # for arch in arm x86 mips arm64 x86_64 mips64; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
-for arch in arm; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
+for arch in arm x86; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
diff --git a/runtime/interpreter/mterp/x86/alt_stub.S b/runtime/interpreter/mterp/x86/alt_stub.S
new file mode 100644
index 0000000..6462fc5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/alt_stub.S
@@ -0,0 +1,20 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(${opnum}*${handler_size_bytes})
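Editor's note (not part of the patch): ${opnum} and ${handler_size_bytes} are substituted by the mterp generator, which is why every 128-byte-aligned alt stub in the generated output above ends with a fixed jump such as .L_op_nop+(207*128). A minimal C sketch of that address math, with an illustrative helper name and the 128-byte handler size taken from the .balign 128 directives above:

    #include <stdint.h>

    /* Illustrative only: how an alt stub's jump target relates to the opcode
     * number and the handler size substituted into the template. */
    static uintptr_t alt_stub_target(uintptr_t op_nop_addr, unsigned opnum) {
        const unsigned handler_size_bytes = 128;   /* matches .balign 128 above */
        return op_nop_addr + (uintptr_t)opnum * handler_size_bytes;
    }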
diff --git a/runtime/interpreter/mterp/x86/bincmp.S b/runtime/interpreter/mterp/x86/bincmp.S
new file mode 100644
index 0000000..a9a8c3a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bincmp.S
@@ -0,0 +1,28 @@
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $$4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $$2, %eax                       # assume not taken
+    j${revcmp}   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- CCCC * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # CCCC * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
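Editor's sketch (not part of the patch): the revcmp convention branches over the taken path using the reverse condition, so the not-taken case keeps a fixed offset of 2 code units. Roughly, in C, with an illustrative helper name:

    #include <stdint.h>

    /* Illustrative semantics of bincmp.S for "if-le vA, vB, +CCCC" (revcmp = "g").
     * Offsets are in 16-bit Dalvik code units; the assembly doubles the offset
     * because rPC is byte-addressed. */
    static const uint16_t* next_pc(const uint16_t* pc, int32_t vA, int32_t vB, int16_t cccc) {
        int32_t offset = 2;          /* assume not taken: skip this 2-unit instruction */
        if (!(vA > vB)) {            /* reverse comparison fails, so if-le is taken */
            offset = cccc;           /* signed branch offset from the instruction */
        }
        return pc + offset;
    }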
diff --git a/runtime/interpreter/mterp/x86/bindiv.S b/runtime/interpreter/mterp/x86/bindiv.S
new file mode 100644
index 0000000..742f758
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindiv.S
@@ -0,0 +1,48 @@
+%default {"result":"","special":"","rem":""}
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    mov     rIBASE, LOCAL0(%esp)
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    movl    %eax, %edx
+    orl     %ecx, %edx
+    test    $$0xFFFFFF00, %edx              # If both arguments are less
+                                            #   than 8-bit and +ve
+    jz      .L${opcode}_8                   # Do 8-bit divide
+    test    $$0xFFFF0000, %edx              # If both arguments are less
+                                            #   than 16-bit and +ve
+    jz      .L${opcode}_16                  # Do 16-bit divide
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_32
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_32
+    movl    $special, $result
+    jmp     .L${opcode}_finish
+.L${opcode}_32:
+    cltd
+    idivl   %ecx
+    jmp     .L${opcode}_finish
+.L${opcode}_8:
+    div     %cl                             # 8-bit divide otherwise.
+                                            # Remainder in %ah, quotient in %al
+    .if $rem
+    movl    %eax, %edx
+    shr     $$8, %edx
+    .else
+    andl    $$0x000000FF, %eax
+    .endif
+    jmp     .L${opcode}_finish
+.L${opcode}_16:
+    xorl    %edx, %edx                      # Clear %edx before divide
+    div     %cx
+.L${opcode}_finish:
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
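Editor's sketch (not part of the patch): the minint/-1 path exists because x86 idivl raises a divide exception when INT_MIN is divided by -1, while Dalvik semantics require INT_MIN for div and 0 for rem; that is what the $special substitution supplies here and in the 2addr/lit16/lit8 variants below. In C terms, with an illustrative helper name:

    #include <stdint.h>

    /* Illustrative only: the result the div templates must produce without trapping. */
    static int32_t dalvik_div(int32_t a, int32_t b) {
        /* b == 0 is routed to common_errDivideByZero before any divide. */
        if (a == INT32_MIN && b == -1) {
            return INT32_MIN;        /* $special for div; the rem variant returns 0 here */
        }
        return a / b;
    }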
diff --git a/runtime/interpreter/mterp/x86/bindiv2addr.S b/runtime/interpreter/mterp/x86/bindiv2addr.S
new file mode 100644
index 0000000..ee7c523
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindiv2addr.S
@@ -0,0 +1,29 @@
+%default {"result":"","special":""}
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    mov     rIBASE, LOCAL0(%esp)
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_continue_div2addr
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_continue_div2addr
+    movl    $special, $result
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.L${opcode}_continue_div2addr:
+    cltd
+    idivl   %ecx
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/bindivLit16.S b/runtime/interpreter/mterp/x86/bindivLit16.S
new file mode 100644
index 0000000..a2c4334
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindivLit16.S
@@ -0,0 +1,29 @@
+%default {"result":"","special":""}
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_continue_div
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_continue_div
+    movl    $special, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.L${opcode}_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/bindivLit8.S b/runtime/interpreter/mterp/x86/bindivLit8.S
new file mode 100644
index 0000000..61bee06
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindivLit8.S
@@ -0,0 +1,26 @@
+%default {"result":"","special":""}
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax %eax                     # eax <- rBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_continue_div
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_continue_div
+    movl    $special, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.L${opcode}_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binop.S b/runtime/interpreter/mterp/x86/binop.S
new file mode 100644
index 0000000..5383f25
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binop.S
@@ -0,0 +1,17 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    $instr                                  # ex: addl    (rFP,%ecx,4),%eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binop1.S b/runtime/interpreter/mterp/x86/binop1.S
new file mode 100644
index 0000000..cd51d0c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binop1.S
@@ -0,0 +1,13 @@
+%default {"result":"%eax","tmp":"%ecx"}
+/*
+ * Generic 32-bit binary operation in which both operands loaded to
+ * registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    $instr                                  # ex: addl    %ecx,%eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binop2addr.S b/runtime/interpreter/mterp/x86/binop2addr.S
new file mode 100644
index 0000000..abee4db
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binop2addr.S
@@ -0,0 +1,19 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $$0xf, %cl                      # ecx <- A
+    $instr                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/binopLit16.S b/runtime/interpreter/mterp/x86/binopLit16.S
new file mode 100644
index 0000000..6c7fe61
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopLit16.S
@@ -0,0 +1,19 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    $instr                                  # for example: addl %ecx, %eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binopLit8.S b/runtime/interpreter/mterp/x86/binopLit8.S
new file mode 100644
index 0000000..924685d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopLit8.S
@@ -0,0 +1,18 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- vBB
+    $instr                                  # ex: addl %ecx,%eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binopWide.S b/runtime/interpreter/mterp/x86/binopWide.S
new file mode 100644
index 0000000..9f7106e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopWide.S
@@ -0,0 +1,15 @@
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    $instr1                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    $instr2                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binopWide2addr.S b/runtime/interpreter/mterp/x86/binopWide2addr.S
new file mode 100644
index 0000000..7560af4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopWide2addr.S
@@ -0,0 +1,13 @@
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $$4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $$0xF,rINSTbl                   # rINST<- A
+    $instr1                                 # ex: addl   %eax,(rFP,rINST,4)
+    $instr2                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/cvtfp_int.S b/runtime/interpreter/mterp/x86/cvtfp_int.S
new file mode 100644
index 0000000..a8bad63
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/cvtfp_int.S
@@ -0,0 +1,61 @@
+%default {"srcdouble":"1","tgtlong":"1"}
+/* On fp-to-int conversions, Java requires that
+ * if the result is greater than maxint it is clamped to maxint, and if it is
+ * less than minint it is clamped to minint.  If the input is NaN, the result
+ * should be zero.  Further, the rounding mode is truncation (round toward
+ * zero).  This model differs from what the x86 FPU delivers by default, so we
+ * have to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    .if $srcdouble
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $$0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $$0xf, %cl                      # ecx <- A
+    .if $tgtlong
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if $tgtlong
+    movl    $$0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $$0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .L${opcode}_special_case # fix up result
+
+.L${opcode}_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if $tgtlong
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.L${opcode}_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .L${opcode}_isNaN
+    adcl    $$-1, VREG_ADDRESS(%ecx)
+    .if $tgtlong
+    adcl    $$-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .L${opcode}_finish
+.L${opcode}_isNaN:
+    movl    $$0, VREG_ADDRESS(%ecx)
+    .if $tgtlong
+    movl    $$0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .L${opcode}_finish
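
The clamping behavior described in the header comment is the Java narrowing-conversion
rule; the special-case and NaN paths above reproduce it on top of fistp.  A hedged C++
restatement of the required result for the double-to-int case (illustration only, not
patch code; the long and float variants are analogous):

    #include <cmath>
    #include <cstdint>
    #include <limits>

    int32_t JavaDoubleToInt(double v) {
      if (std::isnan(v)) {
        return 0;                                        // NaN converts to zero
      }
      if (v >= static_cast<double>(std::numeric_limits<int32_t>::max())) {
        return std::numeric_limits<int32_t>::max();      // clamp to maxint
      }
      if (v <= static_cast<double>(std::numeric_limits<int32_t>::min())) {
        return std::numeric_limits<int32_t>::min();      // clamp to minint
      }
      return static_cast<int32_t>(v);                    // truncate toward zero
    }
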
diff --git a/runtime/interpreter/mterp/x86/entry.S b/runtime/interpreter/mterp/x86/entry.S
new file mode 100644
index 0000000..a24ef70
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/entry.S
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  0  Thread* self
+ *  1  code_item
+ *  2  ShadowFrame
+ *  3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    /* Allocate frame */
+    subl    $$FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset FRAME_SIZE
+
+    /* Spill callee save regs */
+    movl    %ebp, EBP_SPILL(%esp)
+    movl    %edi, EDI_SPILL(%esp)
+    movl    %esi, ESI_SPILL(%esp)
+    movl    %ebx, EBX_SPILL(%esp)
+
+    /* Load ShadowFrame pointer */
+    movl    IN_ARG2(%esp), %edx
+
+    /* Remember the return register */
+    movl    IN_ARG3(%esp), %eax
+    movl    %eax, SHADOWFRAME_RESULT_REGISTER_OFFSET(%edx)
+
+    /* Remember the code_item */
+    movl    IN_ARG1(%esp), %ecx
+    movl    %ecx, SHADOWFRAME_CODE_ITEM_OFFSET(%edx)
+
+    /* set up "named" registers */
+    movl    SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(%edx), %eax
+    leal    SHADOWFRAME_VREGS_OFFSET(%edx), rFP
+    leal    (rFP, %eax, 4), rREFS
+    movl    SHADOWFRAME_DEX_PC_OFFSET(%edx), %eax
+    lea     CODEITEM_INSNS_OFFSET(%ecx), rPC
+    lea     (rPC, %eax, 2), rPC
+    EXPORT_PC
+
+    /* Starting ibase */
+    REFRESH_IBASE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
diff --git a/runtime/interpreter/mterp/x86/fallback.S b/runtime/interpreter/mterp/x86/fallback.S
new file mode 100644
index 0000000..8d61166
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/fallback.S
@@ -0,0 +1,3 @@
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
new file mode 100644
index 0000000..8f79b37
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -0,0 +1,192 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogDivideByZeroException
+#endif
+    jmp     MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogArrayIndexException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNegativeArraySizeException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNoSuchMethodException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNullObjectException
+#endif
+    jmp     MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogExceptionThrownException
+#endif
+    jmp     MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    THREAD_FLAGS_OFFSET(%eax), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    MterpLogSuspendFallback
+#endif
+    jmp     MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    movl    rSELF, %eax
+    testl   $$-1, THREAD_EXCEPTION_OFFSET(%eax)
+    jz      MterpFallback
+    /* intentional fallthrough - handle pending exception. */
+
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpHandleException
+    testl   %eax, %eax
+    jz      MterpExceptionReturn
+    REFRESH_IBASE
+    movl    OFF_FP_CODE_ITEM(rFP), %eax
+    movl    OFF_FP_DEX_PC(rFP), %ecx
+    lea     CODEITEM_INSNS_OFFSET(%eax), rPC
+    lea     (rPC, %ecx, 2), rPC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* resume execution at catch block */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for a suspend request.  Assumes rINST is already loaded and rPC advanced;
+ * we still need to fetch the opcode and branch to its handler.
+ */
+MterpCheckSuspendAndContinue:
+    movl    rSELF, %eax
+    EXPORT_PC
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+    REFRESH_IBASE
+1:
+    GOTO_NEXT
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogFallback
+#endif
+MterpCommonFallback:
+    xor     %eax, %eax
+    jmp     MterpDone
+
+/*
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    movl    $$1, %eax
+    jmp     MterpDone
+MterpReturn:
+    movl    OFF_FP_RESULT_REGISTER(rFP), %edx
+    movl    %eax, (%edx)
+    movl    %ecx, 4(%edx)
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    mov     $$1, %eax
+MterpDone:
+    /* Restore callee save registers */
+    movl    EBP_SPILL(%esp), %ebp
+    movl    EDI_SPILL(%esp), %edi
+    movl    ESI_SPILL(%esp), %esi
+    movl    EBX_SPILL(%esp), %ebx
+
+    /* pop up frame */
+    addl    $$FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset -FRAME_SIZE
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/x86/fpcmp.S b/runtime/interpreter/mterp/x86/fpcmp.S
new file mode 100644
index 0000000..2b98667
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/fpcmp.S
@@ -0,0 +1,35 @@
+%default {"suff":"d","nanval":"pos"}
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movs${suff} VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomis${suff} VREG_ADDRESS(%ecx), %xmm0
+    jp      .L${opcode}_nan_is_${nanval}
+    je      .L${opcode}_finish
+    jb      .L${opcode}_less
+.L${opcode}_nan_is_pos:
+    incl    %eax
+    jmp     .L${opcode}_finish
+.L${opcode}_nan_is_neg:
+.L${opcode}_less:
+    decl    %eax
+.L${opcode}_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
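
The only difference between the cmpl-* and cmpg-* opcodes is the value produced for an
unordered (NaN) comparison, which is what the "nanval" template argument selects.  A
short C++ restatement of the result table (illustration only):

    #include <cmath>

    // gt_bias is true for cmpg-* (NaN -> +1) and false for cmpl-* (NaN -> -1).
    int FpCompare(double x, double y, bool gt_bias) {
      if (std::isnan(x) || std::isnan(y)) return gt_bias ? 1 : -1;
      if (x == y) return 0;
      return (x < y) ? -1 : 1;
    }
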
diff --git a/runtime/interpreter/mterp/x86/fpcvt.S b/runtime/interpreter/mterp/x86/fpcvt.S
new file mode 100644
index 0000000..7808285
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/fpcvt.S
@@ -0,0 +1,17 @@
+%default {"instr":"","load":"","store":"","wide":"0"}
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    $load   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $$0xf, %cl                      # ecx <- A
+    $instr
+    $store  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if $wide
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
new file mode 100644
index 0000000..2481785
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+x86 ABI general notes:
+
+Caller save set:
+   eax, edx, ecx, st(0)-st(7)
+Callee save set:
+   ebx, esi, edi, ebp
+Return regs:
+   32-bit in eax
+   64-bit in edx:eax (low-order 32 in eax)
+   fp on top of fp stack st(0)
+
+Parameters passed on stack, pushed right-to-left.  On entry to target, first
+parm is at 4(%esp).  Traditional entry code is:
+
+functEntry:
+    push    %ebp             # save old frame pointer
+    mov     %esp,%ebp        # establish new frame pointer
+    sub     FrameSize,%esp   # Allocate storage for spill, locals & outs
+
+Once past the prologue, arguments are referenced at ((argno + 2)*4)(%ebp)
+
+Stack must be 16-byte aligned to support SSE in native code.
+
+If we're not doing variable stack allocation (alloca), the frame pointer can be
+eliminated and all arg references adjusted to be esp relative.
+*/
+
+/*
+Mterp and x86 notes:
+
+Some key interpreter variables will be assigned to registers.
+
+  nick     reg   purpose
+  rPC      esi   interpreted program counter, used for fetching instructions
+  rFP      edi   interpreted frame pointer, used for accessing locals and args
+  rINSTw   bx    first 16-bit code unit of current instruction
+  rINSTbl  bl    opcode portion of instruction word
+  rINSTbh  bh    high byte of inst word, usually contains src/tgt reg names
+  rIBASE   edx   base of instruction handler table
+  rREFS    ebp   base of object references in shadow frame.
+
+Notes:
+   o High order 16 bits of ebx must be zero on entry to handler
+   o rPC, rFP, rINSTw/rINSTbl valid on handler entry and exit
+   o eax and ecx are scratch, rINSTw/ebx sometimes scratch
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* Frame size must keep the stack 16-byte aligned.
+ * Remember to account for the 4 bytes of return address already on the stack.
+ */
+#define FRAME_SIZE     44
+
+/* Frame diagram while executing ExecuteMterpImpl, high to low addresses */
+#define IN_ARG3        (FRAME_SIZE + 16)
+#define IN_ARG2        (FRAME_SIZE + 12)
+#define IN_ARG1        (FRAME_SIZE +  8)
+#define IN_ARG0        (FRAME_SIZE +  4)
+#define CALLER_RP      (FRAME_SIZE +  0)
+/* Spill offsets relative to %esp */
+#define EBP_SPILL      (FRAME_SIZE -  4)
+#define EDI_SPILL      (FRAME_SIZE -  8)
+#define ESI_SPILL      (FRAME_SIZE - 12)
+#define EBX_SPILL      (FRAME_SIZE - 16)
+#define LOCAL0         (FRAME_SIZE - 20)
+#define LOCAL1         (FRAME_SIZE - 24)
+#define LOCAL2         (FRAME_SIZE - 28)
+/* Out Arg offsets, relative to %esp */
+#define OUT_ARG3       ( 12)
+#define OUT_ARG2       (  8)
+#define OUT_ARG1       (  4)
+#define OUT_ARG0       (  0)  /* <- ExecuteMterpImpl esp + 0 */
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rSELF    IN_ARG0(%esp)
+#define rPC      %esi
+#define rFP      %edi
+#define rINST    %ebx
+#define rINSTw   %bx
+#define rINSTbh  %bh
+#define rINSTbl  %bl
+#define rIBASE   %edx
+#define rREFS    %ebp
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+/*
+ *
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
+
+/*
+ * Refresh handler table.
+ * IBase handles uses the caller save register so we must restore it after each call.
+ * Also it is used as a result of some 64-bit operations (like imul) and we should
+ * restore it in such cases also.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro REFRESH_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * If rSELF has already been loaded into a register, we can refresh rIBASE from it directly.
+ */
+.macro REFRESH_IBASE_FROM_SELF _reg
+    movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
+.endm
+
+/*
+ * Refresh rINST.
+ * On entry to a handler, rINST does not contain the opcode number.
+ * However, some utilities require the full instruction word, so this macro
+ * restores the opcode number.
+ */
+.macro REFRESH_INST _opnum
+    movb    rINSTbl, rINSTbh
+    movb    $$\_opnum, rINSTbl
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINSTw.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    movzwl  (rPC), rINST
+.endm
+
+/*
+ * Remove opcode from rINST, compute the address of handler and jump to it.
+ */
+.macro GOTO_NEXT
+    movzx   rINSTbl,%eax
+    movzbl  rINSTbh,rINST
+    shll    $$${handler_size_bits}, %eax
+    addl    rIBASE, %eax
+    jmp     *%eax
+.endm
+
+/*
+ * Advance rPC by instruction count.
+ */
+.macro ADVANCE_PC _count
+    leal    2*\_count(rPC), rPC
+.endm
+
+/*
+ * Advance rPC by instruction count, fetch instruction and jump to handler.
+ */
+.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count
+    ADVANCE_PC \_count
+    FETCH_INST
+    GOTO_NEXT
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define VREG_ADDRESS(_vreg) (rFP,_vreg,4)
+#define VREG_HIGH_ADDRESS(_vreg) 4(rFP,_vreg,4)
+#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4)
+#define VREG_REF_HIGH_ADDRESS(_vreg) 4(rREFS,_vreg,4)
+
+.macro GET_VREG _reg _vreg
+    movl    (rFP,\_vreg,4), \_reg
+.endm
+
+/* Read wide value to xmm. */
+.macro GET_WIDE_FP_VREG _reg _vreg
+    movq    (rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    $$0, (rREFS,\_vreg,4)
+.endm
+
+/* Write wide value from xmm. xmm is clobbered. */
+.macro SET_WIDE_FP_VREG _reg _vreg
+    movq    \_reg, (rFP,\_vreg,4)
+    pxor    \_reg, \_reg
+    movq    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro SET_VREG_OBJECT _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro GET_VREG_HIGH _reg _vreg
+    movl    4(rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG_HIGH _reg _vreg
+    movl    \_reg, 4(rFP,\_vreg,4)
+    movl    $$0, 4(rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_REF _vreg
+    movl    $$0,  (rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_WIDE_REF _vreg
+    movl    $$0,  (rREFS,\_vreg,4)
+    movl    $$0, 4(rREFS,\_vreg,4)
+.endm
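
The GET_VREG/SET_VREG/SET_VREG_OBJECT macros implement the double-store model mentioned
at the top of this header: every vreg has a primitive slot (off rFP) and a parallel
reference slot (off rREFS), and non-reference writes clear the reference slot.  A small
C++ sketch of that model (the struct and field names are illustrative, not the real
ShadowFrame API):

    #include <cstddef>
    #include <cstdint>

    struct VRegModel {
      uint32_t* vregs;   // what rFP points at
      uint32_t* refs;    // what rREFS points at, same indexing

      uint32_t GetVReg(size_t i) const { return vregs[i]; }   // GET_VREG
      void SetVReg(size_t i, uint32_t v) {                    // SET_VREG
        vregs[i] = v;
        refs[i] = 0;               // non-reference write clears the ref slot
      }
      void SetVRegObject(size_t i, uint32_t ref) {             // SET_VREG_OBJECT
        vregs[i] = ref;
        refs[i] = ref;             // references are visible in both views
      }
    };

Note also that FRAME_SIZE (44) plus the 4-byte return address comes to 48, a multiple of
16, which is what keeps %esp 16-byte aligned at the out-arg area for calls made from the
handlers.
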
diff --git a/runtime/interpreter/mterp/x86/invoke.S b/runtime/interpreter/mterp/x86/invoke.S
new file mode 100644
index 0000000..80f7822
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/invoke.S
@@ -0,0 +1,20 @@
+%default { "helper":"UndefinedInvokeHandler" }
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG3(%esp)
+    call    $helper
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_add_double.S b/runtime/interpreter/mterp/x86/op_add_double.S
new file mode 100644
index 0000000..de2708f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"adds","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_add_double_2addr.S b/runtime/interpreter/mterp/x86/op_add_double_2addr.S
new file mode 100644
index 0000000..538c9ab
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"adds","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_add_float.S b/runtime/interpreter/mterp/x86/op_add_float.S
new file mode 100644
index 0000000..80b1736
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"adds","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_add_float_2addr.S b/runtime/interpreter/mterp/x86/op_add_float_2addr.S
new file mode 100644
index 0000000..6649253
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"adds","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int.S b/runtime/interpreter/mterp/x86/op_add_int.S
new file mode 100644
index 0000000..f71a56b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"addl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int_2addr.S b/runtime/interpreter/mterp/x86/op_add_int_2addr.S
new file mode 100644
index 0000000..5d43b65
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"addl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int_lit16.S b/runtime/interpreter/mterp/x86/op_add_int_lit16.S
new file mode 100644
index 0000000..4f34d17
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"addl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int_lit8.S b/runtime/interpreter/mterp/x86/op_add_int_lit8.S
new file mode 100644
index 0000000..3f14744
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"addl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_long.S b/runtime/interpreter/mterp/x86/op_add_long.S
new file mode 100644
index 0000000..dce0c26
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"addl    (rFP,%ecx,4), rIBASE", "instr2":"adcl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_long_2addr.S b/runtime/interpreter/mterp/x86/op_add_long_2addr.S
new file mode 100644
index 0000000..7847640
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"addl    %eax, (rFP,rINST,4)","instr2":"adcl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_aget.S b/runtime/interpreter/mterp/x86/op_aget.S
new file mode 100644
index 0000000..52b5236
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget.S
@@ -0,0 +1,19 @@
+%default { "load":"movl", "shift":"4", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    $load   $data_offset(%eax,%ecx,$shift), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
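
The null test and the unsigned jae against the array length encode the usual aget
checks.  Expressed as a hedged C++ sketch (the array layout below is a placeholder for
what the MIRROR_*_OFFSET constants describe, not the real mirror:: classes):

    #include <cstdint>
    #include <stdexcept>

    struct Int32Array {
      int32_t length;    // corresponds to MIRROR_ARRAY_LENGTH_OFFSET
      int32_t data[1];   // corresponds to MIRROR_INT_ARRAY_DATA_OFFSET
    };

    int32_t AGet(const Int32Array* array, uint32_t index) {
      if (array == nullptr) {
        throw std::runtime_error("NullPointerException");           // common_errNullObject
      }
      // One unsigned compare (jae) rejects both negative indices and index >= length.
      if (index >= static_cast<uint32_t>(array->length)) {
        throw std::runtime_error("ArrayIndexOutOfBoundsException");  // common_errArrayIndex
      }
      return array->data[index];
    }
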
diff --git a/runtime/interpreter/mterp/x86/op_aget_boolean.S b/runtime/interpreter/mterp/x86/op_aget_boolean.S
new file mode 100644
index 0000000..d910c94
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movzbl", "shift":"1", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_byte.S b/runtime/interpreter/mterp/x86/op_aget_byte.S
new file mode 100644
index 0000000..aba9ffc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_byte.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movsbl", "shift":"1", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_char.S b/runtime/interpreter/mterp/x86/op_aget_char.S
new file mode 100644
index 0000000..748e410
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_char.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movzwl", "shift":"2", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_object.S b/runtime/interpreter/mterp/x86/op_aget_object.S
new file mode 100644
index 0000000..61f3e91
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_object.S
@@ -0,0 +1,20 @@
+/*
+ * Array object get.  vAA <- vBB[vCC].
+ *
+ * for: aget-object
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    artAGetObjectFromMterp          # (array, index)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG_OBJECT %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aget_short.S b/runtime/interpreter/mterp/x86/op_aget_short.S
new file mode 100644
index 0000000..6eaf5d9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_short.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movswl", "shift":"2", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_wide.S b/runtime/interpreter/mterp/x86/op_aget_wide.S
new file mode 100644
index 0000000..663adc6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_wide.S
@@ -0,0 +1,16 @@
+/*
+ * Array get, 64 bits.  vAA <- vBB[vCC].
+ */
+    /* aget-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    movq    (%eax), %xmm0                   # xmm0 <- vBB[vCC]
+    SET_WIDE_FP_VREG %xmm0 rINST            # vAA <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_and_int.S b/runtime/interpreter/mterp/x86/op_and_int.S
new file mode 100644
index 0000000..6272c4e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"andl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_int_2addr.S b/runtime/interpreter/mterp/x86/op_and_int_2addr.S
new file mode 100644
index 0000000..95df873
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"andl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_and_int_lit16.S b/runtime/interpreter/mterp/x86/op_and_int_lit16.S
new file mode 100644
index 0000000..b062064
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"andl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_int_lit8.S b/runtime/interpreter/mterp/x86/op_and_int_lit8.S
new file mode 100644
index 0000000..99915df
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"andl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_long.S b/runtime/interpreter/mterp/x86/op_and_long.S
new file mode 100644
index 0000000..f8514ea
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"andl    (rFP,%ecx,4), rIBASE", "instr2":"andl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_long_2addr.S b/runtime/interpreter/mterp/x86/op_and_long_2addr.S
new file mode 100644
index 0000000..37249b8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"andl    %eax, (rFP,rINST,4)","instr2":"andl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_aput.S b/runtime/interpreter/mterp/x86/op_aput.S
new file mode 100644
index 0000000..2ea465d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput.S
@@ -0,0 +1,20 @@
+%default { "reg":"rINST", "store":"movl", "shift":"4", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    $data_offset(%eax,%ecx,$shift), %eax
+    GET_VREG rINST rINST
+    $store  $reg, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aput_boolean.S b/runtime/interpreter/mterp/x86/op_aput_boolean.S
new file mode 100644
index 0000000..e7fdd53
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTbl", "store":"movb", "shift":"1", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_byte.S b/runtime/interpreter/mterp/x86/op_aput_byte.S
new file mode 100644
index 0000000..491d03c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_byte.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTbl", "store":"movb", "shift":"1", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_char.S b/runtime/interpreter/mterp/x86/op_aput_char.S
new file mode 100644
index 0000000..ca42cf0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_char.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTw", "store":"movw", "shift":"2", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_object.S b/runtime/interpreter/mterp/x86/op_aput_object.S
new file mode 100644
index 0000000..2af5acb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_object.S
@@ -0,0 +1,15 @@
+/*
+ * Store an object into an array.  vBB[vCC] <- vAA.
+ */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpAputObject                 # (shadow_frame, pc, inst_data)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aput_short.S b/runtime/interpreter/mterp/x86/op_aput_short.S
new file mode 100644
index 0000000..5e63482
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_short.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTw", "store":"movw", "shift":"2", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_wide.S b/runtime/interpreter/mterp/x86/op_aput_wide.S
new file mode 100644
index 0000000..7a33371
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_wide.S
@@ -0,0 +1,17 @@
+/*
+ * Array put, 64 bits.  vBB[vCC] <- vAA.
+ *
+ */
+    /* aput-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0 <- vAA
+    movq    %xmm0, (%eax)                   # vBB[vCC] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_array_length.S b/runtime/interpreter/mterp/x86/op_array_length.S
new file mode 100644
index 0000000..3e42a7c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_array_length.S
@@ -0,0 +1,12 @@
+/*
+ * Return the length of an array.
+ */
+    mov     rINST, %eax                     # eax <- BA
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %ecx rINST                     # ecx <- vB (object ref)
+    testl   %ecx, %ecx                      # is null?
+    je      common_errNullObject
+    andb    $$0xf, %al                      # eax <- A
+    movl    MIRROR_ARRAY_LENGTH_OFFSET(%ecx), rINST
+    SET_VREG rINST %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_check_cast.S b/runtime/interpreter/mterp/x86/op_check_cast.S
new file mode 100644
index 0000000..3d85f5e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_check_cast.S
@@ -0,0 +1,18 @@
+/*
+ * Check to see if a cast from one class to another is allowed.
+ */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpCheckCast                  # (index, obj, method, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_cmp_long.S b/runtime/interpreter/mterp/x86/op_cmp_long.S
new file mode 100644
index 0000000..bd86738
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmp_long.S
@@ -0,0 +1,27 @@
+/*
+ * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+ * register based on the results of the comparison.
+ */
+    /* cmp-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1], BB is clobbered
+    cmpl    VREG_HIGH_ADDRESS(%ecx), %eax
+    jl      .L${opcode}_smaller
+    jg      .L${opcode}_bigger
+    movzbl  2(rPC), %eax                    # eax <- BB, restore BB
+    GET_VREG %eax %eax                      # eax <- v[BB]
+    sub     VREG_ADDRESS(%ecx), %eax
+    ja      .L${opcode}_bigger
+    jb      .L${opcode}_smaller
+.L${opcode}_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.L${opcode}_bigger:
+    movl    $$1, %eax
+    jmp     .L${opcode}_finish
+
+.L${opcode}_smaller:
+    movl    $$-1, %eax
+    jmp     .L${opcode}_finish
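
The handler compares the high words signed and, only when they are equal, the low words
unsigned, which yields the usual three-way cmp-long result.  A brief C++ sketch of the
equivalent computation (illustration only):

    #include <cstdint>

    int CmpLong(int64_t x, int64_t y) {
      if (x == y) return 0;
      return (x < y) ? -1 : 1;
    }

    // Split-word form, matching the structure of the assembly above.
    int CmpLongSplit(int32_t x_hi, uint32_t x_lo, int32_t y_hi, uint32_t y_lo) {
      if (x_hi < y_hi) return -1;     // signed compare of the high words
      if (x_hi > y_hi) return 1;
      if (x_lo < y_lo) return -1;     // unsigned compare of the low words
      if (x_lo > y_lo) return 1;
      return 0;
    }
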
diff --git a/runtime/interpreter/mterp/x86/op_cmpg_double.S b/runtime/interpreter/mterp/x86/op_cmpg_double.S
new file mode 100644
index 0000000..a73ba55
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpg_double.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"d","nanval":"pos"}
diff --git a/runtime/interpreter/mterp/x86/op_cmpg_float.S b/runtime/interpreter/mterp/x86/op_cmpg_float.S
new file mode 100644
index 0000000..648051b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpg_float.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"s","nanval":"pos"}
diff --git a/runtime/interpreter/mterp/x86/op_cmpl_double.S b/runtime/interpreter/mterp/x86/op_cmpl_double.S
new file mode 100644
index 0000000..058163e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpl_double.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"d","nanval":"neg"}
diff --git a/runtime/interpreter/mterp/x86/op_cmpl_float.S b/runtime/interpreter/mterp/x86/op_cmpl_float.S
new file mode 100644
index 0000000..302f078
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpl_float.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"s","nanval":"neg"}
diff --git a/runtime/interpreter/mterp/x86/op_const.S b/runtime/interpreter/mterp/x86/op_const.S
new file mode 100644
index 0000000..dc69530
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const.S
@@ -0,0 +1,4 @@
+    /* const vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # grab all 32 bits at once
+    SET_VREG %eax rINST                     # vAA<- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_const_16.S b/runtime/interpreter/mterp/x86/op_const_16.S
new file mode 100644
index 0000000..f5707cf
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_16.S
@@ -0,0 +1,4 @@
+    /* const/16 vAA, #+BBBB */
+    movswl  2(rPC), %ecx                    # ecx <- ssssBBBB
+    SET_VREG %ecx rINST                     # vAA <- ssssBBBB
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_4.S b/runtime/interpreter/mterp/x86/op_const_4.S
new file mode 100644
index 0000000..c336411
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_4.S
@@ -0,0 +1,7 @@
+    /* const/4 vA, #+B */
+    movsx   rINSTbl, %eax                   # eax <- ssssssBx
+    movl    $$0xf, rINST
+    andl    %eax, rINST                     # rINST <- A
+    sarl    $$4, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_const_class.S b/runtime/interpreter/mterp/x86/op_const_class.S
new file mode 100644
index 0000000..eceb8bc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_class.S
@@ -0,0 +1,14 @@
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstClass                 # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_high16.S b/runtime/interpreter/mterp/x86/op_const_high16.S
new file mode 100644
index 0000000..da78d1b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_high16.S
@@ -0,0 +1,5 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $$16, %eax                      # eax <- BBBB0000
+    SET_VREG %eax rINST                     # vAA <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string.S b/runtime/interpreter/mterp/x86/op_const_string.S
new file mode 100644
index 0000000..9acd6fe
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_string.S
@@ -0,0 +1,14 @@
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstString                # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string_jumbo.S b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
new file mode 100644
index 0000000..5c728b2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
@@ -0,0 +1,14 @@
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %eax                    # eax <- BBBBBBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstString                # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_const_wide.S b/runtime/interpreter/mterp/x86/op_const_wide.S
new file mode 100644
index 0000000..745490e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide.S
@@ -0,0 +1,7 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- lsw
+    movzbl  rINSTbl, %ecx                   # ecx <- AA
+    movl    6(rPC), rINST                   # rINST <- msw
+    SET_VREG %eax %ecx
+    SET_VREG_HIGH  rINST %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 5
diff --git a/runtime/interpreter/mterp/x86/op_const_wide_16.S b/runtime/interpreter/mterp/x86/op_const_wide_16.S
new file mode 100644
index 0000000..8029cfe
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide_16.S
@@ -0,0 +1,8 @@
+    /* const-wide/16 vAA, #+BBBB */
+    movswl  2(rPC), %eax                    # eax <- ssssBBBB
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssssssBBBB
+    SET_VREG_HIGH rIBASE rINST              # store msw
+    SET_VREG %eax rINST                     # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
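
cltd sign-extends %eax into %edx:%eax, which is why rIBASE (%edx) is saved and restored
around it.  In C++ terms the opcode simply sign-extends the 16-bit literal to 64 bits
before storing it into the register pair (sketch only):

    #include <cstdint>

    // const-wide/16: the literal BBBB is sign-extended to 64 bits.
    int64_t ConstWide16(uint16_t bbbb) {
      int64_t value = static_cast<int16_t>(bbbb);   // ssssssssssssBBBB
      return value;                                 // low word -> vAA, high word -> vAA+1
    }
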
diff --git a/runtime/interpreter/mterp/x86/op_const_wide_32.S b/runtime/interpreter/mterp/x86/op_const_wide_32.S
new file mode 100644
index 0000000..3e23d3a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide_32.S
@@ -0,0 +1,8 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- BBBBbbbb
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssBBBBbbbb
+    SET_VREG_HIGH rIBASE rINST              # store msw
+    SET_VREG %eax rINST                     # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_const_wide_high16.S b/runtime/interpreter/mterp/x86/op_const_wide_high16.S
new file mode 100644
index 0000000..d2a1119
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide_high16.S
@@ -0,0 +1,7 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $$16, %eax                      # eax <- BBBB0000
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    xorl    %eax, %eax
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_div_double.S b/runtime/interpreter/mterp/x86/op_div_double.S
new file mode 100644
index 0000000..575716d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"divs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_div_double_2addr.S b/runtime/interpreter/mterp/x86/op_div_double_2addr.S
new file mode 100644
index 0000000..8229a31
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"divs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_div_float.S b/runtime/interpreter/mterp/x86/op_div_float.S
new file mode 100644
index 0000000..250f1dc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"divs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_div_float_2addr.S b/runtime/interpreter/mterp/x86/op_div_float_2addr.S
new file mode 100644
index 0000000..c30d148
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"divs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int.S b/runtime/interpreter/mterp/x86/op_div_int.S
new file mode 100644
index 0000000..5fc8fa5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int.S
@@ -0,0 +1 @@
+%include "x86/bindiv.S" {"result":"%eax","special":"$0x80000000","rem":"0"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int_2addr.S b/runtime/interpreter/mterp/x86/op_div_int_2addr.S
new file mode 100644
index 0000000..04cf1ba
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/bindiv2addr.S" {"result":"%eax","special":"$0x80000000"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int_lit16.S b/runtime/interpreter/mterp/x86/op_div_int_lit16.S
new file mode 100644
index 0000000..dd396bb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/bindivLit16.S" {"result":"%eax","special":"$0x80000000"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int_lit8.S b/runtime/interpreter/mterp/x86/op_div_int_lit8.S
new file mode 100644
index 0000000..3cbd9d0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/bindivLit8.S" {"result":"%eax","special":"$0x80000000"}
diff --git a/runtime/interpreter/mterp/x86/op_div_long.S b/runtime/interpreter/mterp/x86/op_div_long.S
new file mode 100644
index 0000000..5772826
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_long.S
@@ -0,0 +1,23 @@
+%default {"routine":"art_quick_ldiv"}
+/* art_quick_* methods use the quick ABI,
+ *   so pass args in eax, ecx, edx and ebx.
+ */
+    /* div vAA, vBB, vCC */
+    .extern $routine
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movzbl  3(rPC), %eax                    # eax <- CC
+    GET_VREG %ecx %eax
+    GET_VREG_HIGH %ebx %eax
+    movl    %ecx, %edx
+    orl     %ebx, %ecx
+    jz      common_errDivideByZero
+    movzbl  2(rPC), %eax                    # eax <- BB
+    GET_VREG_HIGH %ecx %eax
+    GET_VREG %eax %eax
+    call    $routine
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_div_long_2addr.S b/runtime/interpreter/mterp/x86/op_div_long_2addr.S
new file mode 100644
index 0000000..2696042
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_long_2addr.S
@@ -0,0 +1,25 @@
+%default {"routine":"art_quick_ldiv"}
+/* art_quick_* methods use the quick ABI,
+ *   so pass args in eax, ecx, edx and ebx.
+ */
+    /* div/2addr vA, vB */
+    .extern   $routine
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    movzbl  rINSTbl, %eax
+    shrl    $$4, %eax                       # eax <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movl    %ebx, %ecx
+    GET_VREG %edx %eax
+    GET_VREG_HIGH %ebx %eax
+    movl    %edx, %eax
+    orl     %ebx, %eax
+    jz      common_errDivideByZero
+    GET_VREG %eax %ecx
+    GET_VREG_HIGH %ecx %ecx
+    call    $routine
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_double_to_float.S b/runtime/interpreter/mterp/x86/op_double_to_float.S
new file mode 100644
index 0000000..5135d60
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_double_to_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fldl","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_double_to_int.S b/runtime/interpreter/mterp/x86/op_double_to_int.S
new file mode 100644
index 0000000..9c4e11c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_double_to_int.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"1","tgtlong":"0"}
diff --git a/runtime/interpreter/mterp/x86/op_double_to_long.S b/runtime/interpreter/mterp/x86/op_double_to_long.S
new file mode 100644
index 0000000..fe0eee2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_double_to_long.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"1","tgtlong":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_fill_array_data.S b/runtime/interpreter/mterp/x86/op_fill_array_data.S
new file mode 100644
index 0000000..0cb05f6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_fill_array_data.S
@@ -0,0 +1,12 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    GET_VREG %eax rINST                     # eax <- vAA (array object)
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpFillArrayData              # (obj, payload)
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_filled_new_array.S b/runtime/interpreter/mterp/x86/op_filled_new_array.S
new file mode 100644
index 0000000..c08b09f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_filled_new_array.S
@@ -0,0 +1,20 @@
+%default { "helper":"MterpFilledNewArray" }
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern $helper
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)
+    call    $helper
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_filled_new_array_range.S b/runtime/interpreter/mterp/x86/op_filled_new_array_range.S
new file mode 100644
index 0000000..841059e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "x86/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/x86/op_float_to_double.S b/runtime/interpreter/mterp/x86/op_float_to_double.S
new file mode 100644
index 0000000..12a3e14
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_float_to_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"flds","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_float_to_int.S b/runtime/interpreter/mterp/x86/op_float_to_int.S
new file mode 100644
index 0000000..ac57388
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_float_to_int.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"0","tgtlong":"0"}
diff --git a/runtime/interpreter/mterp/x86/op_float_to_long.S b/runtime/interpreter/mterp/x86/op_float_to_long.S
new file mode 100644
index 0000000..be1d982
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_float_to_long.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"0","tgtlong":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_goto.S b/runtime/interpreter/mterp/x86/op_goto.S
new file mode 100644
index 0000000..411399d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_goto.S
@@ -0,0 +1,19 @@
+/*
+ * Unconditional branch, 8-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto +AA */
+    movsbl  rINSTbl, %eax                   # eax <- ssssssAA
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
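
A small sketch (plain C, not ART code) of the branch policy the goto handlers share: the doubled code-unit offset is applied to rPC, and only non-positive offsets (backward branches, or a branch to self) take the suspend-check path:

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch under stated assumptions; pc is measured in 16-bit code units. */
    static const uint16_t* do_goto(const uint16_t* pc, int32_t signed_offset,
                                   bool* check_suspend) {
        int32_t byte_offset = signed_offset * 2;       /* addl %eax, %eax */
        *check_suspend = (byte_offset <= 0);           /* jg 1f skips it  */
        return (const uint16_t*)((const char*)pc + byte_offset);
    }
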
diff --git a/runtime/interpreter/mterp/x86/op_goto_16.S b/runtime/interpreter/mterp/x86/op_goto_16.S
new file mode 100644
index 0000000..4f04f9e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_goto_16.S
@@ -0,0 +1,19 @@
+/*
+ * Unconditional branch, 16-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto/16 +AAAA */
+    movswl  2(rPC), %eax                    # eax <- ssssAAAA
+    addl    %eax, %eax                      # eax <- AAAA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AAAA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/op_goto_32.S b/runtime/interpreter/mterp/x86/op_goto_32.S
new file mode 100644
index 0000000..48f6e5a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_goto_32.S
@@ -0,0 +1,24 @@
+/*
+ * Unconditional branch, 32-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ *
+ * Unlike most opcodes, this one is allowed to branch to itself, so
+ * our "backward branch" test must be "<= 0" instead of "< 0".  The
+ * addl that doubles the Dalvik code-unit offset into a byte offset
+ * also leaves the flags that the jg below tests.
+ */
+    /* goto/32 +AAAAAAAA */
+    movl    2(rPC), %eax                    # eax <- AAAAAAAA
+    addl    %eax, %eax                      # eax <- AAAAAAAA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AAAAAAAA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/op_if_eq.S b/runtime/interpreter/mterp/x86/op_if_eq.S
new file mode 100644
index 0000000..5413d98
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_eq.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/x86/op_if_eqz.S b/runtime/interpreter/mterp/x86/op_if_eqz.S
new file mode 100644
index 0000000..53dc99e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_eqz.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/x86/op_if_ge.S b/runtime/interpreter/mterp/x86/op_if_ge.S
new file mode 100644
index 0000000..c2ba3c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_ge.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"l" }
diff --git a/runtime/interpreter/mterp/x86/op_if_gez.S b/runtime/interpreter/mterp/x86/op_if_gez.S
new file mode 100644
index 0000000..cd2c772
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_gez.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"l" }
diff --git a/runtime/interpreter/mterp/x86/op_if_gt.S b/runtime/interpreter/mterp/x86/op_if_gt.S
new file mode 100644
index 0000000..9fe84bb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_gt.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/x86/op_if_gtz.S b/runtime/interpreter/mterp/x86/op_if_gtz.S
new file mode 100644
index 0000000..b454ffd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_gtz.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/x86/op_if_le.S b/runtime/interpreter/mterp/x86/op_if_le.S
new file mode 100644
index 0000000..93571a7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_le.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"g" }
diff --git a/runtime/interpreter/mterp/x86/op_if_lez.S b/runtime/interpreter/mterp/x86/op_if_lez.S
new file mode 100644
index 0000000..779c77f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_lez.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"g" }
diff --git a/runtime/interpreter/mterp/x86/op_if_lt.S b/runtime/interpreter/mterp/x86/op_if_lt.S
new file mode 100644
index 0000000..1fb1521
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_lt.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/x86/op_if_ltz.S b/runtime/interpreter/mterp/x86/op_if_ltz.S
new file mode 100644
index 0000000..155c356
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_ltz.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/x86/op_if_ne.S b/runtime/interpreter/mterp/x86/op_if_ne.S
new file mode 100644
index 0000000..7e1b065
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_ne.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"e" }
diff --git a/runtime/interpreter/mterp/x86/op_if_nez.S b/runtime/interpreter/mterp/x86/op_if_nez.S
new file mode 100644
index 0000000..8951f5b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_nez.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"e" }
diff --git a/runtime/interpreter/mterp/x86/op_iget.S b/runtime/interpreter/mterp/x86/op_iget.S
new file mode 100644
index 0000000..868ffd0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget.S
@@ -0,0 +1,29 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    $helper
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    .if $is_object
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
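
The pattern above (call the runtime field getter, then test the thread's pending-exception slot before committing the result) is shared by all the non-quick iget forms. A hedged C sketch with illustrative stand-in types, not real ART declarations:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef struct { void* exception; } FakeThread;   /* stand-in, not ART */

    static bool commit_iget_result(FakeThread* self, uint32_t result,
                                   uint32_t* dest_vreg) {
        if (self->exception != NULL) {   /* cmpl $0, THREAD_EXCEPTION_OFFSET */
            return false;                /* jnz MterpException               */
        }
        *dest_vreg = result;             /* SET_VREG %eax rINST              */
        return true;
    }
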
diff --git a/runtime/interpreter/mterp/x86/op_iget_boolean.S b/runtime/interpreter/mterp/x86/op_iget_boolean.S
new file mode 100644
index 0000000..9ddad04
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_boolean_quick.S b/runtime/interpreter/mterp/x86/op_iget_boolean_quick.S
new file mode 100644
index 0000000..02b0c16
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movsbl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_byte.S b/runtime/interpreter/mterp/x86/op_iget_byte.S
new file mode 100644
index 0000000..8250788
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_byte.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_byte_quick.S b/runtime/interpreter/mterp/x86/op_iget_byte_quick.S
new file mode 100644
index 0000000..02b0c16
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movsbl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_char.S b/runtime/interpreter/mterp/x86/op_iget_char.S
new file mode 100644
index 0000000..e9d2156
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_char.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_char_quick.S b/runtime/interpreter/mterp/x86/op_iget_char_quick.S
new file mode 100644
index 0000000..a5d9712
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movzwl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_object.S b/runtime/interpreter/mterp/x86/op_iget_object.S
new file mode 100644
index 0000000..3abeefc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_object.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_object_quick.S b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
new file mode 100644
index 0000000..b09772f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
@@ -0,0 +1,17 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    %eax, OUT_ARG1(%esp)
+    EXPORT_PC
+    call    artIGetObjectFromMterp          # (obj, offset)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iget_quick.S b/runtime/interpreter/mterp/x86/op_iget_quick.S
new file mode 100644
index 0000000..372071c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_quick.S
@@ -0,0 +1,13 @@
+%default { "load":"movl"}
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    ${load} (%ecx,%eax,1), %eax
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iget_short.S b/runtime/interpreter/mterp/x86/op_iget_short.S
new file mode 100644
index 0000000..c8fad89
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_short.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_short_quick.S b/runtime/interpreter/mterp/x86/op_iget_short_quick.S
new file mode 100644
index 0000000..2c3aeb6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movswl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide.S b/runtime/interpreter/mterp/x86/op_iget_wide.S
new file mode 100644
index 0000000..58e5a65
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_wide.S
@@ -0,0 +1,25 @@
+/*
+ * 64-bit instance field get.
+ *
+ * for: iget-wide
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGet64InstanceFromCode
+    mov     rSELF, %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    SET_VREG %eax rINST
+    SET_VREG_HIGH %edx rINST
+    REFRESH_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide_quick.S b/runtime/interpreter/mterp/x86/op_iget_wide_quick.S
new file mode 100644
index 0000000..8be336b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_wide_quick.S
@@ -0,0 +1,11 @@
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movq    (%ecx,%eax,1), %xmm0
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    SET_WIDE_FP_VREG %xmm0 rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_instance_of.S b/runtime/interpreter/mterp/x86/op_instance_of.S
new file mode 100644
index 0000000..cfbd4a0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_instance_of.S
@@ -0,0 +1,26 @@
+/*
+ * Check to see if an object reference is an instance of a class.
+ *
+ * Most common situation is a non-null object, being compared against
+ * an already-resolved class.
+ */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %ecx %eax                      # Get object
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpInstanceOf                 # (index, obj, method, self)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    andb    $$0xf, rINSTbl                  # rINSTbl <- A
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_int_to_byte.S b/runtime/interpreter/mterp/x86/op_int_to_byte.S
new file mode 100644
index 0000000..b4e8d22
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"movsbl  %al, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_char.S b/runtime/interpreter/mterp/x86/op_int_to_char.S
new file mode 100644
index 0000000..4608971
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_char.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"movzwl  %ax,%eax"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_double.S b/runtime/interpreter/mterp/x86/op_int_to_double.S
new file mode 100644
index 0000000..3e9921e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildl","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_float.S b/runtime/interpreter/mterp/x86/op_int_to_float.S
new file mode 100644
index 0000000..849540d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildl","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_long.S b/runtime/interpreter/mterp/x86/op_int_to_long.S
new file mode 100644
index 0000000..736ea69
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_long.S
@@ -0,0 +1,12 @@
+    /* int to long vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- +A
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    movl    rIBASE, %ecx                    # cltd trashes rIBASE/edx
+    cltd                                    # rIBASE:eax <- sign-extended vB
+    SET_VREG_HIGH rIBASE rINST              # v[A+1] <- rIBASE
+    SET_VREG %eax rINST                     # v[A+0] <- %eax
+    movl    %ecx, rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
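
int-to-long is just a 32-to-64-bit sign extension; cltd fills edx (rIBASE) with copies of eax's sign bit, which is why rIBASE is saved in ecx and restored afterwards. Equivalent C, as a sketch:

    #include <stdint.h>

    static int64_t int_to_long(int32_t vB) {
        return (int64_t)vB;   /* high word = vB's sign bit replicated (cltd) */
    }
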
diff --git a/runtime/interpreter/mterp/x86/op_int_to_short.S b/runtime/interpreter/mterp/x86/op_int_to_short.S
new file mode 100644
index 0000000..90d0ae6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_short.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"movswl %ax, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_invoke_direct.S b/runtime/interpreter/mterp/x86/op_invoke_direct.S
new file mode 100644
index 0000000..76fb9a6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_direct_range.S b/runtime/interpreter/mterp/x86/op_invoke_direct_range.S
new file mode 100644
index 0000000..a6ab604
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_interface.S b/runtime/interpreter/mterp/x86/op_invoke_interface.S
new file mode 100644
index 0000000..91c24f5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeInterface" }
+/*
+ * Handle an interface method call.
+ *
+ * for: invoke-interface, invoke-interface/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86/op_invoke_interface_range.S b/runtime/interpreter/mterp/x86/op_invoke_interface_range.S
new file mode 100644
index 0000000..e478beb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_static.S b/runtime/interpreter/mterp/x86/op_invoke_static.S
new file mode 100644
index 0000000..b4c1236
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_static.S
@@ -0,0 +1,2 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeStatic" }
+
diff --git a/runtime/interpreter/mterp/x86/op_invoke_static_range.S b/runtime/interpreter/mterp/x86/op_invoke_static_range.S
new file mode 100644
index 0000000..3dc8a26
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_super.S b/runtime/interpreter/mterp/x86/op_invoke_super.S
new file mode 100644
index 0000000..be20edd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeSuper" }
+/*
+ * Handle a "super" method call.
+ *
+ * for: invoke-super, invoke-super/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86/op_invoke_super_range.S b/runtime/interpreter/mterp/x86/op_invoke_super_range.S
new file mode 100644
index 0000000..f36bf72
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual.S b/runtime/interpreter/mterp/x86/op_invoke_virtual.S
new file mode 100644
index 0000000..7e9c456
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtual" }
+/*
+ * Handle a virtual method call.
+ *
+ * for: invoke-virtual, invoke-virtual/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/x86/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..2dc9ab6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual_range.S b/runtime/interpreter/mterp/x86/op_invoke_virtual_range.S
new file mode 100644
index 0000000..d1d20d2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/x86/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..21bfc55
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/x86/op_iput.S b/runtime/interpreter/mterp/x86/op_iput.S
new file mode 100644
index 0000000..f8a6549
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput.S
@@ -0,0 +1,25 @@
+%default { "handler":"artSet32InstanceFromMterp" }
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern $handler
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $$4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $$0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    $handler
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_boolean.S b/runtime/interpreter/mterp/x86/op_iput_boolean.S
new file mode 100644
index 0000000..11cab88
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_boolean_quick.S b/runtime/interpreter/mterp/x86/op_iput_boolean_quick.S
new file mode 100644
index 0000000..93865de
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTbl", "store":"movb" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_byte.S b/runtime/interpreter/mterp/x86/op_iput_byte.S
new file mode 100644
index 0000000..11cab88
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_byte.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_byte_quick.S b/runtime/interpreter/mterp/x86/op_iput_byte_quick.S
new file mode 100644
index 0000000..93865de
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTbl", "store":"movb" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_char.S b/runtime/interpreter/mterp/x86/op_iput_char.S
new file mode 100644
index 0000000..abbf2bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_char.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_char_quick.S b/runtime/interpreter/mterp/x86/op_iput_char_quick.S
new file mode 100644
index 0000000..4ec8029
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTw", "store":"movw" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_object.S b/runtime/interpreter/mterp/x86/op_iput_object.S
new file mode 100644
index 0000000..20d57aa
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_object.S
@@ -0,0 +1,13 @@
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpIputObject
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_object_quick.S b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
new file mode 100644
index 0000000..4c7f4bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
@@ -0,0 +1,11 @@
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpIputObjectQuick
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_quick.S b/runtime/interpreter/mterp/x86/op_iput_quick.S
new file mode 100644
index 0000000..e2f7caf
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_quick.S
@@ -0,0 +1,13 @@
+%default { "reg":"rINST", "store":"movl" }
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    ${store}    ${reg}, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_short.S b/runtime/interpreter/mterp/x86/op_iput_short.S
new file mode 100644
index 0000000..abbf2bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_short.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_short_quick.S b/runtime/interpreter/mterp/x86/op_iput_short_quick.S
new file mode 100644
index 0000000..4ec8029
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTw", "store":"movw" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_wide.S b/runtime/interpreter/mterp/x86/op_iput_wide.S
new file mode 100644
index 0000000..92cb770
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_wide.S
@@ -0,0 +1,19 @@
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl,%ecx                    # ecx <- BA
+    sarl    $$4,%ecx                        # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet64InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_wide_quick.S b/runtime/interpreter/mterp/x86/op_iput_wide_quick.S
new file mode 100644
index 0000000..72285c5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_wide_quick.S
@@ -0,0 +1,12 @@
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    movzbl    rINSTbl, %ecx                 # ecx<- BA
+    sarl      $$4, %ecx                     # ecx<- B
+    GET_VREG  %ecx %ecx                     # vB (object we're operating on)
+    testl     %ecx, %ecx                    # is object null?
+    je        common_errNullObject
+    movzwl    2(rPC), %eax                  # eax<- field byte offset
+    leal      (%ecx,%eax,1), %ecx           # ecx<- Address of 64-bit target
+    andb      $$0xf, rINSTbl                # rINST<- A
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0<- fp[A]/fp[A+1]
+    movq      %xmm0, (%ecx)                 # obj.field <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_long_to_double.S b/runtime/interpreter/mterp/x86/op_long_to_double.S
new file mode 100644
index 0000000..2c7f905
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_long_to_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildll","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_long_to_float.S b/runtime/interpreter/mterp/x86/op_long_to_float.S
new file mode 100644
index 0000000..e500e39
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_long_to_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildll","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_long_to_int.S b/runtime/interpreter/mterp/x86/op_long_to_int.S
new file mode 100644
index 0000000..1c39b96
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "x86/op_move.S"
diff --git a/runtime/interpreter/mterp/x86/op_monitor_enter.S b/runtime/interpreter/mterp/x86/op_monitor_enter.S
new file mode 100644
index 0000000..8236fb3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_monitor_enter.S
@@ -0,0 +1,14 @@
+/*
+ * Synchronize on an object.
+ */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    artLockObjectFromCode           # (object, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_monitor_exit.S b/runtime/interpreter/mterp/x86/op_monitor_exit.S
new file mode 100644
index 0000000..56d4eb3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_monitor_exit.S
@@ -0,0 +1,18 @@
+/*
+ * Unlock an object.
+ *
+ * Exceptions that occur when unlocking a monitor need to appear as
+ * if they happened at the following instruction.  See the Dalvik
+ * instruction spec.
+ */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    artUnlockObjectFromCode         # (object, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move.S b/runtime/interpreter/mterp/x86/op_move.S
new file mode 100644
index 0000000..0a531be
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move.S
@@ -0,0 +1,13 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $$0xf, %al                      # eax <- A
+    shrl    $$4, rINST                      # rINST <- B
+    GET_VREG rINST rINST
+    .if $is_object
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_16.S b/runtime/interpreter/mterp/x86/op_move_16.S
new file mode 100644
index 0000000..0773f41
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_16.S
@@ -0,0 +1,12 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwl  4(rPC), %ecx                    # ecx <- BBBB
+    movzwl  2(rPC), %eax                    # eax <- AAAA
+    GET_VREG rINST %ecx
+    .if $is_object
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_move_exception.S b/runtime/interpreter/mterp/x86/op_move_exception.S
new file mode 100644
index 0000000..e37cdfa
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_exception.S
@@ -0,0 +1,6 @@
+    /* move-exception vAA */
+    movl    rSELF, %ecx
+    movl    THREAD_EXCEPTION_OFFSET(%ecx), %eax
+    SET_VREG_OBJECT %eax rINST              # fp[AA] <- exception object
+    movl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_from16.S b/runtime/interpreter/mterp/x86/op_move_from16.S
new file mode 100644
index 0000000..623a4d3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_from16.S
@@ -0,0 +1,12 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzx   rINSTbl, %eax                   # eax <- AA
+    movw    2(rPC), rINSTw                  # rINSTw <- BBBB
+    GET_VREG rINST rINST                    # rINST <- fp[BBBB]
+    .if $is_object
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_move_object.S b/runtime/interpreter/mterp/x86/op_move_object.S
new file mode 100644
index 0000000..a6a7c90
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_object.S
@@ -0,0 +1 @@
+%include "x86/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_object_16.S b/runtime/interpreter/mterp/x86/op_move_object_16.S
new file mode 100644
index 0000000..e0c8527
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_object_16.S
@@ -0,0 +1 @@
+%include "x86/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_object_from16.S b/runtime/interpreter/mterp/x86/op_move_object_from16.S
new file mode 100644
index 0000000..e623820
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "x86/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_result.S b/runtime/interpreter/mterp/x86/op_move_result.S
new file mode 100644
index 0000000..414f2cb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_result.S
@@ -0,0 +1,11 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    (%eax), %eax                    # eax <- result.i
+    .if $is_object
+    SET_VREG_OBJECT %eax rINST              # fp[AA] <- result
+    .else
+    SET_VREG %eax rINST                     # fp[AA] <- result
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_result_object.S b/runtime/interpreter/mterp/x86/op_move_result_object.S
new file mode 100644
index 0000000..cbf5e1d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_result_object.S
@@ -0,0 +1 @@
+%include "x86/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_result_wide.S b/runtime/interpreter/mterp/x86/op_move_result_wide.S
new file mode 100644
index 0000000..0c1683b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_result_wide.S
@@ -0,0 +1,7 @@
+    /* move-result-wide vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    4(%eax), %ecx                   # Get high
+    movl    (%eax), %eax                    # Get low
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[AA+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_wide.S b/runtime/interpreter/mterp/x86/op_move_wide.S
new file mode 100644
index 0000000..9c0e985
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_wide.S
@@ -0,0 +1,8 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %ecx             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_wide_16.S b/runtime/interpreter/mterp/x86/op_move_wide_16.S
new file mode 100644
index 0000000..7522c27
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_wide_16.S
@@ -0,0 +1,7 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  4(rPC), %ecx                    # ecx<- BBBB
+    movzwl  2(rPC), %eax                    # eax<- AAAA
+    GET_WIDE_FP_VREG %xmm0 %ecx             # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %eax             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_move_wide_from16.S b/runtime/interpreter/mterp/x86/op_move_wide_from16.S
new file mode 100644
index 0000000..5ad2cb4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_wide_from16.S
@@ -0,0 +1,7 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  2(rPC), %ecx                    # ecx <- BBBB
+    movzbl  rINSTbl, %eax                   # eax <- AA
+    GET_WIDE_FP_VREG %xmm0 %ecx             # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %eax             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_double.S b/runtime/interpreter/mterp/x86/op_mul_double.S
new file mode 100644
index 0000000..7cef4c0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"muls","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_double_2addr.S b/runtime/interpreter/mterp/x86/op_mul_double_2addr.S
new file mode 100644
index 0000000..bb722b6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"muls","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_float.S b/runtime/interpreter/mterp/x86/op_mul_float.S
new file mode 100644
index 0000000..1156230
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"muls","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_float_2addr.S b/runtime/interpreter/mterp/x86/op_mul_float_2addr.S
new file mode 100644
index 0000000..e9316df
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"muls","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_int.S b/runtime/interpreter/mterp/x86/op_mul_int.S
new file mode 100644
index 0000000..a367ab7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int.S
@@ -0,0 +1,12 @@
+    /*
+     * 32-bit binary multiplication.
+     */
+    /* mul vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_2addr.S b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
new file mode 100644
index 0000000..6005075
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
@@ -0,0 +1,10 @@
+    /* mul vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $$0xf, %cl                      # ecx <- A
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit16.S b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
new file mode 100644
index 0000000..1c0fde3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
@@ -0,0 +1,12 @@
+    /* mul/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    mov     rIBASE, LOCAL0(%esp)
+    imull   %ecx, %eax                      # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit8.S b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
new file mode 100644
index 0000000..4d7a22d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
@@ -0,0 +1,9 @@
+    /* mul/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax  %eax                    # eax <- vBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   %ecx, %eax                      # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_long.S b/runtime/interpreter/mterp/x86/op_mul_long.S
new file mode 100644
index 0000000..3746e41
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_long.S
@@ -0,0 +1,33 @@
+/*
+ * Signed 64-bit integer multiply.
+ *
+ * We could definitely use more free registers for
+ * this code.  We spill rPC (esi) and rFP (edi) so
+ * they can serve as the vB and vC pointers, and we
+ * spill rIBASE (edx) because the widening mull
+ * writes its high result there.  rINST (ebx) still
+ * holds the target register number throughout.  Yuck.
+ *
+ */
+    /* mul-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- B
+    movzbl  3(rPC), %ecx                    # ecx <- C
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[B]
+    leal    (rFP,%ecx,4), rFP               # rFP <- &v[C]
+    movl    4(%esi), %ecx                   # ecx <- Bmsw
+    imull   (rFP), %ecx                     # ecx <- (Bmsw*Clsw)
+    movl    4(rFP), %eax                    # eax <- Cmsw
+    imull   (%esi), %eax                    # eax <- (Cmsw*Blsw)
+    addl    %eax, %ecx                      # ecx <- (Bmsw*Clsw)+(Cmsw*Blsw)
+    movl    (rFP), %eax                     # eax <- Clsw
+    mull    (%esi)                          # eax <- (Clsw*Alsw)
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL1(%esp), rFP               # restore FP
+    leal    (%ecx,rIBASE), rIBASE           # full result now in rIBASE:%eax
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
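
In case the register dance above obscures it, here is the 32x32 decomposition in C (a sketch, not ART code): the two cross products form the high word's correction term, and the widening mull of the low words supplies the low word plus its carry into the high word.

    #include <assert.h>
    #include <stdint.h>

    static int64_t mul_long_by_halves(int64_t b, int64_t c) {
        uint32_t blo = (uint32_t)b, bhi = (uint32_t)((uint64_t)b >> 32);
        uint32_t clo = (uint32_t)c, chi = (uint32_t)((uint64_t)c >> 32);
        uint32_t cross = bhi * clo + chi * blo;         /* imull, imull, addl */
        uint64_t low   = (uint64_t)blo * clo;           /* mull -> edx:eax    */
        uint32_t hi    = (uint32_t)(low >> 32) + cross; /* leal (%ecx,rIBASE) */
        return (int64_t)(((uint64_t)hi << 32) | (uint32_t)low);
    }

    int main(void) {
        assert(mul_long_by_halves(0x123456789ABCLL, -7) == 0x123456789ABCLL * -7);
        return 0;
    }
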
diff --git a/runtime/interpreter/mterp/x86/op_mul_long_2addr.S b/runtime/interpreter/mterp/x86/op_mul_long_2addr.S
new file mode 100644
index 0000000..565a57c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_long_2addr.S
@@ -0,0 +1,35 @@
+/*
+ * Signed 64-bit integer multiply, 2-addr version
+ *
+ * We could definitely use more free registers for
+ * this code.  We must spill rIBASE (%edx) because
+ * the widening mull writes its high result there.
+ * On top of that, we spill rPC (esi) so it can
+ * serve as the vA pointer and rFP (edi) so it can
+ * serve as the vB pointer, restoring all three
+ * before advancing to the next instruction.  Yuck.
+ */
+    /* mul-long/2addr vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $$0xf, %al                      # eax <- A
+    CLEAR_WIDE_REF %eax                     # clear refs in advance
+    sarl    $$4, rINST                      # rINST <- B
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[A]
+    leal    (rFP,rINST,4), rFP              # rFP <- &v[B]
+    movl    4(%esi), %ecx                   # ecx <- Amsw
+    imull   (rFP), %ecx                     # ecx <- (Amsw*Blsw)
+    movl    4(rFP), %eax                    # eax <- Bmsw
+    imull   (%esi), %eax                    # eax <- (Bmsw*Alsw)
+    addl    %eax, %ecx                      # ecx <- (Amsw*Blsw)+(Bmsw*Alsw)
+    movl    (rFP), %eax                     # eax <- Blsw
+    mull    (%esi)                          # eax <- (Blsw*Alsw)
+    leal    (%ecx,rIBASE), rIBASE           # full result now in %edx:%eax
+    movl    rIBASE, 4(%esi)                 # v[A+1] <- rIBASE
+    movl    %eax, (%esi)                    # v[A] <- %eax
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    mov     LOCAL1(%esp), rFP               # restore FP
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_neg_double.S b/runtime/interpreter/mterp/x86/op_neg_double.S
new file mode 100644
index 0000000..fac4322
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"instr":"fchs","load":"fldl","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_neg_float.S b/runtime/interpreter/mterp/x86/op_neg_float.S
new file mode 100644
index 0000000..30f071b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"instr":"fchs","load":"flds","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_neg_int.S b/runtime/interpreter/mterp/x86/op_neg_int.S
new file mode 100644
index 0000000..67d4d18
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_int.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"negl    %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_neg_long.S b/runtime/interpreter/mterp/x86/op_neg_long.S
new file mode 100644
index 0000000..7cc17f0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_long.S
@@ -0,0 +1,13 @@
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax %ecx                      # eax <- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- v[B+1]
+    negl    %eax
+    adcl    $$0, %ecx
+    negl    %ecx
+    SET_VREG %eax rINST                     # v[A+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
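
The three-instruction sequence above (negl / adcl $0 / negl) is the classic way to negate a 64-bit value held in two 32-bit registers; a C sketch of the same arithmetic, assuming nothing beyond the visible code:

    #include <assert.h>
    #include <stdint.h>

    static int64_t neg_long_by_halves(int64_t v) {
        uint32_t lo = (uint32_t)v;
        uint32_t hi = (uint32_t)((uint64_t)v >> 32);
        uint32_t carry = (lo != 0);        /* CF left behind by negl %eax */
        lo = (uint32_t)(0u - lo);          /* negl %eax                   */
        hi = hi + carry;                   /* adcl $0, %ecx               */
        hi = (uint32_t)(0u - hi);          /* negl %ecx                   */
        return (int64_t)(((uint64_t)hi << 32) | lo);
    }

    int main(void) {
        assert(neg_long_by_halves(0x123456789LL) == -0x123456789LL);
        assert(neg_long_by_halves(0) == 0);
        return 0;
    }
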
diff --git a/runtime/interpreter/mterp/x86/op_new_array.S b/runtime/interpreter/mterp/x86/op_new_array.S
new file mode 100644
index 0000000..6852183
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_new_array.S
@@ -0,0 +1,21 @@
+/*
+ * Allocate an array of objects, specified with the array class
+ * and a count.
+ *
+ * The verifier guarantees that this is an array class, so we don't
+ * check for it here.
+ */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpNewArray
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_new_instance.S b/runtime/interpreter/mterp/x86/op_new_instance.S
new file mode 100644
index 0000000..a3632e8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_new_instance.S
@@ -0,0 +1,16 @@
+/*
+ * Create a new instance of a class.
+ */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpNewInstance
+    REFRESH_IBASE
+    testl   %eax, %eax                 # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_nop.S b/runtime/interpreter/mterp/x86/op_nop.S
new file mode 100644
index 0000000..4cb68e3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_nop.S
@@ -0,0 +1 @@
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_not_int.S b/runtime/interpreter/mterp/x86/op_not_int.S
new file mode 100644
index 0000000..335ab09
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_not_int.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"notl %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_not_long.S b/runtime/interpreter/mterp/x86/op_not_long.S
new file mode 100644
index 0000000..55666a1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_not_long.S
@@ -0,0 +1,11 @@
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax %ecx                      # eax <- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- v[B+1]
+    notl    %eax
+    notl    %ecx
+    SET_VREG %eax rINST                     # v[A+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_or_int.S b/runtime/interpreter/mterp/x86/op_or_int.S
new file mode 100644
index 0000000..ebe2ec2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"orl     (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_int_2addr.S b/runtime/interpreter/mterp/x86/op_or_int_2addr.S
new file mode 100644
index 0000000..36c17db
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"orl     %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_or_int_lit16.S b/runtime/interpreter/mterp/x86/op_or_int_lit16.S
new file mode 100644
index 0000000..0a88ff59
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"orl     %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_int_lit8.S b/runtime/interpreter/mterp/x86/op_or_int_lit8.S
new file mode 100644
index 0000000..0670b67
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"orl     %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_long.S b/runtime/interpreter/mterp/x86/op_or_long.S
new file mode 100644
index 0000000..09ca539
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"orl     (rFP,%ecx,4), rIBASE", "instr2":"orl     4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_long_2addr.S b/runtime/interpreter/mterp/x86/op_or_long_2addr.S
new file mode 100644
index 0000000..2062e81
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"orl     %eax, (rFP,rINST,4)","instr2":"orl     %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_packed_switch.S b/runtime/interpreter/mterp/x86/op_packed_switch.S
new file mode 100644
index 0000000..4e39a48
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_packed_switch.S
@@ -0,0 +1,29 @@
+%default { "func":"MterpDoPackedSwitch" }
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    GET_VREG %eax rINST                     # eax <- vAA
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
+    movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
+    call    $func
+    addl    %eax, %eax
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    REFRESH_IBASE
+    jg      1f
+#if MTERP_SUSPEND
+    #     REFRESH_IBASE - we did it above.
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
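
The handler only doubles and applies whatever signed code-unit offset the helper returns; the lookup itself lives in MterpDoPackedSwitch. A hedged sketch of the packed-switch payload walk it is expected to perform (layout per the Dex format: ident, size, first_key, then `size` 32-bit targets), with a miss falling through by the 3-code-unit width of the instruction:

    #include <stdint.h>

    static int32_t packed_switch_lookup(const uint16_t* payload, int32_t key) {
        uint16_t size = payload[1];
        int32_t first_key = (int32_t)(payload[2] | ((uint32_t)payload[3] << 16));
        int64_t index = (int64_t)key - first_key;
        if (index < 0 || index >= (int64_t)size) {
            return 3;                       /* fall through past the opcode */
        }
        const uint16_t* t = &payload[4 + 2 * index];
        return (int32_t)(t[0] | ((uint32_t)t[1] << 16));
    }
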
diff --git a/runtime/interpreter/mterp/x86/op_rem_double.S b/runtime/interpreter/mterp/x86/op_rem_double.S
new file mode 100644
index 0000000..4b52a06
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_double.S
@@ -0,0 +1,14 @@
+    /* rem_double vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    fldl    VREG_ADDRESS(%ecx)              # %st1 <- fp[vCC] (divisor)
+    fldl    VREG_ADDRESS(%eax)              # %st0 <- fp[vBB] (dividend)
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(rINST)             # fp[vAA] <- %st
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
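
rem-double follows fmod semantics (truncation toward zero, result keeps the dividend's sign). fprem only produces a partial remainder, so the loop above re-issues it until the status word's C2 bit, read back through fstsw/sahf as PF, goes clear. Equivalent C, as a sketch:

    #include <assert.h>
    #include <math.h>

    static double rem_double(double dividend, double divisor) {
        return fmod(dividend, divisor);    /* what the fprem loop converges to */
    }

    int main(void) {
        assert(rem_double(7.5, 2.0) == 1.5);
        assert(rem_double(-7.5, 2.0) == -1.5);   /* sign follows the dividend */
        return 0;
    }
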
diff --git a/runtime/interpreter/mterp/x86/op_rem_double_2addr.S b/runtime/interpreter/mterp/x86/op_rem_double_2addr.S
new file mode 100644
index 0000000..5a0e669
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_double_2addr.S
@@ -0,0 +1,15 @@
+    /* rem_double/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    fldl    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $$0xf, %cl                      # ecx <- A
+    fldl    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_WIDE_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_rem_float.S b/runtime/interpreter/mterp/x86/op_rem_float.S
new file mode 100644
index 0000000..05e0bf1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_float.S
@@ -0,0 +1,14 @@
+    /* rem_float vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- BB
+    movzbl  2(rPC), %eax                    # eax <- CC
+    flds    VREG_ADDRESS(%ecx)              # vBB to fp stack
+    flds    VREG_ADDRESS(%eax)              # vCC to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(rINST)             # %st to vAA
+    CLEAR_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_rem_float_2addr.S b/runtime/interpreter/mterp/x86/op_rem_float_2addr.S
new file mode 100644
index 0000000..29f84e6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_float_2addr.S
@@ -0,0 +1,15 @@
+    /* rem_float/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    flds    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $$0xf, %cl                      # ecx <- A
+    flds    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_rem_int.S b/runtime/interpreter/mterp/x86/op_rem_int.S
new file mode 100644
index 0000000..d25b93c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int.S
@@ -0,0 +1 @@
+%include "x86/bindiv.S" {"result":"rIBASE","special":"$0","rem":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_int_2addr.S b/runtime/interpreter/mterp/x86/op_rem_int_2addr.S
new file mode 100644
index 0000000..c788e0e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/bindiv2addr.S" {"result":"rIBASE","special":"$0"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_int_lit16.S b/runtime/interpreter/mterp/x86/op_rem_int_lit16.S
new file mode 100644
index 0000000..3df9d39
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/bindivLit16.S" {"result":"rIBASE","special":"$0"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_int_lit8.S b/runtime/interpreter/mterp/x86/op_rem_int_lit8.S
new file mode 100644
index 0000000..56e19c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/bindivLit8.S" {"result":"rIBASE","special":"$0"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_long.S b/runtime/interpreter/mterp/x86/op_rem_long.S
new file mode 100644
index 0000000..0ffe1f6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_long.S
@@ -0,0 +1 @@
+%include "x86/op_div_long.S" {"routine":"art_quick_lmod"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_long_2addr.S b/runtime/interpreter/mterp/x86/op_rem_long_2addr.S
new file mode 100644
index 0000000..4b97735
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/op_div_long_2addr.S" {"routine":"art_quick_lmod"}
diff --git a/runtime/interpreter/mterp/x86/op_return.S b/runtime/interpreter/mterp/x86/op_return.S
new file mode 100644
index 0000000..1658322
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return.S
@@ -0,0 +1,11 @@
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    GET_VREG %eax rINST                     # eax <- vAA
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_return_object.S b/runtime/interpreter/mterp/x86/op_return_object.S
new file mode 100644
index 0000000..12c84b3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_object.S
@@ -0,0 +1 @@
+%include "x86/op_return.S"
diff --git a/runtime/interpreter/mterp/x86/op_return_void.S b/runtime/interpreter/mterp/x86/op_return_void.S
new file mode 100644
index 0000000..b74446e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_void.S
@@ -0,0 +1,5 @@
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
new file mode 100644
index 0000000..abc7c4d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
@@ -0,0 +1,3 @@
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_return_wide.S b/runtime/interpreter/mterp/x86/op_return_wide.S
new file mode 100644
index 0000000..00effd6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_wide.S
@@ -0,0 +1,9 @@
+/*
+ * Return a 64-bit value.
+ */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    GET_VREG_HIGH %ecx rINST                # ecx <- v[AA+1]
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_rsub_int.S b/runtime/interpreter/mterp/x86/op_rsub_int.S
new file mode 100644
index 0000000..d6449c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "x86/binopLit16.S" {"instr":"subl    %eax, %ecx","result":"%ecx"}
diff --git a/runtime/interpreter/mterp/x86/op_rsub_int_lit8.S b/runtime/interpreter/mterp/x86/op_rsub_int_lit8.S
new file mode 100644
index 0000000..15d0e35
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"subl    %eax, %ecx" , "result":"%ecx"}
diff --git a/runtime/interpreter/mterp/x86/op_sget.S b/runtime/interpreter/mterp/x86/op_sget.S
new file mode 100644
index 0000000..ed5aedf
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget.S
@@ -0,0 +1,26 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    $helper
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if $is_object
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sget_boolean.S b/runtime/interpreter/mterp/x86/op_sget_boolean.S
new file mode 100644
index 0000000..f058dd8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_byte.S b/runtime/interpreter/mterp/x86/op_sget_byte.S
new file mode 100644
index 0000000..c952f40
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_byte.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetByteStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_char.S b/runtime/interpreter/mterp/x86/op_sget_char.S
new file mode 100644
index 0000000..d7bd410
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_char.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetCharStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_object.S b/runtime/interpreter/mterp/x86/op_sget_object.S
new file mode 100644
index 0000000..1c95f9a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_object.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_short.S b/runtime/interpreter/mterp/x86/op_sget_short.S
new file mode 100644
index 0000000..6475306
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_short.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetShortStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_wide.S b/runtime/interpreter/mterp/x86/op_sget_wide.S
new file mode 100644
index 0000000..76b993b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_wide.S
@@ -0,0 +1,21 @@
+/*
+ * SGET_WIDE handler wrapper.
+ *
+ */
+    /* sget-wide vAA, field@BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGet64StaticFromCode
+    movl    rSELF, %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG %eax rINST                     # fp[A]<- low part
+    SET_VREG_HIGH %edx rINST                # fp[A+1]<- high part
+    REFRESH_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_shl_int.S b/runtime/interpreter/mterp/x86/op_shl_int.S
new file mode 100644
index 0000000..6a41d1c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_int.S
@@ -0,0 +1 @@
+%include "x86/binop1.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shl_int_2addr.S b/runtime/interpreter/mterp/x86/op_shl_int_2addr.S
new file mode 100644
index 0000000..72abb8e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/shop2addr.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shl_int_lit8.S b/runtime/interpreter/mterp/x86/op_shl_int_lit8.S
new file mode 100644
index 0000000..b8d6069
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shl_long.S b/runtime/interpreter/mterp/x86/op_shl_long.S
new file mode 100644
index 0000000..56d13e3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_long.S
@@ -0,0 +1,29 @@
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shl-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE <- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shldl   %eax,rIBASE
+    sall    %cl, %eax
+    testb   $$32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
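
For reference, the semantics this handler implements can be written in a few lines of C++ (an illustrative sketch, not ART code): Dalvik masks the shift count to 6 bits, but a 32-bit x86 shift only honours the low 5 bits of %cl, so counts of 32-63 need the explicit high/low fix-up performed after the testb above.

    #include <cstdint>

    // Reference semantics of shl-long on a lo/hi register pair.
    void ShlLongPair(uint32_t* lo, uint32_t* hi, uint32_t count) {
      count &= 0x3f;                       // Dalvik masks to 6 bits
      if (count & 0x20) {                  // 32 <= count <= 63
        *hi = *lo << (count & 0x1f);
        *lo = 0;
      } else if (count != 0) {
        *hi = (*hi << count) | (*lo >> (32 - count));  // what shldl does
        *lo <<= count;
      }
    }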
diff --git a/runtime/interpreter/mterp/x86/op_shl_long_2addr.S b/runtime/interpreter/mterp/x86/op_shl_long_2addr.S
new file mode 100644
index 0000000..5da873f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_long_2addr.S
@@ -0,0 +1,26 @@
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shl-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $$4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vBB
+    shldl   %eax, rIBASE
+    sall    %cl, %eax
+    testb   $$32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_shr_int.S b/runtime/interpreter/mterp/x86/op_shr_int.S
new file mode 100644
index 0000000..687b2c3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_int.S
@@ -0,0 +1 @@
+%include "x86/binop1.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shr_int_2addr.S b/runtime/interpreter/mterp/x86/op_shr_int_2addr.S
new file mode 100644
index 0000000..533b0e9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/shop2addr.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shr_int_lit8.S b/runtime/interpreter/mterp/x86/op_shr_int_lit8.S
new file mode 100644
index 0000000..ebd1bea
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shr_long.S b/runtime/interpreter/mterp/x86/op_shr_long.S
new file mode 100644
index 0000000..4490a9a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_long.S
@@ -0,0 +1,29 @@
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE<- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $$31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_shr_long_2addr.S b/runtime/interpreter/mterp/x86/op_shr_long_2addr.S
new file mode 100644
index 0000000..57494f9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_long_2addr.S
@@ -0,0 +1,26 @@
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $$4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vBB
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $$31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_sparse_switch.S b/runtime/interpreter/mterp/x86/op_sparse_switch.S
new file mode 100644
index 0000000..fdaec47
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "x86/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/x86/op_sput.S b/runtime/interpreter/mterp/x86/op_sput.S
new file mode 100644
index 0000000..04a8f23
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput.S
@@ -0,0 +1,22 @@
+%default { "helper":"artSet32StaticFromCode"}
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    $helper
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_boolean.S b/runtime/interpreter/mterp/x86/op_sput_boolean.S
new file mode 100644
index 0000000..63601bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_byte.S b/runtime/interpreter/mterp/x86/op_sput_byte.S
new file mode 100644
index 0000000..63601bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_byte.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_char.S b/runtime/interpreter/mterp/x86/op_sput_char.S
new file mode 100644
index 0000000..1749f7c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_char.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_object.S b/runtime/interpreter/mterp/x86/op_sput_object.S
new file mode 100644
index 0000000..0480e00
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_object.S
@@ -0,0 +1,13 @@
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpSputObject
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_short.S b/runtime/interpreter/mterp/x86/op_sput_short.S
new file mode 100644
index 0000000..1749f7c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_short.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_wide.S b/runtime/interpreter/mterp/x86/op_sput_wide.S
new file mode 100644
index 0000000..d58d5af
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_wide.S
@@ -0,0 +1,20 @@
+/*
+ * SPUT_WIDE handler wrapper.
+ *
+ */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet64IndirectStaticFromMterp
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sub_double.S b/runtime/interpreter/mterp/x86/op_sub_double.S
new file mode 100644
index 0000000..e83afeb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"subs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_double_2addr.S b/runtime/interpreter/mterp/x86/op_sub_double_2addr.S
new file mode 100644
index 0000000..af9a2ab
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"subs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_float.S b/runtime/interpreter/mterp/x86/op_sub_float.S
new file mode 100644
index 0000000..423d834
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"subs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_float_2addr.S b/runtime/interpreter/mterp/x86/op_sub_float_2addr.S
new file mode 100644
index 0000000..18de000
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"subs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_int.S b/runtime/interpreter/mterp/x86/op_sub_int.S
new file mode 100644
index 0000000..7fe03fb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"subl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_int_2addr.S b/runtime/interpreter/mterp/x86/op_sub_int_2addr.S
new file mode 100644
index 0000000..cc9bf60
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"subl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_long.S b/runtime/interpreter/mterp/x86/op_sub_long.S
new file mode 100644
index 0000000..014591e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"subl    (rFP,%ecx,4), rIBASE", "instr2":"sbbl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_long_2addr.S b/runtime/interpreter/mterp/x86/op_sub_long_2addr.S
new file mode 100644
index 0000000..7498029
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"subl    %eax, (rFP,rINST,4)","instr2":"sbbl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_throw.S b/runtime/interpreter/mterp/x86/op_throw.S
new file mode 100644
index 0000000..15b20b5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_throw.S
@@ -0,0 +1,11 @@
+/*
+ * Throw an exception object in the current thread.
+ */
+    /* throw vAA */
+    EXPORT_PC
+    GET_VREG %eax rINST                     # eax<- vAA (exception object)
+    testl   %eax, %eax
+    jz      common_errNullObject
+    movl    rSELF,%ecx
+    movl    %eax, THREAD_EXCEPTION_OFFSET(%ecx)
+    jmp     MterpException
diff --git a/runtime/interpreter/mterp/x86/op_unused_3e.S b/runtime/interpreter/mterp/x86/op_unused_3e.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_3e.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_3f.S b/runtime/interpreter/mterp/x86/op_unused_3f.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_3f.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_40.S b/runtime/interpreter/mterp/x86/op_unused_40.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_40.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_41.S b/runtime/interpreter/mterp/x86/op_unused_41.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_41.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_42.S b/runtime/interpreter/mterp/x86/op_unused_42.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_42.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_43.S b/runtime/interpreter/mterp/x86/op_unused_43.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_43.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_79.S b/runtime/interpreter/mterp/x86/op_unused_79.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_79.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_7a.S b/runtime/interpreter/mterp/x86/op_unused_7a.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_7a.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f4.S b/runtime/interpreter/mterp/x86/op_unused_f4.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f4.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fa.S b/runtime/interpreter/mterp/x86/op_unused_fa.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fa.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fb.S b/runtime/interpreter/mterp/x86/op_unused_fb.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fb.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fc.S b/runtime/interpreter/mterp/x86/op_unused_fc.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fc.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fd.S b/runtime/interpreter/mterp/x86/op_unused_fd.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fd.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fe.S b/runtime/interpreter/mterp/x86/op_unused_fe.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fe.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_ff.S b/runtime/interpreter/mterp/x86/op_unused_ff.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_ff.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_ushr_int.S b/runtime/interpreter/mterp/x86/op_ushr_int.S
new file mode 100644
index 0000000..dfe25ff
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_int.S
@@ -0,0 +1 @@
+%include "x86/binop1.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_ushr_int_2addr.S b/runtime/interpreter/mterp/x86/op_ushr_int_2addr.S
new file mode 100644
index 0000000..c14bc98
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/shop2addr.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_ushr_int_lit8.S b/runtime/interpreter/mterp/x86/op_ushr_int_lit8.S
new file mode 100644
index 0000000..e129f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_ushr_long.S b/runtime/interpreter/mterp/x86/op_ushr_long.S
new file mode 100644
index 0000000..287946e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_long.S
@@ -0,0 +1,29 @@
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* ushr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE <- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_ushr_long_2addr.S b/runtime/interpreter/mterp/x86/op_ushr_long_2addr.S
new file mode 100644
index 0000000..39c2724
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_long_2addr.S
@@ -0,0 +1,26 @@
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* ushr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $$4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vBB
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_xor_int.S b/runtime/interpreter/mterp/x86/op_xor_int.S
new file mode 100644
index 0000000..35aca6a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"xorl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_int_2addr.S b/runtime/interpreter/mterp/x86/op_xor_int_2addr.S
new file mode 100644
index 0000000..d7b70e2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"xorl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_int_lit16.S b/runtime/interpreter/mterp/x86/op_xor_int_lit16.S
new file mode 100644
index 0000000..115f0a0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"xorl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_int_lit8.S b/runtime/interpreter/mterp/x86/op_xor_int_lit8.S
new file mode 100644
index 0000000..243971c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"xorl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_long.S b/runtime/interpreter/mterp/x86/op_xor_long.S
new file mode 100644
index 0000000..0d3c0f5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"xorl    (rFP,%ecx,4), rIBASE", "instr2":"xorl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_long_2addr.S b/runtime/interpreter/mterp/x86/op_xor_long_2addr.S
new file mode 100644
index 0000000..b5000e4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"xorl    %eax, (rFP,rINST,4)","instr2":"xorl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/shop2addr.S b/runtime/interpreter/mterp/x86/shop2addr.S
new file mode 100644
index 0000000..94d3545
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/shop2addr.S
@@ -0,0 +1,13 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vAA
+    $instr                                  # ex: sarl %cl, %eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/sseBinop.S b/runtime/interpreter/mterp/x86/sseBinop.S
new file mode 100644
index 0000000..63a1e21
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/sseBinop.S
@@ -0,0 +1,9 @@
+%default {"instr":"","suff":""}
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movs${suff}   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    ${instr}${suff} VREG_ADDRESS(%eax), %xmm0
+    movs${suff}   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movs${suff}   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/sseBinop2Addr.S b/runtime/interpreter/mterp/x86/sseBinop2Addr.S
new file mode 100644
index 0000000..d157e67
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/sseBinop2Addr.S
@@ -0,0 +1,10 @@
+%default {"instr":"","suff":""}
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $$0xf, %ecx                     # ecx <- A
+    movs${suff} VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $$4, rINST                      # rINST<- B
+    ${instr}${suff} VREG_ADDRESS(rINST), %xmm0
+    movs${suff} %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movs${suff} %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/unop.S b/runtime/interpreter/mterp/x86/unop.S
new file mode 100644
index 0000000..00d3e15
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/unop.S
@@ -0,0 +1,13 @@
+%default {"instr":""}
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $$4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $$0xf,%cl                       # ecx <- A
+    $instr
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/unused.S b/runtime/interpreter/mterp/x86/unused.S
new file mode 100644
index 0000000..c95ef94
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/unused.S
@@ -0,0 +1,4 @@
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
diff --git a/runtime/interpreter/mterp/x86/zcmp.S b/runtime/interpreter/mterp/x86/zcmp.S
new file mode 100644
index 0000000..5ce4f0f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/zcmp.S
@@ -0,0 +1,24 @@
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $$0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $$2, %eax                       # assume branch not taken
+    j${revcmp}   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- BBBB * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # BBBB * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index 4fb6df1..1bc58ac 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -276,7 +276,12 @@
    */
 #if defined(__linux__)
   hostent he;
-  char auxBuf[128];
+  // The size of the work buffer used in the gethostbyname_r call
+  // below. It used to be 128, but this was not enough on some
+  // configurations (maybe because of IPv6?), causing failures in JDWP
+  // host testing; thus it was increased to 256.
+  static constexpr size_t kAuxBufSize = 256;
+  char auxBuf[kAuxBufSize];
   int error;
   int cc = gethostbyname_r(options->host.c_str(), &he, auxBuf, sizeof(auxBuf), &pEntry, &error);
   if (cc != 0) {
@@ -298,7 +303,8 @@
 
   addr.addrInet.sin_port = htons(options->port);
 
-  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+            << ntohs(addr.addrInet.sin_port);
 
   /*
    * Create a socket.
@@ -313,13 +319,15 @@
    * Try to connect.
    */
   if (connect(clientSock, &addr.addrPlain, sizeof(addr)) != 0) {
-    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+                << ntohs(addr.addrInet.sin_port);
     close(clientSock);
     clientSock = -1;
     return false;
   }
 
-  LOG(INFO) << "Connection established to " << options->host << " (" << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
+  LOG(INFO) << "Connection established to " << options->host << " ("
+            << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
   SetAwaitingHandshake(true);
   input_count_ = 0;
 
@@ -438,7 +446,8 @@
         }
       }
       if (clientSock >= 0 && FD_ISSET(clientSock, &readfds)) {
-        readCount = read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
+        readCount =
+            read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
         if (readCount < 0) {
           /* read failed */
           if (errno != EINTR) {
@@ -479,7 +488,8 @@
     errno = 0;
     int cc = TEMP_FAILURE_RETRY(write(clientSock, input_buffer_, kMagicHandshakeLen));
     if (cc != kMagicHandshakeLen) {
-      PLOG(ERROR) << "Failed writing handshake bytes (" << cc << " of " << kMagicHandshakeLen << ")";
+      PLOG(ERROR) << "Failed writing handshake bytes ("
+                  << cc << " of " << kMagicHandshakeLen << ")";
       goto fail;
     }
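
On the gethostbyname_r change above: glibc reports ERANGE from this call when the caller-supplied work buffer is too small, so callers either size the buffer generously (as the new kAuxBufSize does) or retry with a larger buffer. A hedged sketch of the retry pattern, not ART code:

    #include <netdb.h>
    #include <cerrno>
    #include <vector>

    // Resolve |name|, growing the scratch buffer whenever glibc reports
    // ERANGE.  Returns true and fills *result on success.
    bool ResolveHost(const char* name, hostent* he, hostent** result) {
      std::vector<char> buf(256);
      int h_err = 0;
      while (true) {
        int rc = gethostbyname_r(name, he, buf.data(), buf.size(), result, &h_err);
        if (rc == ERANGE) {
          buf.resize(buf.size() * 2);  // scratch buffer too small, retry
          continue;
        }
        return rc == 0 && *result != nullptr;
      }
    }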
 
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 5c9dab2..8f4d24f 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -56,7 +56,8 @@
   os << "JIT code cache size=" << PrettySize(code_cache_->CodeCacheSize()) << "\n"
      << "JIT data cache size=" << PrettySize(code_cache_->DataCacheSize()) << "\n"
      << "JIT current capacity=" << PrettySize(code_cache_->GetCurrentCapacity()) << "\n"
-     << "JIT number of compiled code=" << code_cache_->NumberOfCompiledCode() << "\n";
+     << "JIT number of compiled code=" << code_cache_->NumberOfCompiledCode() << "\n"
+     << "JIT total number of compilations=" << code_cache_->NumberOfCompilations() << "\n";
   cumulative_timings_.Dump(os);
 }
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 2d575bd..64b2c89 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -125,7 +125,8 @@
       data_end_(initial_data_capacity),
       has_done_one_collection_(false),
       last_update_time_ns_(0),
-      garbage_collect_code_(garbage_collect_code) {
+      garbage_collect_code_(garbage_collect_code),
+      number_of_compilations_(0) {
 
   DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
   code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
@@ -322,6 +323,7 @@
 
     __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
                             reinterpret_cast<char*>(code_ptr + code_size));
+    number_of_compilations_++;
   }
   // We need to update the entry point in the runnable state for the instrumentation.
   {
@@ -347,6 +349,11 @@
   return reinterpret_cast<uint8_t*>(method_header);
 }
 
+size_t JitCodeCache::NumberOfCompilations() {
+  MutexLock mu(Thread::Current(), lock_);
+  return number_of_compilations_;
+}
+
 size_t JitCodeCache::CodeCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
   return CodeCacheSizeLocked();
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index a152bcd..67fa928 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -68,6 +68,9 @@
   // of methods that got JIT compiled, as we might have collected some.
   size_t NumberOfCompiledCode() REQUIRES(!lock_);
 
+  // Number of compilations done throughout the lifetime of the JIT.
+  size_t NumberOfCompilations() REQUIRES(!lock_);
+
   bool NotifyCompilationOf(ArtMethod* method, Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
@@ -261,6 +264,9 @@
   // Whether we can do garbage collection.
   const bool garbage_collect_code_;
 
+  // Number of compilations done throughout the lifetime of the JIT.
+  size_t number_of_compilations_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 4cbaf2c..6b47b67 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -165,11 +165,14 @@
   instrumentation_cache_->AddSamples(thread, method, 1);
 }
 
-void JitInstrumentationListener::BackwardBranch(Thread* thread,
-                                                ArtMethod* method,
-                                                int32_t dex_pc_offset) {
-  CHECK_LE(dex_pc_offset, 0);
-  instrumentation_cache_->AddSamples(thread, method, 1);
+void JitInstrumentationListener::Branch(Thread* thread,
+                                        ArtMethod* method,
+                                        uint32_t dex_pc ATTRIBUTE_UNUSED,
+                                        int32_t dex_pc_offset) {
+  if (dex_pc_offset < 0) {
+    // Increment method hotness if it is a backward branch.
+    instrumentation_cache_->AddSamples(thread, method, 1);
+  }
 }
 
 void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 15969e4..620c087 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -70,7 +70,7 @@
   void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
                   ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
 
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
   void InvokeVirtualOrInterface(Thread* thread,
@@ -84,7 +84,7 @@
 
   static constexpr uint32_t kJitEvents =
       instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch |
+      instrumentation::Instrumentation::kBranch |
       instrumentation::Instrumentation::kInvokeVirtualOrInterface;
 
  private:
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index a132701..b4b872f 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -24,8 +24,11 @@
 
 #include "art_method-inl.h"
 #include "base/mutex.h"
+#include "base/scoped_flock.h"
 #include "base/stl_util.h"
+#include "base/unix_file/fd_file.h"
 #include "jit/profiling_info.h"
+#include "os.h"
 #include "safe_map.h"
 
 namespace art {
@@ -37,8 +40,17 @@
     return true;
   }
 
+  ScopedFlock flock;
+  std::string error;
+  if (!flock.Init(filename.c_str(), O_RDWR | O_NOFOLLOW | O_CLOEXEC, /* block */ false, &error)) {
+    LOG(WARNING) << "Couldn't lock the profile file " << filename << ": " << error;
+    return false;
+  }
+
+  int fd = flock.GetFile()->Fd();
+
   ProfileCompilationInfo info;
-  if (!info.Load(filename)) {
+  if (!info.Load(fd)) {
     LOG(WARNING) << "Could not load previous profile data from file " << filename;
     return false;
   }
@@ -54,9 +66,14 @@
     }
   }
 
+  if (!flock.GetFile()->ClearContent()) {
+    PLOG(WARNING) << "Could not clear profile file: " << filename;
+    return false;
+  }
+
   // This doesn't need locking because we are trying to lock the file for exclusive
   // access and fail immediately if we can't.
-  bool result = info.Save(filename);
+  bool result = info.Save(fd);
   if (result) {
     VLOG(profiler) << "Successfully saved profile info to " << filename
         << " Size: " << GetFileSizeBytes(filename);
@@ -66,64 +83,20 @@
   return result;
 }
 
-enum OpenMode {
-  READ,
-  READ_WRITE
-};
-
-static int OpenFile(const std::string& filename, OpenMode open_mode) {
-  int fd = -1;
-  switch (open_mode) {
-    case READ:
-      fd = open(filename.c_str(), O_RDONLY);
-      break;
-    case READ_WRITE:
-      // TODO(calin) allow the shared uid of the app to access the file.
-      fd = open(filename.c_str(), O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC);
-      break;
-  }
-
-  if (fd < 0) {
-    PLOG(WARNING) << "Failed to open profile file " << filename;
-    return -1;
-  }
-
-  // Lock the file for exclusive access but don't wait if we can't lock it.
-  int err = flock(fd, LOCK_EX | LOCK_NB);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to lock profile file " << filename;
-    return -1;
-  }
-  return fd;
-}
-
-static bool CloseDescriptorForFile(int fd, const std::string& filename) {
-  // Now unlock the file, allowing another process in.
-  int err = flock(fd, LOCK_UN);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to unlock profile file " << filename;
-    return false;
-  }
-
-  // Done, close the file.
-  err = ::close(fd);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to close descriptor for profile file" << filename;
-    return false;
-  }
-
-  return true;
-}
-
-static void WriteToFile(int fd, const std::ostringstream& os) {
+static bool WriteToFile(int fd, const std::ostringstream& os) {
   std::string data(os.str());
   const char *p = data.c_str();
   size_t length = data.length();
   do {
-    int n = ::write(fd, p, length);
+    int n = TEMP_FAILURE_RETRY(write(fd, p, length));
+    if (n < 0) {
+      PLOG(WARNING) << "Failed to write to descriptor: " << fd;
+      return false;
+    }
     p += n;
     length -= n;
   } while (length > 0);
+  return true;
 }
 
 static constexpr const char kFieldSeparator = ',';
@@ -137,13 +110,8 @@
  *    /system/priv-app/app/app.apk,131232145,11,23,454,54
  *    /system/priv-app/app/app.apk:classes5.dex,218490184,39,13,49,1
  **/
-bool ProfileCompilationInfo::Save(const std::string& filename) {
-  int fd = OpenFile(filename, READ_WRITE);
-  if (fd == -1) {
-    return false;
-  }
-
-  // TODO(calin): Merge with a previous existing profile.
+bool ProfileCompilationInfo::Save(uint32_t fd) {
+  DCHECK_GE(fd, 0u);
   // TODO(calin): Profile this and see how much memory it takes. If too much,
   // write to file directly.
   std::ostringstream os;
@@ -158,9 +126,7 @@
     os << kLineSeparator;
   }
 
-  WriteToFile(fd, os);
-
-  return CloseDescriptorForFile(fd, filename);
+  return WriteToFile(fd, os);
 }
 
 // TODO(calin): This a duplicate of Utils::Split fixing the case where the first character
@@ -222,7 +188,9 @@
       LOG(WARNING) << "Cannot parse method_idx " << parts[i];
       return false;
     }
-    AddData(dex_location, checksum, method_idx);
+    if (!AddData(dex_location, checksum, method_idx)) {
+      return false;
+    }
   }
   return true;
 }
@@ -249,23 +217,18 @@
   return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
 }
 
-bool ProfileCompilationInfo::Load(const std::string& filename) {
-  int fd = OpenFile(filename, READ);
-  if (fd == -1) {
-    return false;
-  }
+bool ProfileCompilationInfo::Load(uint32_t fd) {
+  DCHECK_GE(fd, 0u);
 
   std::string current_line;
   const int kBufferSize = 1024;
   char buffer[kBufferSize];
-  bool success = true;
 
-  while (success) {
-    int n = read(fd, buffer, kBufferSize);
+  while (true) {
+    int n = TEMP_FAILURE_RETRY(read(fd, buffer, kBufferSize));
     if (n < 0) {
-      PLOG(WARNING) << "Error when reading profile file " << filename;
-      success = false;
-      break;
+      PLOG(WARNING) << "Error when reading profile file";
+      return false;
     } else if (n == 0) {
       break;
     }
@@ -278,17 +241,13 @@
         break;
       }
       if (!ProcessLine(current_line)) {
-        success = false;
-        break;
+        return false;
       }
       // Reset the current line (we just processed it).
       current_line.clear();
     }
   }
-  if (!success) {
-    info_.clear();
-  }
-  return CloseDescriptorForFile(fd, filename) && success;
+  return true;
 }
 
 bool ProfileCompilationInfo::Load(const ProfileCompilationInfo& other) {
@@ -369,4 +328,8 @@
   return os.str();
 }
 
+bool ProfileCompilationInfo::Equals(ProfileCompilationInfo& other) {
+  return info_.Equals(other.info_);
+}
+
 }  // namespace art
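
The text format documented earlier in this file (one line per dex file: location, checksum, then method indices, joined by kFieldSeparator) is simple enough that a line parser fits in a few lines. A minimal sketch under assumed names, not the actual ProcessLine implementation:

    #include <cstdint>
    #include <sstream>
    #include <string>
    #include <vector>

    struct ProfileLine {
      std::string dex_location;
      uint32_t checksum = 0;
      std::vector<uint16_t> method_indices;
    };

    // Parses "location,checksum,idx,idx,...".  Error handling is simplified:
    // std::stoul throws on non-numeric fields.
    bool ParseProfileLine(const std::string& line, ProfileLine* out) {
      std::stringstream ss(line);
      std::string field;
      if (!std::getline(ss, out->dex_location, ',')) return false;
      if (!std::getline(ss, field, ',')) return false;
      out->checksum = static_cast<uint32_t>(std::stoul(field));
      while (std::getline(ss, field, ',')) {
        out->method_indices.push_back(static_cast<uint16_t>(std::stoul(field)));
      }
      return !out->method_indices.empty();
    }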
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 26e1ac3..ffd1433 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -39,15 +39,18 @@
  */
 class ProfileCompilationInfo {
  public:
+  // Saves profile information about the given methods in the given file.
+  // Note that the saving proceeds only if the file can be locked for exclusive access.
+  // If it cannot be locked (the lock attempt does not block), nothing is saved and false is returned.
   static bool SaveProfilingInfo(const std::string& filename,
                                 const std::vector<ArtMethod*>& methods);
 
-  // Loads profile information from the given file.
-  bool Load(const std::string& profile_filename);
+  // Loads profile information from the given file descriptor.
+  bool Load(uint32_t fd);
   // Loads the data from another ProfileCompilationInfo object.
   bool Load(const ProfileCompilationInfo& info);
-  // Saves the profile data to the given file.
-  bool Save(const std::string& profile_filename);
+  // Saves the profile data to the given file descriptor.
+  bool Save(uint32_t fd);
   // Returns the number of methods that were profiled.
   uint32_t GetNumberOfMethods() const;
 
@@ -61,6 +64,9 @@
   std::string DumpInfo(const std::vector<const DexFile*>* dex_files,
                        bool print_full_dex_location = true) const;
 
+  // For testing purposes.
+  bool Equals(ProfileCompilationInfo& other);
+
  private:
   bool AddData(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
   bool ProcessLine(const std::string& line);
@@ -69,10 +75,18 @@
     explicit DexFileData(uint32_t location_checksum) : checksum(location_checksum) {}
     uint32_t checksum;
     std::set<uint16_t> method_set;
+
+    bool operator==(const DexFileData& other) const {
+      return checksum == other.checksum && method_set == other.method_set;
+    }
   };
 
   using DexFileToProfileInfoMap = SafeMap<const std::string, DexFileData>;
 
+  friend class ProfileCompilationInfoTest;
+  friend class CompilerDriverProfileTest;
+  friend class ProfileAssistantTest;
+
   DexFileToProfileInfoMap info_;
 };
 
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
new file mode 100644
index 0000000..482ea06
--- /dev/null
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "art_method-inl.h"
+#include "class_linker-inl.h"
+#include "common_runtime_test.h"
+#include "dex_file.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+class ProfileCompilationInfoTest : public CommonRuntimeTest {
+ protected:
+  std::vector<ArtMethod*> GetVirtualMethods(jobject class_loader,
+                                            const std::string& clazz) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    std::vector<ArtMethod*> methods;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      methods.push_back(&m);
+    }
+    return methods;
+  }
+
+  bool AddData(const std::string& dex_location,
+               uint32_t checksum,
+               uint16_t method_index,
+               ProfileCompilationInfo* info) {
+    return info->AddData(dex_location, checksum, method_index);
+  }
+
+  uint32_t GetFd(const ScratchFile& file) {
+    return static_cast<uint32_t>(file.GetFd());
+  }
+};
+
+TEST_F(ProfileCompilationInfoTest, SaveArtMethods) {
+  ScratchFile profile;
+
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Save virtual methods from Main.
+  std::vector<ArtMethod*> main_methods = GetVirtualMethods(class_loader, "LMain;");
+  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(), main_methods));
+
+  // Check that what we saved is in the profile.
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(info1.Load(GetFd(profile)));
+  ASSERT_EQ(info1.GetNumberOfMethods(), main_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info1.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+
+  // Save virtual methods from Second.
+  std::vector<ArtMethod*> second_methods = GetVirtualMethods(class_loader, "LSecond;");
+  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(), second_methods));
+
+  // Check that what we saved is in the profile (methods from Main and Second).
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(info2.Load(GetFd(profile)));
+  ASSERT_EQ(info2.GetNumberOfMethods(), main_methods.size() + second_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+    for (ArtMethod* m : second_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveFd) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
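+  // Rewind the file offset so that Load() reads from the start of the scratch file.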
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+
+  // Save more methods.
+  for (uint16_t i = 0; i < 100; i++) {
+    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location3", /* checksum */ 3, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back everything we saved.
+  ProfileCompilationInfo loaded_info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info2.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info2.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, AddDataFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info));
+  // Trying to add info for an existing file but with a different checksum.
+  ASSERT_FALSE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info));
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info1));
+  // Use the same file, change the checksum.
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info2));
+
+  ASSERT_FALSE(info1.Load(info2));
+}
+
+}  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index ec289ea..f3f5f95 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -22,16 +22,16 @@
 
 namespace art {
 
-// An arbitrary value to throttle save requests. Set to 500ms for now.
+// An arbitrary value to throttle save requests. Set to 2s for now.
 static constexpr const uint64_t kMilisecondsToNano = 1000000;
-static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 500 * kMilisecondsToNano;
+static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 2000 * kMilisecondsToNano;
 
 // TODO: read the constants from ProfileOptions,
 // Add a random delay each time we go to sleep so that we don't hammer the CPU
 // with all profile savers running at the same time.
-static constexpr const uint64_t kRandomDelayMaxMs = 10 * 1000;  // 10 seconds
-static constexpr const uint64_t kMaxBackoffMs = 4 * 60 * 1000;  // 4 minutes
-static constexpr const uint64_t kSavePeriodMs = 4 * 1000;  // 4 seconds
+static constexpr const uint64_t kRandomDelayMaxMs = 20 * 1000;  // 20 seconds
+static constexpr const uint64_t kMaxBackoffMs = 5 * 60 * 1000;  // 5 minutes
+static constexpr const uint64_t kSavePeriodMs = 10 * 1000;  // 10 seconds
 static constexpr const double kBackoffCoef = 1.5;
 
 static constexpr const uint32_t kMinimumNrOrMethodsToSave = 10;
@@ -86,12 +86,14 @@
 }
 
 bool ProfileSaver::ProcessProfilingInfo() {
-  VLOG(profiler) << "Initiating save profiling information to: " << output_filename_;
+  VLOG(profiler) << "Save profiling information to: " << output_filename_;
 
   uint64_t last_update_time_ns = jit_code_cache_->GetLastUpdateTimeNs();
   if (last_update_time_ns - code_cache_last_update_time_ns_
-      > kMinimumTimeBetweenCodeCacheUpdatesNs) {
-    VLOG(profiler) << "Not enough time has passed since the last code cache update.";
+      < kMinimumTimeBetweenCodeCacheUpdatesNs) {
+    VLOG(profiler) << "Not enough time has passed since the last code cache update."
+        << "Last update: " << last_update_time_ns
+        << " Last save: " << code_cache_last_update_time_ns_;
     return false;
   }
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 3571edb..18c52e4 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -583,6 +583,10 @@
   }
 }
 
+bool MemMap::Sync() {
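+  // MS_SYNC blocks until the dirty pages of this mapping have been written to the backing file.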
+  return msync(BaseBegin(), BaseSize(), MS_SYNC) == 0;
+}
+
 bool MemMap::Protect(int prot) {
   if (base_begin_ == nullptr && base_size_ == 0) {
     prot_ = prot;
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index ed21365..ebd550a 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -126,6 +126,8 @@
     return name_;
   }
 
+  bool Sync();
+
   bool Protect(int prot);
 
   void MadviseDontNeedAndZero();
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index edcbcf2..81c855e 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -251,6 +251,10 @@
 #endif
 
 TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
+  // Some MIPS32 hardware (namely the Creator Ci20 development board)
+  // cannot allocate in the 2GB-4GB region.
+  TEST_DISABLED_FOR_MIPS();
+
   CommonInit();
   // This test may not work under valgrind.
   if (RUNNING_ON_MEMORY_TOOL == 0) {
@@ -271,8 +275,8 @@
         break;
       }
     }
-    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(map.get() != nullptr) << error_msg;
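+    // Only dereference |map| after the null check above has passed.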
+    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(error_msg.empty());
     ASSERT_EQ(BaseBegin(map.get()), reinterpret_cast<void*>(start_addr));
   }
diff --git a/runtime/oat.h b/runtime/oat.h
index 13fd6a4..989e3f9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -31,7 +31,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '4', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '5', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 83e594b..82b3933 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -46,6 +46,7 @@
 #include "oat_file_manager.h"
 #include "os.h"
 #include "runtime.h"
+#include "type_lookup_table.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "vmap_table.h"
@@ -266,16 +267,15 @@
                                 i);
       return false;
     }
-
-    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
-    oat += dex_file_location_size;
-    if (UNLIKELY(oat > End())) {
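+    // Compare the remaining bytes against the size so that |oat| is never advanced past End().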
+    if (UNLIKELY(static_cast<size_t>(End() - oat) < dex_file_location_size)) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu with truncated dex file "
                                     "location",
                                 GetLocation().c_str(),
                                 i);
       return false;
     }
+    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
+    oat += dex_file_location_size;
 
     std::string dex_file_location = ResolveRelativeEncodedDexLocation(
         abs_dex_location,
@@ -318,6 +318,17 @@
                                 Size());
       return false;
     }
+    if (UNLIKELY(Size() - dex_file_offset < sizeof(DexFile::Header))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u of %zu but the size of dex file header is %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                Size(),
+                                sizeof(DexFile::Header));
+      return false;
+    }
 
     const uint8_t* dex_file_pointer = Begin() + dex_file_offset;
     if (UNLIKELY(!DexFile::IsMagicValid(dex_file_pointer))) {
@@ -339,34 +350,75 @@
       return false;
     }
     const DexFile::Header* header = reinterpret_cast<const DexFile::Header*>(dex_file_pointer);
+    if (Size() - dex_file_offset < header->file_size_) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u and size %u truncated at %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                header->file_size_,
+                                Size());
+      return false;
+    }
 
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "lookup table offset", GetLocation().c_str(), i,
+    uint32_t class_offsets_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                    "after class offsets offset",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
-    uint32_t lookup_table_offset = *reinterpret_cast<const uint32_t*>(oat);
-    oat += sizeof(lookup_table_offset);
-    if (Begin() + lookup_table_offset > End()) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "lookup table", GetLocation().c_str(), i,
+    if (UNLIKELY(class_offsets_offset > Size()) ||
+        UNLIKELY((Size() - class_offsets_offset) / sizeof(uint32_t) < header->class_defs_size_)) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
+                                    "class offsets, offset %u of %zu, class defs %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset,
+                                Size(),
+                                header->class_defs_size_);
+      return false;
+    }
+    if (UNLIKELY(!IsAligned<alignof(uint32_t)>(class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with unaligned "
+                                    "class offsets, offset %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset);
+      return false;
+    }
+    const uint32_t* class_offsets_pointer =
+        reinterpret_cast<const uint32_t*>(Begin() + class_offsets_offset);
+
+    uint32_t lookup_table_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &lookup_table_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated "
+                                    "after lookup table offset",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
     const uint8_t* lookup_table_data = lookup_table_offset != 0u
         ? Begin() + lookup_table_offset
         : nullptr;
-
-    const uint32_t* methods_offsets_pointer = reinterpret_cast<const uint32_t*>(oat);
-
-    oat += (sizeof(*methods_offsets_pointer) * header->class_defs_size_);
-    if (UNLIKELY(oat > End())) {
+    if (lookup_table_offset != 0u &&
+        (UNLIKELY(lookup_table_offset > Size()) ||
+            UNLIKELY(Size() - lookup_table_offset <
+                     TypeLookupTable::RawDataLength(header->class_defs_size_)))) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
-                                    "method offsets",
+                                    "type lookup table, offset %u of %zu, class defs %u",
                                 GetLocation().c_str(),
                                 i,
-                                dex_file_location.c_str());
+                                dex_file_location.c_str(),
+                                lookup_table_offset,
+                                Size(),
+                                header->class_defs_size_);
       return false;
     }
 
@@ -398,7 +450,7 @@
                                               dex_file_checksum,
                                               dex_file_pointer,
                                               lookup_table_data,
-                                              methods_offsets_pointer,
+                                              class_offsets_pointer,
                                               current_dex_cache_arrays);
     oat_dex_files_storage_.push_back(oat_dex_file);
 
@@ -627,7 +679,7 @@
 
   if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
+    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
   }
 #endif
 }
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 36a967f..7f216f9 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -312,7 +312,7 @@
   // Update the oat file on disk if we can. This may fail, but that's okay.
   // Best effort is all that matters here.
   if (!oat_file_assistant.MakeUpToDate(/*out*/&error_msg)) {
-    LOG(WARNING) << error_msg;
+    LOG(INFO) << error_msg;
   }
 
   // Get the oat file on disk.
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 2b92303..341be9a 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -309,8 +309,8 @@
     const std::string option(options[i].first);
       // TODO: support -Djava.class.path
     if (option == "bootclasspath") {
-      auto boot_class_path
-          = reinterpret_cast<const std::vector<const DexFile*>*>(options[i].second);
+      auto boot_class_path = static_cast<std::vector<std::unique_ptr<const DexFile>>*>(
+          const_cast<void*>(options[i].second));
 
       if (runtime_options != nullptr) {
         runtime_options->Set(M::BootClassPathDexList, boot_class_path);
diff --git a/runtime/prebuilt_tools_test.cc b/runtime/prebuilt_tools_test.cc
index a7f7bcd..eb226d4 100644
--- a/runtime/prebuilt_tools_test.cc
+++ b/runtime/prebuilt_tools_test.cc
@@ -34,7 +34,7 @@
     struct stat exec_st;
     std::string exec_path = tools_dir + tool;
     if (stat(exec_path.c_str(), &exec_st) != 0) {
-      ADD_FAILURE() << "Can not find " << tool << " in " << tools_dir;
+      ADD_FAILURE() << "Cannot find " << tool << " in " << tools_dir;
     }
   }
 }
@@ -42,7 +42,7 @@
 TEST_F(PrebuiltToolsTest, CheckHostTools) {
   std::string tools_dir = GetAndroidHostToolsDir();
   if (tools_dir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory for host";
+    ADD_FAILURE() << "Cannot find Android tools directory for host";
   } else {
     CheckToolsExist(tools_dir);
   }
@@ -54,7 +54,7 @@
   for (InstructionSet isa : isas) {
     std::string tools_dir = GetAndroidTargetToolsDir(isa);
     if (tools_dir.empty()) {
-      ADD_FAILURE() << "Can not find Android tools directory for " << isa;
+      ADD_FAILURE() << "Cannot find Android tools directory for " << isa;
     } else {
       CheckToolsExist(tools_dir);
     }
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 6cea902..ca456c2 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -39,6 +39,7 @@
   kIntrinsicFloatCvt,
   kIntrinsicReverseBits,
   kIntrinsicReverseBytes,
+  kIntrinsicBitCount,
   kIntrinsicNumberOfLeadingZeros,
   kIntrinsicNumberOfTrailingZeros,
   kIntrinsicRotateRight,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1101acd..e30c26d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1131,10 +1131,14 @@
     }
 
     std::vector<std::unique_ptr<const DexFile>> boot_class_path;
-    OpenDexFiles(dex_filenames,
-                 dex_locations,
-                 runtime_options.GetOrDefault(Opt::Image),
-                 &boot_class_path);
+    if (runtime_options.Exists(Opt::BootClassPathDexList)) {
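+      // Take ownership of boot dex files already opened by the embedder instead of opening from disk.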
+      boot_class_path.swap(*runtime_options.GetOrDefault(Opt::BootClassPathDexList));
+    } else {
+      OpenDexFiles(dex_filenames,
+                   dex_locations,
+                   runtime_options.GetOrDefault(Opt::Image),
+                   &boot_class_path);
+    }
     instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet);
     std::string error_msg;
     if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) {
diff --git a/runtime/runtime_options.cc b/runtime/runtime_options.cc
index c54461e..e75481c 100644
--- a/runtime/runtime_options.cc
+++ b/runtime/runtime_options.cc
@@ -13,8 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "runtime_options.h"
 
+#include <memory>
+
 #include "gc/heap.h"
 #include "monitor.h"
 #include "runtime.h"
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 5624285..c5b009d 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -117,8 +117,8 @@
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
-RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \
-                                          BootClassPathDexList)  // TODO: make unique_ptr
+RUNTIME_OPTIONS_KEY (std::vector<std::unique_ptr<const DexFile>>*, \
+                                          BootClassPathDexList)
 RUNTIME_OPTIONS_KEY (InstructionSet,      ImageInstructionSet,            kRuntimeISA)
 RUNTIME_OPTIONS_KEY (CompilerCallbacks*,  CompilerCallbacksPtr)  // TDOO: make unique_ptr
 RUNTIME_OPTIONS_KEY (bool (*)(),          HookIsSensitiveThread)
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index fc1a445..727ffe5 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -190,7 +190,7 @@
 class DumpCheckpoint FINAL : public Closure {
  public:
   explicit DumpCheckpoint(std::ostream* os)
-      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(GetTid())) {}
+      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(getpid())) {}
 
   void Run(Thread* thread) OVERRIDE {
     // Note thread and self may not be equal if thread was already suspended at the point of the
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 5815f7a..99b2296 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -815,10 +815,10 @@
   LOG(ERROR) << "Unexpected exception caught event in tracing";
 }
 
-void Trace::BackwardBranch(Thread* /*thread*/, ArtMethod* method,
-                           int32_t /*dex_pc_offset*/)
+void Trace::Branch(Thread* /*thread*/, ArtMethod* method,
+                   uint32_t /*dex_pc*/, int32_t /*dex_pc_offset*/)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-  LOG(ERROR) << "Unexpected backward branch event in tracing" << PrettyMethod(method);
+  LOG(ERROR) << "Unexpected branch event in tracing" << PrettyMethod(method);
 }
 
 void Trace::InvokeVirtualOrInterface(Thread*,
diff --git a/runtime/trace.h b/runtime/trace.h
index 356a81f..80f1a4c 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -164,7 +164,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void InvokeVirtualOrInterface(Thread* thread,
                                 mirror::Object* this_object,
diff --git a/runtime/type_lookup_table.cc b/runtime/type_lookup_table.cc
index 0d40bb7..fc9faec 100644
--- a/runtime/type_lookup_table.cc
+++ b/runtime/type_lookup_table.cc
@@ -16,6 +16,7 @@
 
 #include "type_lookup_table.h"
 
+#include "base/bit_utils.h"
 #include "dex_file-inl.h"
 #include "utf-inl.h"
 #include "utils.h"
@@ -42,25 +43,39 @@
 }
 
 uint32_t TypeLookupTable::RawDataLength(const DexFile& dex_file) {
-  return RoundUpToPowerOfTwo(dex_file.NumClassDefs()) * sizeof(Entry);
+  return RawDataLength(dex_file.NumClassDefs());
 }
 
-TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file) {
+uint32_t TypeLookupTable::RawDataLength(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) * sizeof(Entry) : 0u;
+}
+
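+// The table has RoundUpToPowerOfTwo(num_class_defs) entries, so the probe mask is that size minus one.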
+uint32_t TypeLookupTable::CalculateMask(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) - 1u : 0u;
+}
+
+bool TypeLookupTable::SupportedSize(uint32_t num_class_defs) {
+  return num_class_defs != 0u && num_class_defs <= std::numeric_limits<uint16_t>::max();
+}
+
+TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file, uint8_t* storage) {
   const uint32_t num_class_defs = dex_file.NumClassDefs();
-  return (num_class_defs == 0 || num_class_defs > std::numeric_limits<uint16_t>::max())
-      ? nullptr
-      : new TypeLookupTable(dex_file);
+  return SupportedSize(num_class_defs)
+      ? new TypeLookupTable(dex_file, storage)
+      : nullptr;
 }
 
 TypeLookupTable* TypeLookupTable::Open(const uint8_t* raw_data, const DexFile& dex_file) {
   return new TypeLookupTable(raw_data, dex_file);
 }
 
-TypeLookupTable::TypeLookupTable(const DexFile& dex_file)
+TypeLookupTable::TypeLookupTable(const DexFile& dex_file, uint8_t* storage)
     : dex_file_(dex_file),
-      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
-      entries_(new Entry[mask_ + 1]),
-      owns_entries_(true) {
+      mask_(CalculateMask(dex_file.NumClassDefs())),
+      entries_(storage != nullptr ? reinterpret_cast<Entry*>(storage) : new Entry[mask_ + 1]),
+      owns_entries_(storage == nullptr) {
+  static_assert(alignof(Entry) == 4u, "Expecting Entry to be 4-byte aligned.");
+  DCHECK_ALIGNED(storage, alignof(Entry));
   std::vector<uint16_t> conflict_class_defs;
   // The first stage. Put elements on their initial positions. If an initial position is already
   // occupied then delay the insertion of the element to the second stage to reduce probing
@@ -93,7 +108,7 @@
 
 TypeLookupTable::TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file)
     : dex_file_(dex_file),
-      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
+      mask_(CalculateMask(dex_file.NumClassDefs())),
       entries_(reinterpret_cast<Entry*>(const_cast<uint8_t*>(raw_data))),
       owns_entries_(false) {}
 
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
index 3c2295c..d74d01d 100644
--- a/runtime/type_lookup_table.h
+++ b/runtime/type_lookup_table.h
@@ -60,7 +60,7 @@
   }
 
   // Method creates lookup table for dex file
-  static TypeLookupTable* Create(const DexFile& dex_file);
+  static TypeLookupTable* Create(const DexFile& dex_file, uint8_t* storage = nullptr);
 
   // Method opens lookup table from binary data. Lookup table does not owns binary data.
   static TypeLookupTable* Open(const uint8_t* raw_data, const DexFile& dex_file);
@@ -76,6 +76,9 @@
   // Method returns length of binary data for the specified dex file.
   static uint32_t RawDataLength(const DexFile& dex_file);
 
+  // Method returns length of binary data for the specified number of class definitions.
+  static uint32_t RawDataLength(uint32_t num_class_defs);
+
  private:
    /**
     * To find element we need to compare strings.
@@ -109,8 +112,11 @@
     }
   };
 
+  static uint32_t CalculateMask(uint32_t num_class_defs);
+  static bool SupportedSize(uint32_t num_class_defs);
+
   // Construct from a dex file.
-  explicit TypeLookupTable(const DexFile& dex_file);
+  explicit TypeLookupTable(const DexFile& dex_file, uint8_t* storage);
 
   // Construct from a dex file with existing data.
   TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file);
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 5239e40..c67879b 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -353,7 +353,7 @@
     if (codePoint <= 0xffff) {
       if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
         // According to the Unicode standard, no character will ever
-        // be assigned to these code points, and they can not be encoded
+        // be assigned to these code points, and they cannot be encoded
         // into either utf-16 or utf-8.
         continue;
       }
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 8e9f12b..07f94c0 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1115,7 +1115,7 @@
   BacktraceMap* map = existing_map;
   std::unique_ptr<BacktraceMap> tmp_map;
   if (map == nullptr) {
-    tmp_map.reset(BacktraceMap::Create(tid));
+    tmp_map.reset(BacktraceMap::Create(getpid()));
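+    // BacktraceMap::Create() takes a process id; the map describes the whole process, not one thread.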
     map = tmp_map.get();
   }
   std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1c95648..7e0f337 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -2735,7 +2735,8 @@
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
       bool is_super = (inst->Opcode() == Instruction::INVOKE_SUPER ||
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL, is_range, is_super);
+      MethodType type = is_super ? METHOD_SUPER : METHOD_VIRTUAL;
+      ArtMethod* called_method = VerifyInvocationArgs(inst, type, is_range);
       const RegType* return_type = nullptr;
       if (called_method != nullptr) {
         size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
@@ -2768,7 +2769,7 @@
     case Instruction::INVOKE_DIRECT:
     case Instruction::INVOKE_DIRECT_RANGE: {
       bool is_range = (inst->Opcode() == Instruction::INVOKE_DIRECT_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range, false);
+      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range);
       const char* return_type_descriptor;
       bool is_constructor;
       const RegType* return_type = nullptr;
@@ -2848,7 +2849,7 @@
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE: {
         bool is_range = (inst->Opcode() == Instruction::INVOKE_STATIC_RANGE);
-        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range, false);
+        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range);
         const char* descriptor;
         if (called_method == nullptr) {
           uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
@@ -2870,7 +2871,7 @@
     case Instruction::INVOKE_INTERFACE:
     case Instruction::INVOKE_INTERFACE_RANGE: {
       bool is_range =  (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
-      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range, false);
+      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range);
       if (abs_method != nullptr) {
         mirror::Class* called_interface = abs_method->GetDeclaringClass();
         if (!called_interface->IsInterface() && !called_interface->IsObjectClass()) {
@@ -3639,9 +3640,8 @@
   return *common_super;
 }
 
-// TODO Maybe I should just add a METHOD_SUPER to MethodType?
 ArtMethod* MethodVerifier::ResolveMethodAndCheckAccess(
-    uint32_t dex_method_idx, MethodType method_type, bool is_super) {
+    uint32_t dex_method_idx, MethodType method_type) {
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx);
   const RegType& klass_type = ResolveClassAndCheckAccess(method_id.class_idx_);
   if (klass_type.IsConflict()) {
@@ -3668,9 +3668,10 @@
       res_method = klass->FindDirectMethod(name, signature, pointer_size);
     } else if (method_type == METHOD_INTERFACE) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
-    } else if (is_super && klass->IsInterface()) {
+    } else if (method_type == METHOD_SUPER && klass->IsInterface()) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
     } else {
+      DCHECK(method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER);
       res_method = klass->FindVirtualMethod(name, signature, pointer_size);
     }
     if (res_method != nullptr) {
@@ -3679,7 +3680,9 @@
       // If a virtual or interface method wasn't found with the expected type, look in
       // the direct methods. This can happen when the wrong invoke type is used or when
       // a class has changed, and will be flagged as an error in later checks.
-      if (method_type == METHOD_INTERFACE || method_type == METHOD_VIRTUAL) {
+      if (method_type == METHOD_INTERFACE ||
+          method_type == METHOD_VIRTUAL ||
+          method_type == METHOD_SUPER) {
         res_method = klass->FindDirectMethod(name, signature, pointer_size);
       }
       if (res_method == nullptr) {
@@ -3742,7 +3745,7 @@
     return res_method;
   }
   // Check that invoke-virtual and invoke-super are not used on private methods of the same class.
-  if (res_method->IsPrivate() && method_type == METHOD_VIRTUAL) {
+  if (res_method->IsPrivate() && (method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke-super/virtual can't be used on private method "
                                       << PrettyMethod(res_method);
     return nullptr;
@@ -3751,7 +3754,9 @@
   // target method.
   if ((method_type == METHOD_DIRECT && (!res_method->IsDirect() || res_method->IsStatic())) ||
       (method_type == METHOD_STATIC && !res_method->IsStatic()) ||
-      ((method_type == METHOD_VIRTUAL || method_type == METHOD_INTERFACE) && res_method->IsDirect())
+      ((method_type == METHOD_SUPER ||
+        method_type == METHOD_VIRTUAL ||
+        method_type == METHOD_INTERFACE) && res_method->IsDirect())
       ) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "invoke type (" << method_type << ") does not match method "
                                        " type of " << PrettyMethod(res_method);
@@ -3937,12 +3942,12 @@
 };
 
 ArtMethod* MethodVerifier::VerifyInvocationArgs(
-    const Instruction* inst, MethodType method_type, bool is_range, bool is_super) {
+    const Instruction* inst, MethodType method_type, bool is_range) {
   // Resolve the method. This could be an abstract or concrete method depending on what sort of call
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
 
-  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type, is_super);
+  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == nullptr) {  // error or class is unresolved
     // Check what we can statically.
     if (!have_pending_hard_failure_) {
@@ -3953,8 +3958,7 @@
 
   // If we're using invoke-super(method), make sure that the executing method's class' superclass
   // has a vtable entry for the target method. Or the target is on a interface.
-  if (is_super) {
-    DCHECK(method_type == METHOD_VIRTUAL);
+  if (method_type == METHOD_SUPER) {
     if (res_method->GetDeclaringClass()->IsInterface()) {
       // TODO Fill in this part. Verify what we can...
       if (Runtime::Current()->IsAotCompiler()) {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index ec0a8f9..a26e0fb 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -57,7 +57,8 @@
   METHOD_UNKNOWN  = 0,
   METHOD_DIRECT,      // <init>, private
   METHOD_STATIC,      // static
-  METHOD_VIRTUAL,     // virtual, super
+  METHOD_VIRTUAL,     // virtual
+  METHOD_SUPER,       // super
   METHOD_INTERFACE    // interface
 };
 std::ostream& operator<<(std::ostream& os, const MethodType& rhs);
@@ -654,7 +655,7 @@
    * the referrer can access the resolved method.
    * Does not throw exceptions.
    */
-  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type, bool is_super)
+  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -679,9 +680,7 @@
    * Returns the resolved method on success, null on failure (with *failure
    * set appropriately).
    */
-  ArtMethod* VerifyInvocationArgs(const Instruction* inst,
-                                          MethodType method_type,
-                                          bool is_range, bool is_super)
+  ArtMethod* VerifyInvocationArgs(const Instruction* inst, MethodType method_type, bool is_range)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Similar checks to the above, but on the proto. Will be used when the method cannot be
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index 9daaf8e..d96fb42 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -133,4 +133,8 @@
   return new ZipEntry(handle_, zip_entry.release());
 }
 
+ZipArchive::~ZipArchive() {
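+  // Release the underlying ZipArchiveHandle.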
+  CloseArchive(handle_);
+}
+
 }  // namespace art
diff --git a/runtime/zip_archive.h b/runtime/zip_archive.h
index 717eb8c..42bf55c 100644
--- a/runtime/zip_archive.h
+++ b/runtime/zip_archive.h
@@ -63,9 +63,7 @@
 
   ZipEntry* Find(const char* name, std::string* error_msg) const;
 
-  ~ZipArchive() {
-    CloseArchive(handle_);
-  }
+  ~ZipArchive();
 
  private:
   explicit ZipArchive(ZipArchiveHandle handle) : handle_(handle) {}
diff --git a/test/004-StackWalk/src/Main.java b/test/004-StackWalk/src/Main.java
index 883ce2c..072b1d0 100644
--- a/test/004-StackWalk/src/Main.java
+++ b/test/004-StackWalk/src/Main.java
@@ -2,14 +2,14 @@
   public Main() {
   }
 
+  boolean doThrow = false;
+
   int $noinline$f() throws Exception {
     g(1);
     g(2);
 
-    // This loop currently defeats inlining of `f`.
-    for (int i = 0; i < 10; i++) {
-      Thread.sleep(0);
-    }
+    // This currently defeats inlining of `f`.
+    if (doThrow) { throw new Error(); }
     return 0;
   }
 
diff --git a/test/048-reflect-v8/build b/test/048-reflect-v8/build
new file mode 100644
index 0000000..4ea1838
--- /dev/null
+++ b/test/048-reflect-v8/build
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make us exit on a failure.
+set -e
+
+# Hard-wired use of experimental jack.
+# TODO: fix this temporary work-around for lambdas, see b/19467889
+export USE_JACK=true
+export JACK_SERVER=false
+export JACK_REPOSITORY="${ANDROID_BUILD_TOP}/prebuilts/sdk/tools/jacks"
+# e.g. /foo/bar/jack-3.10.ALPHA.jar -> 3.10.ALPHA
+export JACK_VERSION="$(find "$JACK_REPOSITORY" -name '*ALPHA*' | sed 's/.*jack-//g' | sed 's/[.]jar//g')"
+
+./default-build "$@" --experimental default-methods
diff --git a/test/048-reflect-v8/expected.txt b/test/048-reflect-v8/expected.txt
new file mode 100644
index 0000000..2d0b4cc
--- /dev/null
+++ b/test/048-reflect-v8/expected.txt
@@ -0,0 +1,4 @@
+Main$DefaultInterface is default = yes
+Main$RegularInterface is default = no
+Main$ImplementsWithDefault is default = yes
+Main$ImplementsWithRegular is default = no
diff --git a/test/048-reflect-v8/info.txt b/test/048-reflect-v8/info.txt
new file mode 100644
index 0000000..a336d30
--- /dev/null
+++ b/test/048-reflect-v8/info.txt
@@ -0,0 +1 @@
+Test reflection for 1.8 APIs
diff --git a/test/048-reflect-v8/run b/test/048-reflect-v8/run
new file mode 100644
index 0000000..ba3318a
--- /dev/null
+++ b/test/048-reflect-v8/run
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Ensure that the default methods are turned on for dalvikvm and dex2oat
+${RUN} "$@" --experimental default-methods
diff --git a/test/048-reflect-v8/src/Main.java b/test/048-reflect-v8/src/Main.java
new file mode 100644
index 0000000..7fa2a92
--- /dev/null
+++ b/test/048-reflect-v8/src/Main.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  interface DefaultInterface {
+    default void sayHi() {
+      System.out.println("hi default");
+    }
+  }
+
+  interface RegularInterface {
+    void sayHi();
+  }
+
+  class ImplementsWithDefault implements DefaultInterface {}
+  class ImplementsWithRegular implements RegularInterface {
+    public void sayHi() {
+      System.out.println("hello specific");
+    }
+  }
+
+  private static void printIsDefault(Class<?> klass) {
+    Method m;
+    try {
+      m = klass.getMethod("sayHi");
+    } catch (Throwable t) {
+      System.out.println(t);
+      return;
+    }
+
+    boolean isDefault = m.isDefault();
+    System.out.println(klass.getName() + " is default = " + (isDefault ? "yes" : "no"));
+  }
+
+  public static void main(String[] args) {
+    printIsDefault(DefaultInterface.class);
+    printIsDefault(RegularInterface.class);
+    printIsDefault(ImplementsWithDefault.class);
+    printIsDefault(ImplementsWithRegular.class);
+  }
+}
diff --git a/test/127-secondarydex/build b/test/127-checker-secondarydex/build
similarity index 100%
rename from test/127-secondarydex/build
rename to test/127-checker-secondarydex/build
diff --git a/test/127-secondarydex/expected.txt b/test/127-checker-secondarydex/expected.txt
similarity index 100%
rename from test/127-secondarydex/expected.txt
rename to test/127-checker-secondarydex/expected.txt
diff --git a/test/127-secondarydex/info.txt b/test/127-checker-secondarydex/info.txt
similarity index 100%
rename from test/127-secondarydex/info.txt
rename to test/127-checker-secondarydex/info.txt
diff --git a/test/127-secondarydex/run b/test/127-checker-secondarydex/run
similarity index 100%
rename from test/127-secondarydex/run
rename to test/127-checker-secondarydex/run
diff --git a/test/127-secondarydex/src/Main.java b/test/127-checker-secondarydex/src/Main.java
similarity index 100%
rename from test/127-secondarydex/src/Main.java
rename to test/127-checker-secondarydex/src/Main.java
diff --git a/test/127-secondarydex/src/Super.java b/test/127-checker-secondarydex/src/Super.java
similarity index 100%
rename from test/127-secondarydex/src/Super.java
rename to test/127-checker-secondarydex/src/Super.java
diff --git a/test/127-secondarydex/src/Test.java b/test/127-checker-secondarydex/src/Test.java
similarity index 78%
rename from test/127-secondarydex/src/Test.java
rename to test/127-checker-secondarydex/src/Test.java
index 8547e79..266ed19 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/127-checker-secondarydex/src/Test.java
@@ -23,6 +23,13 @@
         System.out.println("Test");
     }
 
+    /// CHECK-START: java.lang.Integer Test.toInteger() ssa_builder (after)
+    /// CHECK:         LoadClass needs_access_check:false klass:java.lang.Integer
+
+    public Integer toInteger() {
+        return new Integer(42);
+    }
+
     public String toString() {
         return new String("Test");
     }
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 9bfe429..77301d2 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -76,7 +76,7 @@
     }
   }
 
-  printf("Can not find %s in backtrace:\n", seq[cur_search_index].c_str());
+  printf("Cannot find %s in backtrace:\n", seq[cur_search_index].c_str());
   for (Backtrace::const_iterator it = bt->begin(); it != bt->end(); ++it) {
     if (BacktraceMap::IsValid(it->map)) {
       printf("  %s\n", it->func_name.c_str());
@@ -112,7 +112,7 @@
 
   std::unique_ptr<Backtrace> bt(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, GetTid()));
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind in process.\n");
+    printf("Cannot unwind in process.\n");
     return JNI_FALSE;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind in process.\n");
@@ -205,7 +205,7 @@
   std::unique_ptr<Backtrace> bt(Backtrace::Create(pid, BACKTRACE_CURRENT_THREAD));
   bool result = true;
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind other process.\n");
+    printf("Cannot unwind other process.\n");
     result = false;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind of other process.\n");
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 0640b36..bcb697a 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -79,7 +79,7 @@
 
     private static void testUnloadClass(Constructor constructor) throws Exception {
         WeakReference<Class> klass = setUpUnloadClass(constructor);
-        // No strong refernces to class loader, should get unloaded.
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
         WeakReference<Class> klass2 = setUpUnloadClass(constructor);
         Runtime.getRuntime().gc();
@@ -91,7 +91,7 @@
     private static void testUnloadLoader(Constructor constructor)
         throws Exception {
       WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
-      // No strong refernces to class loader, should get unloaded.
+      // No strong references to class loader, should get unloaded.
       Runtime.getRuntime().gc();
       // If the weak reference is cleared, then it was unloaded.
       System.out.println(loader.get());
@@ -109,7 +109,7 @@
 
     private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception {
         WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
-        // No strong refernces to class loader, should get unloaded.
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(loader.get());
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
index d0b33b9..f8f0aa3 100644
--- a/test/482-checker-loop-back-edge-use/src/Main.java
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -40,6 +40,9 @@
   /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse2>>
 
   public static void loop2(boolean incoming) {
+    // Add some code at entry to avoid having the entry block be a pre header.
+    // This avoids having to create a synthesized block.
+    System.out.println("Enter");
     while (true) {
       System.out.println("foo");
       while (incoming) {}
@@ -170,6 +173,9 @@
   /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse>>
 
   public static void loop9() {
+    // Add some code at entry to avoid having the entry block be a pre header.
+    // This avoids having to create a synthesized block.
+    System.out.println("Enter");
     while (Runtime.getRuntime() != null) {
       // 'incoming' must only have a use in the inner loop.
       boolean incoming = field;
diff --git a/test/510-checker-try-catch/smali/SsaBuilder.smali b/test/510-checker-try-catch/smali/SsaBuilder.smali
index 710e849..a6a5bfe 100644
--- a/test/510-checker-try-catch/smali/SsaBuilder.smali
+++ b/test/510-checker-try-catch/smali/SsaBuilder.smali
@@ -21,7 +21,7 @@
 
 ## CHECK-START: int SsaBuilder.testSimplifyCatchBlock(int, int, int) ssa_builder (after)
 
-## CHECK:      name             "B0"
+## CHECK:      name             "B1"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
 ## CHECK-NEXT: predecessors
@@ -39,12 +39,15 @@
 ## CHECK:      name             "<<BExtracted>>"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
-## CHECK-NEXT: predecessors     "B0" "<<BCatch>>"
+## CHECK-NEXT: predecessors     "B1" "<<BCatch>>"
 ## CHECK-NOT:  flags            "catch_block"
 ## CHECK:      Add
 
 .method public static testSimplifyCatchBlock(III)I
     .registers 4
+    # Avoid having the entry block be a pre-header, which would lead
+    # the cfg simplifier to add a synthesized block.
+    goto :catch_all
 
     :catch_all
     add-int/2addr p0, p1
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
index 30a648d..971ad84 100644
--- a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -91,9 +91,7 @@
    goto :other_loop_entry
 .end method
 
-# Check that if a irreducible loop entry is dead, the loop can become
-# natural.
-# We start with:
+# Check that dce does not apply to irreducible loops.
 #
 #        entry
 #       /    \
@@ -106,18 +104,8 @@
 ## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (before)
 ## CHECK: irreducible:true
 
-# And end with:
-#
-#        entry
-#       /
-#      /
-# loop_entry
-#    /    \-
-#  exit    \-
-#           other_loop_entry
-
 ## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (after)
-## CHECK-NOT: irreducible:true
+## CHECK: irreducible:true
 .method public static dce(I)I
    .registers 3
    const/16 v0, 42
diff --git a/test/563-checker-fakestring/smali/TestCase.smali b/test/563-checker-fakestring/smali/TestCase.smali
index cd52f3d..54312a4 100644
--- a/test/563-checker-fakestring/smali/TestCase.smali
+++ b/test/563-checker-fakestring/smali/TestCase.smali
@@ -64,9 +64,15 @@
 
 .end method
 
-# Test deoptimization between String's allocation and initialization.
+# Test deoptimization between String's allocation and initialization. When not
+# compiling --debuggable, the NewInstance will be optimized out.
 
 ## CHECK-START: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
+## CHECK:         <<Null:l\d+>>   NullConstant
+## CHECK:                         Deoptimize env:[[<<Null>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+
+## CHECK-START-DEBUGGABLE: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
 ## CHECK:         <<String:l\d+>> NewInstance
 ## CHECK:                         Deoptimize env:[[<<String>>,{{.*]]}}
 ## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
@@ -98,3 +104,79 @@
    return v2
 
 .end method
+
+# Test that a NewInstance with no uses is removed when not compiling
+# --debuggable.
+
+## CHECK-START: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK-NOT:     NewInstance
+## CHECK-NOT:     LoadClass
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK:         NewInstance
+
+.method public static removeNewInstance([B)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
+
+# Test that the compiler does not assume that the first argument of String.<init>
+# is a NewInstance by inserting an irreducible loop between them (b/26676472).
+
+# We verify the type of the input instruction (Phi) in debuggable mode, because
+# it is eliminated by later stages of SsaBuilder otherwise.
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance1(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance1([BZ)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop
+   if-eqz p1, :loop_entry
+   :loop_header
+   const v1, 0x1
+   xor-int p1, p1, v1
+   :loop_entry
+   if-eqz p1, :string_init
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance2(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance2([BZ)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop
+   if-eqz p1, :loop_entry
+   :loop_header
+   if-eqz p1, :string_init
+   :loop_entry
+   const v1, 0x1
+   xor-int p1, p1, v1
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
diff --git a/test/563-checker-fakestring/src/Main.java b/test/563-checker-fakestring/src/Main.java
index 750f718..1ac8a5b 100644
--- a/test/563-checker-fakestring/src/Main.java
+++ b/test/563-checker-fakestring/src/Main.java
@@ -57,5 +57,26 @@
         }
       }
     }
+
+    {
+      Method m = c.getMethod("removeNewInstance", byte[].class);
+      String result = (String) m.invoke(null, new Object[] { testData });
+      assertEqual(testString, result);
+    }
+
+    {
+      Method m = c.getMethod("thisNotNewInstance1", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
+    {
+      Method m = c.getMethod("thisNotNewInstance2", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
   }
 }
diff --git a/test/564-checker-bitcount/expected.txt b/test/564-checker-bitcount/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/564-checker-bitcount/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/564-checker-bitcount/info.txt b/test/564-checker-bitcount/info.txt
new file mode 100644
index 0000000..57db66b
--- /dev/null
+++ b/test/564-checker-bitcount/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit bit count operation.
diff --git a/test/564-checker-bitcount/src/Main.java b/test/564-checker-bitcount/src/Main.java
new file mode 100644
index 0000000..b250145
--- /dev/null
+++ b/test/564-checker-bitcount/src/Main.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // TODO: make this work when b/26700769 is done.
+  //
+  // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+  // CHECK-DAG: popcnt
+  //
+  // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+  // CHECK-NOT: call
+  private static int bits32(int x) {
+    return Integer.bitCount(x);
+  }
+
+  // TODO: make this work when b/26700769 is done.
+  //
+  // CHECK-START-X86_64: int Main.bits64(long) disassembly (after)
+  // CHECK-DAG: popcnt
+  //
+  // CHECK-START-X86_64: int Main.bits64(long) disassembly (after)
+  // CHECK-NOT: call
+  private static int bits64(long x) {
+    return Long.bitCount(x);
+  }
+
+  public static void main(String args[]) {
+    expectEquals32(bits32(0x00000000), 0);
+    expectEquals32(bits32(0x00000001), 1);
+    expectEquals32(bits32(0x10000000), 1);
+    expectEquals32(bits32(0x10000001), 2);
+    expectEquals32(bits32(0x00000003), 2);
+    expectEquals32(bits32(0x70000000), 3);
+    expectEquals32(bits32(0x000F0000), 4);
+    expectEquals32(bits32(0x00001111), 4);
+    expectEquals32(bits32(0x11110000), 4);
+    expectEquals32(bits32(0x11111111), 8);
+    expectEquals32(bits32(0x12345678), 13);
+    expectEquals32(bits32(0x9ABCDEF0), 19);
+    expectEquals32(bits32(0xFFFFFFFF), 32);
+
+    for (int i = 0; i < 32; i++) {
+      expectEquals32(bits32(1 << i), 1);
+    }
+
+    expectEquals64(bits64(0x0000000000000000L), 0);
+    expectEquals64(bits64(0x0000000000000001L), 1);
+    expectEquals64(bits64(0x1000000000000000L), 1);
+    expectEquals64(bits64(0x1000000000000001L), 2);
+    expectEquals64(bits64(0x0000000000000003L), 2);
+    expectEquals64(bits64(0x7000000000000000L), 3);
+    expectEquals64(bits64(0x000F000000000000L), 4);
+    expectEquals64(bits64(0x0000000011111111L), 8);
+    expectEquals64(bits64(0x1111111100000000L), 8);
+    expectEquals64(bits64(0x1111111111111111L), 16);
+    expectEquals64(bits64(0x123456789ABCDEF1L), 33);
+    expectEquals64(bits64(0xFFFFFFFFFFFFFFFFL), 64);
+
+    for (int i = 0; i < 64; i++) {
+      expectEquals64(bits64(1L << i), 1);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/564-checker-inline-loop/expected.txt b/test/564-checker-inline-loop/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/564-checker-inline-loop/expected.txt
diff --git a/test/564-checker-inline-loop/info.txt b/test/564-checker-inline-loop/info.txt
new file mode 100644
index 0000000..a590bc6
--- /dev/null
+++ b/test/564-checker-inline-loop/info.txt
@@ -0,0 +1 @@
+Tests inlining of loops in the optimizing compiler.
diff --git a/test/564-checker-inline-loop/src/Main.java b/test/564-checker-inline-loop/src/Main.java
new file mode 100644
index 0000000..6929913
--- /dev/null
+++ b/test/564-checker-inline-loop/src/Main.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.inlineLoop() inliner (before)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-DAG:                      Return [<<Invoke>>]
+
+  /// CHECK-START: int Main.inlineLoop() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.inlineLoop() inliner (after)
+  /// CHECK-DAG:     <<Constant:i\d+>>   IntConstant 42
+  /// CHECK-DAG:                         Return [<<Constant>>]
+
+  /// CHECK-START: int Main.inlineLoop() licm (after)
+  /// CHECK:                         Goto loop:{{B\d+}}
+
+  public static int inlineLoop() {
+    return loopMethod();
+  }
+
+  /// CHECK-START: void Main.inlineWithinLoop() inliner (before)
+  /// CHECK:      InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.inlineWithinLoop() inliner (after)
+  /// CHECK-NOT:  InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.inlineWithinLoop() licm (after)
+  /// CHECK-DAG:  Goto loop:<<OuterLoop:B\d+>> outer_loop:none
+  /// CHECK-DAG:  Goto outer_loop:<<OuterLoop>>
+
+  public static void inlineWithinLoop() {
+    while (doLoop) {
+      loopMethod();
+    }
+  }
+
+  public static int loopMethod() {
+    while (doLoop) {}
+    return 42;
+  }
+
+  public static boolean doLoop = false;
+
+  public static void main(String[] args) {
+    inlineLoop();
+    inlineWithinLoop();
+  }
+}
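The CHECK blocks in this test are easier to read next to a sketch of what the inliner leaves behind. The following is a hypothetical, hand-written rendering (the class and method names are invented for this illustration and are not compiler output): after loopMethod() is inlined, no InvokeStaticOrDirect remains, the Return consumes the IntConstant 42 directly, and the copied loop is what the licm-stage Goto check observes.

```java
class InlineLoopSketch {
  static boolean doLoop = false;

  // Conceptual shape of Main.inlineLoop() after the inliner has substituted
  // loopMethod()'s body into its caller.
  static int inlineLoopAfterInlining() {
    while (doLoop) { }  // loop body copied from loopMethod()
    return 42;          // Return [IntConstant 42], as the inliner (after) checks assert
  }
}
```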
diff --git a/test/564-checker-irreducible-loop/expected.txt b/test/564-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/564-checker-irreducible-loop/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/564-checker-irreducible-loop/info.txt b/test/564-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/564-checker-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler in the presence of
+an irreducible loop.
diff --git a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..b82ed92
--- /dev/null
+++ b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,61 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop(int) dead_code_elimination (before)
+## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
+## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:{{B\d+}} irreducible:true
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:none
+.method public static simpleLoop(I)I
+   .registers 3
+   const/16 v0, 42
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-nez p0, :exit
+   invoke-static {v0},LIrreducibleLoop;->$noinline$m(I)V
+   :other_loop_entry
+   goto :loop_entry
+
+   # The else part: a block uses the ArtMethod and branches to
+   # a block that doesn't. The register allocator used to trip there, as the
+   # ArtMethod was a live_in of the last block before the loop, but did not have
+   # a location due to our liveness analysis.
+   :other_loop_pre_entry
+   if-eqz p0, :other_loop_entry
+   invoke-static {v0},LIrreducibleLoop;->$noinline$m(I)V
+   goto :other_loop_entry
+
+   :exit
+   return v0
+.end method
+
+.method public static $noinline$m(I)V
+   .registers 3
+   const/16 v0, 0
+   sget-boolean v1,LIrreducibleLoop;->doThrow:Z
+   if-eqz v1, :exit
+   # Prevent inlining.
+   throw v0
+   :exit
+   return-void
+.end method
+
+.field public static doThrow:Z
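The comments in the smali above describe the shape of the bug: one predecessor of the loop uses the ArtMethod for a call while another does not, and the loop has two entry points. As a rough guide only, here is a hypothetical Java approximation of simpleLoop(int)'s observable behaviour (names invented for this sketch). Structured Java source can only produce reducible, single-entry loops, so the second entry created by jumping to :other_loop_entry has no direct Java equivalent, which is exactly why the test is written in smali:

```java
class SimpleLoopSketch {
  static boolean doThrow = false;

  static int simpleLoop(int p) {
    if (p == 0) {
      // "then" path: enters the loop at its header and, since p never changes,
      // keeps calling the no-inline helper (the smali also loops here forever).
      while (p == 0) {
        $noinline$m(42);
      }
    } else {
      // "else" path: in the smali this block performs the call (using the
      // ArtMethod) and then jumps into the middle of the loop, creating the
      // second entry point that Java cannot express.
      $noinline$m(42);
    }
    return 42;
  }

  static void $noinline$m(int i) {
    if (doThrow) { throw new Error(); }
  }
}
```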
diff --git a/test/564-checker-irreducible-loop/src/Main.java b/test/564-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..94e3357
--- /dev/null
+++ b/test/564-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("simpleLoop", int.class);
+    Object[] arguments = { 42 };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/971-iface-super-partial-compile-generated/build b/test/971-iface-super/build
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/build
rename to test/971-iface-super/build
diff --git a/test/971-iface-super-partial-compile-generated/expected.txt b/test/971-iface-super/expected.txt
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/expected.txt
rename to test/971-iface-super/expected.txt
diff --git a/test/971-iface-super-partial-compile-generated/info.txt b/test/971-iface-super/info.txt
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/info.txt
rename to test/971-iface-super/info.txt
diff --git a/test/971-iface-super-partial-compile-generated/run b/test/971-iface-super/run
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/run
rename to test/971-iface-super/run
diff --git a/test/971-iface-super-partial-compile-generated/util-src/generate_java.py b/test/971-iface-super/util-src/generate_java.py
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/util-src/generate_java.py
rename to test/971-iface-super/util-src/generate_java.py
diff --git a/test/971-iface-super-partial-compile-generated/util-src/generate_smali.py b/test/971-iface-super/util-src/generate_smali.py
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/util-src/generate_smali.py
rename to test/971-iface-super/util-src/generate_smali.py
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index f62d0e9..48e10c3 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -70,6 +70,7 @@
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	  DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	  JACK=$(abspath $(JACK)) \
 	  JACK_CLASSPATH=$(TARGET_JACK_CLASSPATH) \
 	  JILL_JAR=$(abspath $(JILL_JAR)) \
@@ -242,7 +243,7 @@
   968-default-partial-compile-generated \
   969-iface-super \
   970-iface-super-resolution-generated \
-  971-iface-super-partial-compile-generated
+  971-iface-super
 
 # Check if we have python3 to run our tests.
 ifeq ($(wildcard /usr/bin/python3),)
@@ -620,6 +621,11 @@
 
 TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS :=
 
+# Test is flaky b/26733951.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
+    $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),961-default-iface-resolution-generated,$(ALL_ADDRESS_SIZES))
+
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
 $(foreach target, $(TARGET_TYPES), \
@@ -967,6 +973,7 @@
 	    JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	    SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	    JACK=$(abspath $(JACK)) \
 	    JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \
 	    JILL_JAR=$(abspath $(JILL_JAR)) \
diff --git a/test/127-secondarydex/src/Test.java b/test/ProfileTestMultiDex/Main.java
similarity index 66%
copy from test/127-secondarydex/src/Test.java
copy to test/ProfileTestMultiDex/Main.java
index 8547e79..41532ea 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/ProfileTestMultiDex/Main.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,14 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
-    }
-
-    private void print() {
-        System.out.println("Test");
-    }
-
-    public String toString() {
-        return new String("Test");
-    }
+class Main {
+  public String getA() {
+    return "A";
+  }
+  public String getB() {
+    return "B";
+  }
+  public String getC() {
+    return "C";
+  }
 }
diff --git a/test/127-secondarydex/src/Test.java b/test/ProfileTestMultiDex/Second.java
similarity index 66%
copy from test/127-secondarydex/src/Test.java
copy to test/ProfileTestMultiDex/Second.java
index 8547e79..4ac5abc 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/ProfileTestMultiDex/Second.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,14 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
-    }
-
-    private void print() {
-        System.out.println("Test");
-    }
-
-    public String toString() {
-        return new String("Test");
-    }
+class Second {
+  public String getX() {
+    return "X";
+  }
+  public String getY() {
+    return "Y";
+  }
+  public String getZ() {
+    return "Z";
+  }
 }
diff --git a/test/ProfileTestMultiDex/main.jpp b/test/ProfileTestMultiDex/main.jpp
new file mode 100644
index 0000000..f2e3b4e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.jpp
@@ -0,0 +1,3 @@
+main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Second
diff --git a/test/ProfileTestMultiDex/main.list b/test/ProfileTestMultiDex/main.list
new file mode 100644
index 0000000..44ba78e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.list
@@ -0,0 +1 @@
+Main.class
diff --git a/tools/art b/tools/art
index 304a9d0..d91b451 100644
--- a/tools/art
+++ b/tools/art
@@ -75,6 +75,7 @@
 ANDROID_ROOT=$PROG_DIR/..
 LIBDIR=$(find_libdir)
 LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBDIR
+DEBUG_OPTION=""
 
 DELETE_ANDROID_DATA=false
 # If ANDROID_DATA is the system ANDROID_DATA or is not set, use our own,
@@ -87,6 +88,7 @@
 
 if [ z"$PERF" != z ]; then
   invoke_with="perf record -o $ANDROID_DATA/perf.data -e cycles:u $invoke_with"
+  DEBUG_OPTION="-Xcompiler-option --generate-debug-info"
 fi
 
 # We use the PIC core image to work with perf.
@@ -99,7 +101,7 @@
     -XXlib:$LIBART \
     -Xnorelocate \
     -Ximage:$ANDROID_ROOT/framework/core-optimizing-pic.art \
-    -Xcompiler-option --generate-debug-info \
+    $DEBUG_OPTION \
     "$@"
 
 EXIT_STATUS=$?
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index e55c000..6ea83d2 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -30,6 +30,7 @@
   names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
           "libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
           "libcore.java.util.zip.OldAndroidGZIPStreamTest#testGZIPStream",
+          "libcore.java.util.zip.OldAndroidZipStreamTest#testZipStream",
           "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries",
           "libcore.java.util.zip.ZipInputStreamTest#testLongMessage"],
   bug: 26507762