Merge "Factor out common code for SetPendingNext."
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index eec471e..dc53853 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -318,6 +318,10 @@
   art_cflags += -DART_READ_BARRIER_TYPE_IS_$(ART_READ_BARRIER_TYPE)=1
   art_asflags += -DART_USE_READ_BARRIER=1
   art_asflags += -DART_READ_BARRIER_TYPE_IS_$(ART_READ_BARRIER_TYPE)=1
+
+  # Temporarily override -fstack-protector-strong with -fstack-protector to avoid a major
+  # slowdown with the read barrier config. b/26744236.
+  art_cflags += -fstack-protector
 endif
 
 ifeq ($(ART_USE_TLAB),true)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index af64470..e3f0c24 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -39,6 +39,7 @@
   NonStaticLeafMethods \
   ProtoCompare \
   ProtoCompare2 \
+  ProfileTestMultiDex \
   StaticLeafMethods \
   Statics \
   StaticsFromCode \
@@ -65,7 +66,7 @@
 
 # Dex file dependencies for each gtest.
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Statics StaticsFromCode
-ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods
+ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
@@ -78,6 +79,8 @@
 ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
 ART_GTEST_proxy_test_DEX_DEPS := Interfaces
 ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
+ART_GTEST_profile_assistant_test_DEX_DEPS := ProfileTestMultiDex
+ART_GTEST_profile_compilation_info_test_DEX_DEPS := ProfileTestMultiDex
 ART_GTEST_stub_test_DEX_DEPS := AllFields
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
 ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
@@ -191,13 +194,12 @@
   runtime/gc/collector/immune_spaces_test.cc \
   runtime/gc/heap_test.cc \
   runtime/gc/reference_queue_test.cc \
-  runtime/gc/space/dlmalloc_space_base_test.cc \
   runtime/gc/space/dlmalloc_space_static_test.cc \
   runtime/gc/space/dlmalloc_space_random_test.cc \
-  runtime/gc/space/rosalloc_space_base_test.cc \
+  runtime/gc/space/large_object_space_test.cc \
   runtime/gc/space/rosalloc_space_static_test.cc \
   runtime/gc/space/rosalloc_space_random_test.cc \
-  runtime/gc/space/large_object_space_test.cc \
+  runtime/gc/space/space_create_test.cc \
   runtime/gc/task_processor_test.cc \
   runtime/gtest_test.cc \
   runtime/handle_scope_test.cc \
@@ -208,6 +210,7 @@
   runtime/interpreter/safe_math_test.cc \
   runtime/interpreter/unstarted_runtime_test.cc \
   runtime/java_vm_ext_test.cc \
+  runtime/jit/profile_compilation_info_test.cc \
   runtime/lambda/closure_test.cc \
   runtime/lambda/shorty_field_type_test.cc \
   runtime/leb128_test.cc \
@@ -267,6 +270,7 @@
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
+  compiler/profile_assistant_test.cc \
   compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index fe83ba9..dc2c9c9 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -535,7 +535,7 @@
 
 /* -Xexperimental:_ */
 TEST_F(CmdlineParserTest, TestExperimentalFlags) {
-  // Off by default
+  // Default
   EXPECT_SINGLE_PARSE_DEFAULT_VALUE(ExperimentalFlags::kNone,
                                     "",
                                     M::Experimental);
@@ -549,16 +549,6 @@
   EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kLambdas,
                             "-Xexperimental:lambdas",
                             M::Experimental);
-  // Enabled explicitly
-  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kDefaultMethods,
-                            "-Xexperimental:default-methods",
-                            M::Experimental);
-
-  // Enabled both
-  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kDefaultMethods | ExperimentalFlags::kLambdas,
-                            "-Xexperimental:default-methods "
-                            "-Xexperimental:lambdas",
-                            M::Experimental);
 }
 
 // -Xverify:_
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 6c0a0e1..740199d 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -616,6 +616,8 @@
         log_verbosity.threads = true;
       } else if (verbose_options[j] == "verifier") {
         log_verbosity.verifier = true;
+      } else if (verbose_options[j] == "image") {
+        log_verbosity.image = true;
       } else {
         return Result::Usage(std::string("Unknown -verbose option ") + verbose_options[j]);
       }
@@ -845,11 +847,9 @@
 struct CmdlineType<ExperimentalFlags> : CmdlineTypeParser<ExperimentalFlags> {
   Result ParseAndAppend(const std::string& option, ExperimentalFlags& existing) {
     if (option == "none") {
-      existing = existing | ExperimentalFlags::kNone;
+      existing = ExperimentalFlags::kNone;
     } else if (option == "lambdas") {
       existing = existing | ExperimentalFlags::kLambdas;
-    } else if (option == "default-methods") {
-      existing = existing | ExperimentalFlags::kDefaultMethods;
     } else {
       return Result::Failure(std::string("Unknown option '") + option + "'");
     }
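
The "none" branch above now assigns rather than ORs: OR-ing kNone (zero) into an accumulator can never clear previously parsed flags, so a later -Xexperimental:none would have no effect. A minimal, self-contained sketch of that accumulate-vs-reset semantics, using an illustrative stand-in enum rather than ART's real ExperimentalFlags:

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Illustrative stand-in for ExperimentalFlags (not ART's real type).
    enum class Flags : uint32_t { kNone = 0x0, kLambdas = 0x1 };

    Flags operator|(Flags a, Flags b) {
      return static_cast<Flags>(static_cast<uint32_t>(a) | static_cast<uint32_t>(b));
    }

    // Mirrors the patched ParseAndAppend: "none" resets the accumulated
    // value (assignment), while other options OR their bit in.
    bool ParseAndAppend(const std::string& option, Flags& existing) {
      if (option == "none") {
        existing = Flags::kNone;  // reset; `existing | kNone` would be a no-op
      } else if (option == "lambdas") {
        existing = existing | Flags::kLambdas;
      } else {
        return false;  // unknown option
      }
      return true;
    }

    int main() {
      Flags f = Flags::kNone;
      ParseAndAppend("lambdas", f);
      ParseAndAppend("none", f);  // the later "none" now wins
      std::cout << static_cast<uint32_t>(f) << "\n";  // prints 0
    }
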
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4589736..87eff82 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -66,7 +66,6 @@
 	jit/jit_compiler.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
-	optimizing/boolean_simplifier.cc \
 	optimizing/bounds_check_elimination.cc \
 	optimizing/builder.cc \
 	optimizing/code_generator.cc \
@@ -94,6 +93,7 @@
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
+	optimizing/select_generator.cc \
 	optimizing/sharpening.cc \
 	optimizing/side_effects_analysis.cc \
 	optimizing/ssa_builder.cc \
@@ -258,9 +258,9 @@
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_STATIC_LIBRARIES += libart liblz4
+      LOCAL_STATIC_LIBRARIES += libart liblz4 liblzma
     else
-      LOCAL_SHARED_LIBRARIES += libart liblz4
+      LOCAL_SHARED_LIBRARIES += libart liblz4 liblzma
     endif
     ifeq ($$(art_target_or_host),target)
       LOCAL_FDO_SUPPORT := true
@@ -268,9 +268,9 @@
   else # debug
     LOCAL_MODULE := libartd-compiler
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_STATIC_LIBRARIES += libartd liblz4
+      LOCAL_STATIC_LIBRARIES += libartd liblz4 liblzma
     else
-      LOCAL_SHARED_LIBRARIES += libartd liblz4
+      LOCAL_SHARED_LIBRARIES += libartd liblz4 liblzma
     endif
   endif
 
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 9594dce..afc8463 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -168,6 +168,12 @@
   return nullptr;
 }
 
+// Get ProfileCompilationInfo that should be passed to the driver.
+ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() {
+  // Null means profile information will not be taken into account.
+  return nullptr;
+}
+
 void CommonCompilerTest::SetUp() {
   CommonRuntimeTest::SetUp();
   {
@@ -207,7 +213,7 @@
                                             timer_.get(),
                                             -1,
                                             /* dex_to_oat_map */ nullptr,
-                                            /* profile_compilation_info */ nullptr));
+                                            GetProfileCompilationInfo()));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
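
GetProfileCompilationInfo() is a virtual hook: the base test returns null (no profile guidance), and a fixture can override it to hand the driver a populated profile, as CompilerDriverProfileTest does later in this patch. A stripped-down sketch of the pattern, with hypothetical names standing in for the real test and driver types:

    // Hypothetical stand-ins; not ART's real classes.
    struct ProfileInfo {};

    class CommonTestBase {
     public:
      virtual ~CommonTestBase() {}
      // Default: no profile, so the driver compiles without profile guidance.
      virtual ProfileInfo* GetProfileInfo() { return nullptr; }

      void SetUp() {
        // The driver now receives whatever the (possibly overridden) hook
        // returns, instead of a hard-coded nullptr.
        driver_profile_ = GetProfileInfo();
      }

     protected:
      ProfileInfo* driver_profile_ = nullptr;
    };

    class ProfileGuidedTest : public CommonTestBase {
     public:
      ProfileInfo* GetProfileInfo() override { return &profile_; }

     private:
      ProfileInfo profile_;  // populated by the test before SetUp() runs
    };

    int main() {
      ProfileGuidedTest test;
      test.SetUp();  // the driver-side pointer now refers to the test's profile
    }
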
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index b491946..7e0fbab 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -23,6 +23,7 @@
 
 #include "common_runtime_test.h"
 #include "compiler.h"
+#include "jit/offline_profiling_info.h"
 #include "oat_file.h"
 
 namespace art {
@@ -75,6 +76,8 @@
   // driver assumes ownership of the set, so the test should properly release the set.
   virtual std::unordered_set<std::string>* GetCompiledMethods();
 
+  virtual ProfileCompilationInfo* GetProfileCompilationInfo();
+
   virtual void TearDown();
 
   void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 658e7d6..c250bd9 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -100,8 +100,12 @@
     } else {
       // The method index is actually the dex PC in this case.
       // Calculate the proper dex file and target method idx.
+
+      // We must be in JIT mode if we get here.
       CHECK(use_jit);
-      CHECK_EQ(invoke_type, kVirtual);
+
+      // The invoke type must be virtual, except for the string init special case above.
+      CHECK_EQ(invoke_type, string_init ? kDirect : kVirtual);
       // Don't devirt if we are in a different dex file since we can't have direct invokes in
       // another dex file unless we always put a direct / patch pointer.
       devirt_target = nullptr;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index d68835a..af6f91f 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1076,7 +1076,7 @@
       pc_rel_temp_(nullptr),
       dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()),
       cfi_(&last_lir_insn_,
-           cu->compiler_driver->GetCompilerOptions().GetGenerateDebugInfo(),
+           cu->compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo(),
            arena),
       in_to_reg_storage_mapping_(arena) {
   switch_tables_.reserve(4);
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 4617668..22b178c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -37,12 +37,21 @@
 static constexpr bool kIntrinsicIsStatic[] = {
     true,   // kIntrinsicDoubleCvt
     true,   // kIntrinsicFloatCvt
+    true,   // kIntrinsicFloatIsInfinite
+    true,   // kIntrinsicDoubleIsInfinite
+    true,   // kIntrinsicFloatIsNaN
+    true,   // kIntrinsicDoubleIsNaN
     true,   // kIntrinsicReverseBits
     true,   // kIntrinsicReverseBytes
+    true,   // kIntrinsicBitCount
+    true,   // kIntrinsicCompare
+    true,   // kIntrinsicHighestOneBit
+    true,   // kIntrinsicLowestOneBit
     true,   // kIntrinsicNumberOfLeadingZeros
     true,   // kIntrinsicNumberOfTrailingZeros
     true,   // kIntrinsicRotateRight
     true,   // kIntrinsicRotateLeft
+    true,   // kIntrinsicSignum
     true,   // kIntrinsicAbsInt
     true,   // kIntrinsicAbsLong
     true,   // kIntrinsicAbsFloat
@@ -97,14 +106,23 @@
               "arraysize of kIntrinsicIsStatic unexpected");
 static_assert(kIntrinsicIsStatic[kIntrinsicDoubleCvt], "DoubleCvt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicFloatIsInfinite], "FloatIsInfinite must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicDoubleIsInfinite], "DoubleIsInfinite must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicFloatIsNaN], "FloatIsNaN must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicDoubleIsNaN], "DoubleIsNaN must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicBitCount], "BitCount must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCompare], "Compare must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicHighestOneBit], "HighestOneBit must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicLowestOneBit], "LowestOneBit must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
               "NumberOfLeadingZeros must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros],
               "NumberOfTrailingZeros must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicRotateRight], "RotateRight must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicRotateLeft], "RotateLeft must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSignum], "Signum must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsInt], "AbsInt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsLong], "AbsLong must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
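
Each new intrinsic has to land in two places that this file keeps in lock-step: the kIntrinsicIsStatic bool table (indexed by the intrinsic enum) and a matching static_assert. A compile-time size check catches the common mistake of adding an enum value without a table entry. A minimal sketch of the pattern, with an illustrative three-entry enum:

    #include <cstddef>

    // Illustrative mini version of the enum/table pairing in this file.
    enum Intrinsic : size_t {
      kBitCount,
      kCompare,
      kSignum,
      kIntrinsicLast  // sentinel, not a real intrinsic
    };

    static constexpr bool kIsStatic[] = {
        true,  // kBitCount
        true,  // kCompare
        true,  // kSignum
    };

    // Fails to build if the enum and the table ever drift apart.
    static_assert(sizeof(kIsStatic) / sizeof(kIsStatic[0]) == kIntrinsicLast,
                  "arraysize of kIsStatic unexpected");
    static_assert(kIsStatic[kBitCount], "BitCount must be static");

    int main() { return 0; }
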
@@ -259,6 +277,8 @@
     "equals",                // kNameCacheEquals
     "getCharsNoCheck",       // kNameCacheGetCharsNoCheck
     "isEmpty",               // kNameCacheIsEmpty
+    "isInfinite",            // kNameCacheIsInfinite
+    "isNaN",                 // kNameCacheIsNaN
     "indexOf",               // kNameCacheIndexOf
     "length",                // kNameCacheLength
     "<init>",                // kNameCacheInit
@@ -293,10 +313,15 @@
     "putObjectVolatile",     // kNameCachePutObjectVolatile
     "putOrderedObject",      // kNameCachePutOrderedObject
     "arraycopy",             // kNameCacheArrayCopy
+    "bitCount",              // kNameCacheBitCount
+    "compare",               // kNameCacheCompare
+    "highestOneBit",         // kNameCacheHighestOneBit
+    "lowestOneBit",          // kNameCacheLowestOneBit
     "numberOfLeadingZeros",  // kNameCacheNumberOfLeadingZeros
     "numberOfTrailingZeros",  // kNameCacheNumberOfTrailingZeros
     "rotateRight",           // kNameCacheRotateRight
     "rotateLeft",            // kNameCacheRotateLeft
+    "signum",                // kNameCacheSignum
 };
 
 const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = {
@@ -316,10 +341,14 @@
     { kClassCacheFloat, 2, { kClassCacheFloat, kClassCacheFloat } },
     // kProtoCacheD_J
     { kClassCacheLong, 1, { kClassCacheDouble } },
+    // kProtoCacheD_Z
+    { kClassCacheBoolean, 1, { kClassCacheDouble } },
     // kProtoCacheJ_D
     { kClassCacheDouble, 1, { kClassCacheLong } },
     // kProtoCacheF_I
     { kClassCacheInt, 1, { kClassCacheFloat } },
+    // kProtoCacheF_Z
+    { kClassCacheBoolean, 1, { kClassCacheFloat } },
     // kProtoCacheI_F
     { kClassCacheFloat, 1, { kClassCacheInt } },
     // kProtoCacheII_I
@@ -348,6 +377,8 @@
     { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheInt } },
     // kProtoCacheJJ_J
     { kClassCacheLong, 2, { kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheJJ_I
+    { kClassCacheInt, 2, { kClassCacheLong, kClassCacheLong } },
     // kProtoCacheJJ_V
     { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheLong } },
     // kProtoCacheJS_V
@@ -441,16 +472,31 @@
     INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
     INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, kIntrinsicFlagToFloatingPoint),
 
+    INTRINSIC(JavaLangFloat, IsInfinite, F_Z, kIntrinsicFloatIsInfinite, 0),
+    INTRINSIC(JavaLangDouble, IsInfinite, D_Z, kIntrinsicDoubleIsInfinite, 0),
+    INTRINSIC(JavaLangFloat, IsNaN, F_Z, kIntrinsicFloatIsNaN, 0),
+    INTRINSIC(JavaLangDouble, IsNaN, D_Z, kIntrinsicDoubleIsNaN, 0),
+
     INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, k32),
     INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, k64),
     INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
     INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
     INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
 
+    INTRINSIC(JavaLangInteger, BitCount, I_I, kIntrinsicBitCount, k32),
+    INTRINSIC(JavaLangLong, BitCount, J_I, kIntrinsicBitCount, k64),
+    INTRINSIC(JavaLangInteger, Compare, II_I, kIntrinsicCompare, k32),
+    INTRINSIC(JavaLangLong, Compare, JJ_I, kIntrinsicCompare, k64),
+    INTRINSIC(JavaLangInteger, HighestOneBit, I_I, kIntrinsicHighestOneBit, k32),
+    INTRINSIC(JavaLangLong, HighestOneBit, J_J, kIntrinsicHighestOneBit, k64),
+    INTRINSIC(JavaLangInteger, LowestOneBit, I_I, kIntrinsicLowestOneBit, k32),
+    INTRINSIC(JavaLangLong, LowestOneBit, J_J, kIntrinsicLowestOneBit, k64),
     INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
     INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
     INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32),
     INTRINSIC(JavaLangLong, NumberOfTrailingZeros, J_I, kIntrinsicNumberOfTrailingZeros, k64),
+    INTRINSIC(JavaLangInteger, Signum, I_I, kIntrinsicSignum, k32),
+    INTRINSIC(JavaLangLong, Signum, J_I, kIntrinsicSignum, k64),
 
     INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
     INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
@@ -745,10 +791,19 @@
                                           intrinsic.d.data & kIntrinsicFlagIsOrdered);
     case kIntrinsicSystemArrayCopyCharArray:
       return backend->GenInlinedArrayCopyCharArray(info);
+    case kIntrinsicFloatIsInfinite:
+    case kIntrinsicDoubleIsInfinite:
+    case kIntrinsicFloatIsNaN:
+    case kIntrinsicDoubleIsNaN:
+    case kIntrinsicBitCount:
+    case kIntrinsicCompare:
+    case kIntrinsicHighestOneBit:
+    case kIntrinsicLowestOneBit:
     case kIntrinsicNumberOfLeadingZeros:
     case kIntrinsicNumberOfTrailingZeros:
     case kIntrinsicRotateRight:
     case kIntrinsicRotateLeft:
+    case kIntrinsicSignum:
     case kIntrinsicSystemArrayCopy:
       return false;   // not implemented in quick.
     default:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index ac70577..59b8a53 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -190,6 +190,8 @@
       kNameCacheEquals,
       kNameCacheGetCharsNoCheck,
       kNameCacheIsEmpty,
+      kNameCacheIsInfinite,
+      kNameCacheIsNaN,
       kNameCacheIndexOf,
       kNameCacheLength,
       kNameCacheInit,
@@ -224,10 +226,15 @@
       kNameCachePutObjectVolatile,
       kNameCachePutOrderedObject,
       kNameCacheArrayCopy,
+      kNameCacheBitCount,
+      kNameCacheCompare,
+      kNameCacheHighestOneBit,
+      kNameCacheLowestOneBit,
       kNameCacheNumberOfLeadingZeros,
       kNameCacheNumberOfTrailingZeros,
       kNameCacheRotateRight,
       kNameCacheRotateLeft,
+      kNameCacheSignum,
       kNameCacheLast
     };
 
@@ -246,8 +253,10 @@
       kProtoCacheF_F,
       kProtoCacheFF_F,
       kProtoCacheD_J,
+      kProtoCacheD_Z,
       kProtoCacheJ_D,
       kProtoCacheF_I,
+      kProtoCacheF_Z,
       kProtoCacheI_F,
       kProtoCacheII_I,
       kProtoCacheI_C,
@@ -262,6 +271,7 @@
       kProtoCacheJB_V,
       kProtoCacheJI_V,
       kProtoCacheJJ_J,
+      kProtoCacheJJ_I,
       kProtoCacheJJ_V,
       kProtoCacheJS_V,
       kProtoCacheObject_Z,
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index c5df134..0cd41bb 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -71,6 +71,7 @@
       nullptr,
       false,
       "",
+      false,
       false);
     VerificationResults verification_results(&compiler_options);
     DexFileToMethodInlinerMap method_inliner_map;
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index ebc9a2c..027290f 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -486,11 +486,6 @@
 static_assert(sizeof(kUnsupportedOpcodesSize) == 8 * sizeof(size_t),
               "kUnsupportedOpcodesSize unexpected");
 
-static bool IsUnsupportedExperimentalLambdasOnly(size_t i) {
-  DCHECK_LE(i, arraysize(kUnsupportedOpcodes));
-  return kUnsupportedOpcodes[i] == kUnsupportedLambdaOpcodes;
-}
-
 // The maximum amount of Dalvik register in a method for which we will start compiling. Tries to
 // avoid an abort when we need to manage more SSA registers than we can.
 static constexpr size_t kMaxAllowedDalvikRegisters = INT16_MAX / 2;
@@ -513,36 +508,6 @@
   return true;
 }
 
-// If the ISA has unsupported opcodes, should we skip scanning over them?
-//
-// Most of the time we're compiling non-experimental files, so scanning just slows
-// performance down by as much as 6% with 4 threads.
-// In the rare cases we compile experimental opcodes, the runtime has an option to enable it,
-// which will force scanning for any unsupported opcodes.
-static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
-  Runtime* runtime = Runtime::Current();
-  if (UNLIKELY(runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods))) {
-    // Always need to scan opcodes if we have default methods since invoke-super for interface
-    // methods is never going to be supported in the quick compiler.
-    return false;
-  } else if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
-    // All opcodes are supported no matter what. Usually not the case
-    // since experimental opcodes are not implemented in the quick compiler.
-    return true;
-  } else if (LIKELY(!Runtime::Current()->
-                      AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas))) {
-    // Experimental opcodes are disabled.
-    //
-    // If all unsupported opcodes are experimental we don't need to do scanning.
-    return IsUnsupportedExperimentalLambdasOnly(instruction_set);
-  } else {
-    // Experimental opcodes are enabled.
-    //
-    // Do the opcode scanning if the ISA has any unsupported opcodes.
-    return false;
-  }
-}
-
 bool QuickCompiler::CanCompileInstruction(const MIR* mir,
                                           const DexFile& dex_file) const {
   switch (mir->dalvikInsn.opcode) {
@@ -572,11 +537,8 @@
     return false;
   }
 
-  // Check whether we do have limitations at all.
-  if (kSupportedTypes[cu->instruction_set] == nullptr &&
-      SkipScanningUnsupportedOpcodes(cu->instruction_set)) {
-    return true;
-  }
+  // Since the quick compiler doesn't (and never will) support default methods, we always need
+  // to scan opcodes.
 
   // Check if we can compile the prototype.
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index d63878d..efdc333 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -54,6 +54,7 @@
         nullptr,
         false,
         "",
+        false,
         false));
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
     method_inliner_map_.reset(new DexFileToMethodInlinerMap());
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d021525..f1b7458 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -39,6 +39,7 @@
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
 #include "dex/dex_to_dex_compiler.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
@@ -365,7 +366,7 @@
       classes_to_compile_(compiled_classes),
       methods_to_compile_(compiled_methods),
       had_hard_verifier_failure_(false),
-      thread_count_(thread_count),
+      parallel_thread_count_(thread_count),
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
@@ -435,24 +436,27 @@
                                 const std::vector<const DexFile*>& dex_files,
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
-  std::unique_ptr<ThreadPool> thread_pool(
-      new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+
+  InitializeThreadPools();
+
   VLOG(compiler) << "Before precompile " << GetMemoryUsageString(false);
   // Precompile:
   // 1) Load image classes
   // 2) Resolve all classes
   // 3) Attempt to verify all classes
   // 4) Attempt to initialize image classes, and trivially initialized classes
-  PreCompile(class_loader, dex_files, thread_pool.get(), timings);
+  PreCompile(class_loader, dex_files, timings);
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
   //    compilation.
   if (!GetCompilerOptions().VerifyAtRuntime()) {
-    Compile(class_loader, dex_files, thread_pool.get(), timings);
+    Compile(class_loader, dex_files, timings);
   }
   if (dump_stats_) {
     stats_->Dump();
   }
+
+  FreeThreadPools();
 }
 
 static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
@@ -653,8 +657,9 @@
   std::vector<const DexFile*> dex_files;
   dex_files.push_back(dex_file);
 
-  std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
-  PreCompile(jclass_loader, dex_files, thread_pool.get(), timings);
+  InitializeThreadPools();
+
+  PreCompile(jclass_loader, dex_files, timings);
 
   // Can we run DEX-to-DEX compiler on this class ?
   optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
@@ -677,20 +682,147 @@
                 true,
                 dex_cache);
 
+  FreeThreadPools();
+
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 }
 
-void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Resolve(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
+  // Resolution allocates classes and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* resolve_thread_pool = force_determinism
+                                     ? single_thread_pool_.get()
+                                     : parallel_thread_pool_.get();
+  size_t resolve_thread_count = force_determinism ? 1U : parallel_thread_count_;
+
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    ResolveDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    ResolveDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   resolve_thread_pool,
+                   resolve_thread_count,
+                   timings);
   }
 }
 
-void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                ThreadPool* thread_pool, TimingLogger* timings) {
+// Resolve const-strings in the code. Done to have deterministic allocation behavior. Right now
+// this is single-threaded for simplicity.
+// TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a
+//       stable order.
+
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const DexFile& dex_file,
+                                const DexFile::CodeItem* code_item) {
+  if (code_item == nullptr) {
+    // Abstract or native method.
+    return;
+  }
+
+  const uint16_t* code_ptr = code_item->insns_;
+  const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_;
+
+  while (code_ptr < code_end) {
+    const Instruction* inst = Instruction::At(code_ptr);
+    switch (inst->Opcode()) {
+      case Instruction::CONST_STRING: {
+        uint32_t string_index = inst->VRegB_21c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+      case Instruction::CONST_STRING_JUMBO: {
+        uint32_t string_index = inst->VRegB_31c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+
+      default:
+        break;
+    }
+
+    code_ptr += inst->SizeInCodeUnits();
+  }
+}
+
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  for (const DexFile* dex_file : dex_files) {
+    TimingLogger::ScopedTiming t("Resolve const-string Strings", timings);
+
+    size_t class_def_count = dex_file->NumClassDefs();
+    for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        // empty class, probably a marker interface
+        continue;
+      }
+
+      ClassDataItemIterator it(*dex_file, class_data);
+      // Skip fields
+      while (it.HasNextStaticField()) {
+        it.Next();
+      }
+      while (it.HasNextInstanceField()) {
+        it.Next();
+      }
+
+      bool compilation_enabled = driver->IsClassToCompile(
+          dex_file->StringByTypeIdx(class_def.class_idx_));
+      if (!compilation_enabled) {
+        // Compilation is skipped, do not resolve const-string in code of this class.
+        // TODO: Make sure that inlining honors this.
+        continue;
+      }
+
+      // Direct methods.
+      int64_t previous_direct_method_idx = -1;
+      while (it.HasNextDirectMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_direct_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_direct_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      // Virtual methods.
+      int64_t previous_virtual_method_idx = -1;
+      while (it.HasNextVirtualMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_virtual_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_virtual_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+  }
+}
+
+inline void CompilerDriver::CheckThreadPools() {
+  DCHECK(parallel_thread_pool_ != nullptr);
+  DCHECK(single_thread_pool_ != nullptr);
+}
+
+void CompilerDriver::PreCompile(jobject class_loader,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  CheckThreadPools();
+
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
@@ -700,20 +832,26 @@
   // We need to resolve for never_verify since it needs to run dex to dex to add the
   // RETURN_VOID_NO_BARRIER.
   if (never_verify || verification_enabled) {
-    Resolve(class_loader, dex_files, thread_pool, timings);
+    Resolve(class_loader, dex_files, timings);
     VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
   }
 
   if (never_verify) {
     VLOG(compiler) << "Verify none mode specified, skipping verification.";
-    SetVerified(class_loader, dex_files, thread_pool, timings);
+    SetVerified(class_loader, dex_files, timings);
   }
 
   if (!verification_enabled) {
     return;
   }
 
-  Verify(class_loader, dex_files, thread_pool, timings);
+  if (GetCompilerOptions().IsForceDeterminism() && IsBootImage()) {
+    // Resolve strings from const-string. Do this now to have a deterministic image.
+    ResolveConstStrings(this, dex_files, timings);
+    VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false);
+  }
+
+  Verify(class_loader, dex_files, timings);
   VLOG(compiler) << "Verify: " << GetMemoryUsageString(false);
 
   if (had_hard_verifier_failure_ && GetCompilerOptions().AbortOnHardVerifierFailure()) {
@@ -721,7 +859,7 @@
                << "situations. Please check the log.";
   }
 
-  InitializeClasses(class_loader, dex_files, thread_pool, timings);
+  InitializeClasses(class_loader, dex_files, timings);
   VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(false);
 
   UpdateImageClasses(timings);
@@ -730,8 +868,8 @@
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
   if (!IsBootImage()) {
-    // NOTE: Currently unreachable, all callers check IsImage().
-    return false;
+    // NOTE: Currently only reachable from InitImageMethodVisitor for the app image case.
+    return true;
   } else {
     return image_classes_->find(descriptor) != image_classes_->end();
   }
@@ -1759,6 +1897,9 @@
 
     // Wait for all the worker threads to finish.
     thread_pool_->Wait(self, true, false);
+
+    // And stop the workers accepting jobs.
+    thread_pool_->StopWorkers(self);
   }
 
   size_t NextIndex() {
@@ -1995,9 +2136,12 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::ResolveDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: we could resolve strings here, although the string table is largely filled with class
@@ -2010,27 +2154,43 @@
     // classdefs are resolved by ResolveClassFieldsAndMethods.
     TimingLogger::ScopedTiming t("Resolve Types", timings);
     ResolveTypeVisitor visitor(&context);
-    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count_);
+    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count);
   }
 
   TimingLogger::ScopedTiming t("Resolve MethodsAndFields", timings);
   ResolveClassFieldsAndMethodsVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
-void CompilerDriver::SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                 ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::SetVerified(jobject class_loader,
+                                 const std::vector<const DexFile*>& dex_files,
+                                 TimingLogger* timings) {
+  // This can be run in parallel.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    SetVerifiedDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    SetVerifiedDexFile(class_loader,
+                       *dex_file,
+                       dex_files,
+                       parallel_thread_pool_.get(),
+                       parallel_thread_count_,
+                       timings);
   }
 }
 
-void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                            ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Verify(jobject class_loader,
+                            const std::vector<const DexFile*>& dex_files,
+                            TimingLogger* timings) {
+  // Note: verification should not be pulling in classes anymore when compiling the boot image,
+  //       as all should have been resolved before. As such, doing this in parallel should still
+  //       be deterministic.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    VerifyDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    VerifyDexFile(class_loader,
+                  *dex_file,
+                  dex_files,
+                  parallel_thread_pool_.get(),
+                  parallel_thread_count_,
+                  timings);
   }
 }
 
@@ -2104,15 +2264,18 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::VerifyDexFile(jobject class_loader,
+                                   const DexFile& dex_file,
                                    const std::vector<const DexFile*>& dex_files,
-                                   ThreadPool* thread_pool, TimingLogger* timings) {
+                                   ThreadPool* thread_pool,
+                                   size_t thread_count,
+                                   TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   VerifyClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 class SetVerifiedClassVisitor : public CompilationVisitor {
@@ -2162,15 +2325,18 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::SetVerifiedDexFile(jobject class_loader,
+                                        const DexFile& dex_file,
                                         const std::vector<const DexFile*>& dex_files,
-                                        ThreadPool* thread_pool, TimingLogger* timings) {
+                                        ThreadPool* thread_pool,
+                                        size_t thread_count,
+                                        TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   SetVerifiedClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 class InitializeClassVisitor : public CompilationVisitor {
@@ -2271,31 +2437,37 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
+void CompilerDriver::InitializeClasses(jobject jni_class_loader,
+                                       const DexFile& dex_file,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   TimingLogger::ScopedTiming t("InitializeNoClinit", timings);
+
+  // Initialization allocates objects and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* init_thread_pool = force_determinism
+                                     ? single_thread_pool_.get()
+                                     : parallel_thread_pool_.get();
+  size_t init_thread_count = force_determinism ? 1U : parallel_thread_count_;
+
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
-                                     thread_pool);
-  size_t thread_count;
+                                     init_thread_pool);
   if (IsBootImage()) {
     // TODO: remove this when transactional mode supports multithreading.
-    thread_count = 1U;
-  } else {
-    thread_count = thread_count_;
+    init_thread_count = 1U;
   }
   InitializeClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, init_thread_count);
 }
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
+    InitializeClasses(class_loader, *dex_file, dex_files, timings);
   }
   if (IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
@@ -2303,8 +2475,9 @@
   }
 }
 
-void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Compile(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
   if (kDebugProfileGuidedCompilation) {
     LOG(INFO) << "[ProfileGuidedCompilation] " <<
         ((profile_compilation_info_ == nullptr)
@@ -2314,7 +2487,12 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    CompileDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   parallel_thread_pool_.get(),
+                   parallel_thread_count_,
+                   timings);
   }
   VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
@@ -2421,14 +2599,17 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::CompileDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Compile Dex File", timings);
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, dex_files, thread_pool);
   CompileClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
@@ -2582,11 +2763,24 @@
                                        const DexFile* inlined_into) const {
   // We're not allowed to inline across dex files if we're the no-inline-from dex file.
   if (inlined_from != inlined_into &&
-      compiler_options_->GetNoInlineFromDexFile() == inlined_from) {
+      compiler_options_->GetNoInlineFromDexFile() != nullptr &&
+      ContainsElement(*compiler_options_->GetNoInlineFromDexFile(), inlined_from)) {
     return false;
   }
 
   return true;
 }
 
+void CompilerDriver::InitializeThreadPools() {
+  size_t parallel_count = parallel_thread_count_ > 0 ? parallel_thread_count_ - 1 : 0;
+  parallel_thread_pool_.reset(
+      new ThreadPool("Compiler driver thread pool", parallel_count));
+  single_thread_pool_.reset(new ThreadPool("Single-threaded Compiler driver thread pool", 0));
+}
+
+void CompilerDriver::FreeThreadPools() {
+  parallel_thread_pool_.reset();
+  single_thread_pool_.reset();
+}
+
 }  // namespace art
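
The driver now owns two pools for its whole lifetime: parallel_thread_pool_ with thread_count - 1 workers (the calling thread participates) and single_thread_pool_ with zero workers, so its tasks run on the caller. Phases whose allocations must be deterministic (resolution, class initialization) select the single-threaded pool when force-determinism is requested; the rest keep the parallel one. A self-contained sketch of that selection, assuming a simplified stand-in Pool type:

    #include <iostream>
    #include <memory>

    // Illustrative stand-in for art::ThreadPool (not the real API).
    struct Pool {
      explicit Pool(size_t extra_workers) : extra_workers_(extra_workers) {}
      size_t extra_workers_;
    };

    class Driver {
     public:
      Driver(size_t thread_count, bool force_determinism)
          : thread_count_(thread_count), force_determinism_(force_determinism) {}

      // Mirrors InitializeThreadPools(): thread_count - 1 extra workers for
      // the parallel pool (the calling thread joins in), none for the
      // single-threaded pool.
      void InitializePools() {
        size_t extra = thread_count_ > 0 ? thread_count_ - 1 : 0;
        parallel_.reset(new Pool(extra));
        single_.reset(new Pool(0));
      }

      // Phases that allocate (resolution, class init) pick the
      // single-threaded pool when determinism is forced, as Resolve() and
      // InitializeClasses() do above.
      Pool* PoolFor(bool phase_allocates) const {
        return (phase_allocates && force_determinism_) ? single_.get()
                                                       : parallel_.get();
      }

     private:
      size_t thread_count_;
      bool force_determinism_;
      std::unique_ptr<Pool> parallel_;
      std::unique_ptr<Pool> single_;
    };

    int main() {
      Driver d(/*thread_count=*/4, /*force_determinism=*/true);
      d.InitializePools();
      std::cout << d.PoolFor(/*phase_allocates=*/true)->extra_workers_ << "\n";  // 0
    }
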
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 6a2f7bf..5e35cbb 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -411,7 +411,7 @@
   }
 
   size_t GetThreadCount() const {
-    return thread_count_;
+    return parallel_thread_count_;
   }
 
   bool GetDumpStats() const {
@@ -550,8 +550,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  ThreadPool* thread_pool, TimingLogger* timings)
+  void PreCompile(jobject class_loader,
+                  const std::vector<const DexFile*>& dex_files,
+                  TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
   void LoadImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
@@ -559,49 +560,71 @@
   // Attempt to resolve all type, methods, fields, and strings
   // referenced from code in the dex file following PathClassLoader
   // ordering semantics.
-  void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings)
+  void Resolve(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
-  void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+  void ResolveDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-              ThreadPool* thread_pool, TimingLogger* timings);
-  void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+  void Verify(jobject class_loader,
+              const std::vector<const DexFile*>& dex_files,
+              TimingLogger* timings);
+  void VerifyDexFile(jobject class_loader,
+                     const DexFile& dex_file,
                      const std::vector<const DexFile*>& dex_files,
-                     ThreadPool* thread_pool, TimingLogger* timings)
+                     ThreadPool* thread_pool,
+                     size_t thread_count,
+                     TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                   ThreadPool* thread_pool, TimingLogger* timings);
-  void SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+  void SetVerified(jobject class_loader,
+                   const std::vector<const DexFile*>& dex_files,
+                   TimingLogger* timings);
+  void SetVerifiedDexFile(jobject class_loader,
+                          const DexFile& dex_file,
                           const std::vector<const DexFile*>& dex_files,
-                          ThreadPool* thread_pool, TimingLogger* timings)
+                          ThreadPool* thread_pool,
+                          size_t thread_count,
+                          TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void InitializeClasses(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
-      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
-  void InitializeClasses(jobject class_loader, const DexFile& dex_file,
+  void InitializeClasses(jobject class_loader,
                          const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
+                         TimingLogger* timings)
+      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
+  void InitializeClasses(jobject class_loader,
+                         const DexFile& dex_file,
+                         const std::vector<const DexFile*>& dex_files,
+                         TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
   void UpdateImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
   static void FindClinitImageClassesCallback(mirror::Object* object, void* arg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings);
-  void CompileDexFile(jobject class_loader, const DexFile& dex_file,
+  void Compile(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings);
+  void CompileDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
   bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const;
 
+  void InitializeThreadPools();
+  void FreeThreadPools();
+  void CheckThreadPools();
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -652,7 +675,12 @@
 
   bool had_hard_verifier_failure_;
 
-  size_t thread_count_;
+  // A thread pool that can (potentially) run tasks in parallel.
+  std::unique_ptr<ThreadPool> parallel_thread_pool_;
+  size_t parallel_thread_count_;
+
+  // A thread pool that guarantees running single-threaded on the main thread.
+  std::unique_ptr<ThreadPool> single_thread_pool_;
 
   class AOTCompilationStats;
   std::unique_ptr<AOTCompilationStats> stats_;
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 82c0e86..4c03e5d 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -31,6 +31,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
@@ -240,6 +241,94 @@
   EXPECT_TRUE(expected->empty());
 }
 
+class CompilerDriverProfileTest : public CompilerDriverTest {
+ protected:
+  ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE {
+    ScopedObjectAccess soa(Thread::Current());
+    std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+
+    for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+    }
+    return &profile_info_;
+  }
+
+  std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) {
+    if (clazz == "Main") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Main.getA()",
+          "java.lang.String Main.getB()"});
+    } else if (clazz == "Second") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Second.getX()",
+          "java.lang.String Second.getY()"});
+    } else {
+      return std::unordered_set<std::string>();
+    }
+  }
+
+  void CheckCompiledMethods(jobject class_loader,
+                            const std::string& clazz,
+                            const std::unordered_set<std::string>& expected_methods) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+    ASSERT_NE(klass, nullptr);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    size_t number_of_compiled_methods = 0;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      std::string name = PrettyMethod(&m, true);
+      const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+      ASSERT_NE(code, nullptr);
+      if (expected_methods.find(name) != expected_methods.end()) {
+        number_of_compiled_methods++;
+        EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code));
+      } else {
+        EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code));
+      }
+    }
+    EXPECT_EQ(expected_methods.size(), number_of_compiled_methods);
+  }
+
+ private:
+  ProfileCompilationInfo profile_info_;
+};
+
+TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Need to enable dex-file writability. Methods rejected for compilation will run through
+  // the dex-to-dex compiler.
+  for (const DexFile* dex_file : GetDexFiles(class_loader)) {
+    ASSERT_TRUE(dex_file->EnableWrite());
+  }
+
+  CompileAll(class_loader);
+
+  std::unordered_set<std::string> m = GetExpectedMethodsForClass("Main");
+  std::unordered_set<std::string> s = GetExpectedMethodsForClass("Second");
+  CheckCompiledMethods(class_loader, "LMain;", m);
+  CheckCompiledMethods(class_loader, "LSecond;", s);
+}
+
 // TODO: need check-cast test (when stub complete & we can throw/catch
 
 }  // namespace art
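
The test's core assertion is a set-membership check: a virtual method gets real compiled code exactly when the profile names it, and falls back to the quick-to-interpreter bridge otherwise. Reduced to its essence (method names taken from the expectations above; the helper is hypothetical):

    #include <cassert>
    #include <string>
    #include <unordered_set>

    // Hypothetical reduction of CheckCompiledMethods(): compiled iff profiled.
    bool ShouldBeCompiled(const std::unordered_set<std::string>& profile,
                          const std::string& method) {
      return profile.count(method) != 0;
    }

    int main() {
      const std::unordered_set<std::string> profile = {
          "java.lang.String Main.getA()",
          "java.lang.String Main.getB()"};
      assert(ShouldBeCompiled(profile, "java.lang.String Main.getA()"));
      assert(!ShouldBeCompiled(profile, "java.lang.String Main.getC()"));
      return 0;
    }
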
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 2644528..3bf8921 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -37,6 +37,7 @@
       debuggable_(false),
       native_debuggable_(kDefaultNativeDebuggable),
       generate_debug_info_(kDefaultGenerateDebugInfo),
+      generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo),
       implicit_null_checks_(true),
       implicit_so_checks_(true),
       implicit_suspend_checks_(false),
@@ -46,7 +47,8 @@
       abort_on_hard_verifier_failure_(false),
       init_failure_output_(nullptr),
       dump_cfg_file_name_(""),
-      dump_cfg_append_(false) {
+      dump_cfg_append_(false),
+      force_determinism_(false) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -62,7 +64,7 @@
                                  size_t num_dex_methods_threshold,
                                  size_t inline_depth_limit,
                                  size_t inline_max_code_units,
-                                 const DexFile* no_inline_from,
+                                 const std::vector<const DexFile*>* no_inline_from,
                                  bool include_patch_information,
                                  double top_k_profile_threshold,
                                  bool debuggable,
@@ -75,7 +77,8 @@
                                  std::ostream* init_failure_output,
                                  bool abort_on_hard_verifier_failure,
                                  const std::string& dump_cfg_file_name,
-                                 bool dump_cfg_append
+                                 bool dump_cfg_append,
+                                 bool force_determinism
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -91,6 +94,7 @@
     debuggable_(debuggable),
     native_debuggable_(kDefaultNativeDebuggable),
     generate_debug_info_(generate_debug_info),
+    generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo),
     implicit_null_checks_(implicit_null_checks),
     implicit_so_checks_(implicit_so_checks),
     implicit_suspend_checks_(implicit_suspend_checks),
@@ -100,7 +104,8 @@
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
     init_failure_output_(init_failure_output),
     dump_cfg_file_name_(dump_cfg_file_name),
-    dump_cfg_append_(dump_cfg_append) {
+    dump_cfg_append_(dump_cfg_append),
+    force_determinism_(force_determinism) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
@@ -215,6 +220,10 @@
     generate_debug_info_ = true;
   } else if (option == "--no-generate-debug-info") {
     generate_debug_info_ = false;
+  } else if (option == "--generate-mini-debug-info") {
+    generate_mini_debug_info_ = true;
+  } else if (option == "--no-generate-mini-debug-info") {
+    generate_mini_debug_info_ = false;
   } else if (option == "--debuggable") {
     debuggable_ = true;
   } else if (option == "--native-debuggable") {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index d47fc2a..39372b3 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -51,6 +51,7 @@
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
   static const bool kDefaultNativeDebuggable = false;
   static const bool kDefaultGenerateDebugInfo = false;
+  static const bool kDefaultGenerateMiniDebugInfo = false;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
   static const size_t kDefaultInlineMaxCodeUnits = 32;
@@ -72,7 +73,7 @@
                   size_t num_dex_methods_threshold,
                   size_t inline_depth_limit,
                   size_t inline_max_code_units,
-                  const DexFile* no_inline_from,
+                  const std::vector<const DexFile*>* no_inline_from,
                   bool include_patch_information,
                   double top_k_profile_threshold,
                   bool debuggable,
@@ -85,7 +86,8 @@
                   std::ostream* init_failure_output,
                   bool abort_on_hard_verifier_failure,
                   const std::string& dump_cfg_file_name,
-                  bool dump_cfg_append);
+                  bool dump_cfg_append,
+                  bool force_determinism);
 
   CompilerFilter GetCompilerFilter() const {
     return compiler_filter_;
@@ -170,10 +172,20 @@
     return native_debuggable_;
   }
 
+  // This flag controls whether the compiler collects debugging information.
+  // The other flags control how the information is written to disk.
+  bool GenerateAnyDebugInfo() const {
+    return GetGenerateDebugInfo() || GetGenerateMiniDebugInfo();
+  }
+
   bool GetGenerateDebugInfo() const {
     return generate_debug_info_;
   }
 
+  bool GetGenerateMiniDebugInfo() const {
+    return generate_mini_debug_info_;
+  }
+
   bool GetImplicitNullChecks() const {
     return implicit_null_checks_;
   }
@@ -220,7 +232,7 @@
     return abort_on_hard_verifier_failure_;
   }
 
-  const DexFile* GetNoInlineFromDexFile() const {
+  const std::vector<const DexFile*>* GetNoInlineFromDexFile() const {
     return no_inline_from_;
   }
 
@@ -234,6 +246,10 @@
     return dump_cfg_append_;
   }
 
+  bool IsForceDeterminism() const {
+    return force_determinism_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParsePassOptions(const StringPiece& option, UsageFn Usage);
@@ -257,8 +273,10 @@
   size_t inline_depth_limit_;
   size_t inline_max_code_units_;
 
-  // A dex file from which we should not inline code.
-  const DexFile* no_inline_from_;
+  // Dex files from which we should not inline code.
+  // This is usually a very short list (e.g. a single dex file), so we
+  // prefer vector<> over a lookup-oriented container, such as set<>.
+  const std::vector<const DexFile*>* no_inline_from_;
 
   bool include_patch_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
@@ -266,6 +284,7 @@
   bool debuggable_;
   bool native_debuggable_;
   bool generate_debug_info_;
+  bool generate_mini_debug_info_;
   bool implicit_null_checks_;
   bool implicit_so_checks_;
   bool implicit_suspend_checks_;
@@ -286,6 +305,10 @@
   std::string dump_cfg_file_name_;
   bool dump_cfg_append_;
 
+  // Whether the compiler should trade performance for determinism to guarantee exactly reproducible
+  // outcomes.
+  bool force_determinism_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
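
The new flags are meant to compose as follows: GenerateAnyDebugInfo() gates the (expensive) collection of debugging information, while the two concrete flags select the on-disk representation. A minimal sketch of the intended consumer logic (the function name here is hypothetical; the real consumer is ElfWriterQuick::WriteDebugInfo later in this change):

// Sketch only, assuming a CompilerOptions instance as declared above.
void WriteDebugOutputs(const CompilerOptions& opts) {
  if (!opts.GenerateAnyDebugInfo()) {
    return;  // Neither output requested: skip collecting debug data.
  }
  if (opts.GetGenerateDebugInfo()) {
    // Emit full, uncompressed .debug_* sections.
  }
  if (opts.GetGenerateMiniDebugInfo()) {
    // Emit stripped symbols + CFI, XZ-compressed into .gnu_debugdata.
  }
}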
diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h
index 5a99641..f74f37c 100644
--- a/compiler/dwarf/debug_frame_opcode_writer.h
+++ b/compiler/dwarf/debug_frame_opcode_writer.h
@@ -248,7 +248,7 @@
     }
   }
 
-  void ALWAYS_INLINE DefCFAExpression(void * expr, int expr_size) {
+  void ALWAYS_INLINE DefCFAExpression(uint8_t* expr, int expr_size) {
     if (UNLIKELY(enabled_)) {
       ImplicitlyAdvancePC();
       uses_dwarf3_features_ = true;
@@ -258,7 +258,7 @@
     }
   }
 
-  void ALWAYS_INLINE Expression(Reg reg, void * expr, int expr_size) {
+  void ALWAYS_INLINE Expression(Reg reg, uint8_t* expr, int expr_size) {
     if (UNLIKELY(enabled_)) {
       ImplicitlyAdvancePC();
       uses_dwarf3_features_ = true;
@@ -269,7 +269,7 @@
     }
   }
 
-  void ALWAYS_INLINE ValExpression(Reg reg, void * expr, int expr_size) {
+  void ALWAYS_INLINE ValExpression(Reg reg, uint8_t* expr, int expr_size) {
     if (UNLIKELY(enabled_)) {
       ImplicitlyAdvancePC();
       uses_dwarf3_features_ = true;
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index a551e4b..e5bbed3 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -22,6 +22,7 @@
 
 #include "base/casts.h"
 #include "dwarf/dwarf_constants.h"
+#include "dwarf/expression.h"
 #include "dwarf/writer.h"
 #include "leb128.h"
 
@@ -106,16 +107,16 @@
     }
   }
 
-  void WriteBlock(Attribute attrib, const void* ptr, size_t num_bytes) {
+  void WriteBlock(Attribute attrib, const uint8_t* ptr, size_t num_bytes) {
     AddAbbrevAttribute(attrib, DW_FORM_block);
     this->PushUleb128(num_bytes);
     this->PushData(ptr, num_bytes);
   }
 
-  void WriteExprLoc(Attribute attrib, const void* ptr, size_t num_bytes) {
+  void WriteExprLoc(Attribute attrib, const Expression& expr) {
     AddAbbrevAttribute(attrib, DW_FORM_exprloc);
-    this->PushUleb128(dchecked_integral_cast<uint32_t>(num_bytes));
-    this->PushData(ptr, num_bytes);
+    this->PushUleb128(dchecked_integral_cast<uint32_t>(expr.size()));
+    this->PushData(expr.data());
   }
 
   void WriteData1(Attribute attrib, uint8_t value) {
diff --git a/compiler/dwarf/dwarf_constants.h b/compiler/dwarf/dwarf_constants.h
index 3b570e5..0d7951b 100644
--- a/compiler/dwarf/dwarf_constants.h
+++ b/compiler/dwarf/dwarf_constants.h
@@ -200,11 +200,11 @@
   DW_AT_data_bit_offset = 0x6b,
   DW_AT_const_expr = 0x6c,
   DW_AT_enum_class = 0x6d,
+  DW_AT_linkage_name = 0x6e,
 #ifdef INCLUDE_DWARF5_VALUES
   // Values to be added in Dwarf 5. Final value not yet specified. Values listed
   // may be different than other implementations. Use with caution.
   // TODO Update these values when Dwarf 5 is released.
-  DW_AT_linkage_name = 0x6e,
   DW_AT_call_site_value = 0x6f,
   DW_AT_call_site_data_value = 0x70,
   DW_AT_call_site_target = 0x71,
diff --git a/compiler/dwarf/expression.h b/compiler/dwarf/expression.h
new file mode 100644
index 0000000..1503d03
--- /dev/null
+++ b/compiler/dwarf/expression.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_EXPRESSION_H_
+#define ART_COMPILER_DWARF_EXPRESSION_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "dwarf/dwarf_constants.h"
+#include "dwarf/writer.h"
+
+namespace art {
+namespace dwarf {
+
+// Writer for DWARF expressions which are used in .debug_info and .debug_loc sections.
+// See the DWARF specification for the precise meaning of the opcodes.
+// If multiple equivalent encodings are possible, it will choose the most compact one.
+// The writer is not exhaustive - it only implements opcodes we have needed so far.
+class Expression : private Writer<> {
+ public:
+  using Writer<>::data;
+  using Writer<>::size;
+
+  // Push signed integer on the stack.
+  void WriteOpConsts(int32_t value) {
+    if (0 <= value && value < 32) {
+      PushUint8(DW_OP_lit0 + value);
+    } else {
+      PushUint8(DW_OP_consts);
+      PushSleb128(value);
+    }
+  }
+
+  // Push unsigned integer on the stack.
+  void WriteOpConstu(uint32_t value) {
+    if (value < 32) {
+      PushUint8(DW_OP_lit0 + value);
+    } else {
+      PushUint8(DW_OP_constu);
+      PushUleb128(value);
+    }
+  }
+
+  // Variable is stored in given register.
+  void WriteOpReg(uint32_t dwarf_reg_num) {
+    if (dwarf_reg_num < 32) {
+      PushUint8(DW_OP_reg0 + dwarf_reg_num);
+    } else {
+      PushUint8(DW_OP_regx);
+      PushUleb128(dwarf_reg_num);
+    }
+  }
+
+  // Variable is stored on stack.  Also see DW_AT_frame_base.
+  void WriteOpFbreg(int32_t stack_offset) {
+    PushUint8(DW_OP_fbreg);
+    PushSleb128(stack_offset);
+  }
+
+  // The variable is stored in multiple locations (pieces).
+  void WriteOpPiece(uint32_t num_bytes) {
+    PushUint8(DW_OP_piece);
+    PushUleb128(num_bytes);
+  }
+
+  // Loads 32-bit or 64-bit value depending on architecture.
+  void WriteOpDeref() { PushUint8(DW_OP_deref); }
+
+  // Loads value of given byte size.
+  void WriteOpDerefSize(uint8_t num_bytes) {
+    PushUint8(DW_OP_deref_size);
+    PushUint8(num_bytes);
+  }
+
+  // Pop two values and push their sum.
+  void WriteOpPlus() { PushUint8(DW_OP_plus); }
+
+  // Add constant value to value on top of stack.
+  void WriteOpPlusUconst(uint32_t offset) {
+    PushUint8(DW_OP_plus_uconst);
+    PushUleb128(offset);
+  }
+
+  // Negate top of stack.
+  void WriteOpNeg() { PushUint8(DW_OP_neg); }
+
+  // Pop two values and push their bitwise-AND.
+  void WriteOpAnd() { PushUint8(DW_OP_and); }
+
+  // Push stack base pointer as determined from .debug_frame.
+  void WriteOpCallFrameCfa() { PushUint8(DW_OP_call_frame_cfa); }
+
+  // Push address of the variable we are working with.
+  void WriteOpPushObjectAddress() { PushUint8(DW_OP_push_object_address); }
+
+  // Return the top of stack as the value of the variable.
+  // Otherwise, the top of stack is the variable's location.
+  void WriteOpStackValue() { PushUint8(DW_OP_stack_value); }
+
+  explicit Expression(std::vector<uint8_t>* buffer) : Writer<>(buffer) {
+    buffer->clear();
+  }
+};
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_EXPRESSION_H_
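
As a worked example of the compact-encoding rule above: WriteOpReg(5) emits the single byte 0x55 (DW_OP_reg5), while WriteOpReg(40) emits the two bytes 0x90 0x28 (DW_OP_regx followed by ULEB128-encoded 40). A self-contained sketch of the same logic, with the DW_OP_* values inlined from the DWARF specification:

#include <cstdint>
#include <vector>

constexpr uint8_t kDwOpReg0 = 0x50;  // DW_OP_reg0..DW_OP_reg31: one-byte forms.
constexpr uint8_t kDwOpRegx = 0x90;  // DW_OP_regx: register number as ULEB128.

void PushUleb128(std::vector<uint8_t>* out, uint32_t value) {
  do {
    uint8_t byte = value & 0x7f;
    value >>= 7;
    if (value != 0) {
      byte |= 0x80;  // Continuation bit: more bytes follow.
    }
    out->push_back(byte);
  } while (value != 0);
}

// Mirrors Expression::WriteOpReg: one byte for registers 0..31, else DW_OP_regx.
void WriteOpReg(std::vector<uint8_t>* out, uint32_t dwarf_reg_num) {
  if (dwarf_reg_num < 32) {
    out->push_back(kDwOpReg0 + dwarf_reg_num);
  } else {
    out->push_back(kDwOpRegx);
    PushUleb128(out, dwarf_reg_num);
  }
}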
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index f76f76f..137c566 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -70,7 +70,7 @@
       writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata4);  // R: Pointer encoding.
     }
   }
-  writer.PushData(*opcodes.data());
+  writer.PushData(opcodes.data());
   writer.Pad(is64bit ? 8 : 4);
   writer.UpdateUint32(cie_header_start_, writer.data()->size() - cie_header_start_ - 4);
 }
@@ -117,7 +117,7 @@
     writer.PushUint32(code_size);
   }
   writer.PushUleb128(0);  // Augmentation data size.
-  writer.PushData(opcodes);
+  writer.PushData(opcodes.data(), opcodes.size());
   writer.Pad(is64bit ? 8 : 4);
   writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4);
 }
@@ -139,7 +139,7 @@
   writer.PushUint8(entries.Is64bit() ? 8 : 4);
   size_t entries_offset = writer.data()->size();
   DCHECK_EQ(entries_offset, DebugInfoEntryWriter<Vector>::kCompilationUnitHeaderSize);
-  writer.PushData(*entries.data());
+  writer.PushData(entries.data());
   writer.UpdateUint32(start, writer.data()->size() - start - 4);
   // Copy patch locations and make them relative to .debug_info section.
   for (uintptr_t patch_location : entries.GetPatchLocations()) {
@@ -193,7 +193,7 @@
   writer.PushUint8(0);  // Terminate file list.
   writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4);
   size_t opcodes_offset = writer.data()->size();
-  writer.PushData(*opcodes.data());
+  writer.PushData(opcodes.data());
   writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4);
   // Copy patch locations and make them relative to .debug_line section.
   for (uintptr_t patch_location : opcodes.GetPatchLocations()) {
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
index d2add7f..74acf07 100644
--- a/compiler/dwarf/writer.h
+++ b/compiler/dwarf/writer.h
@@ -114,16 +114,16 @@
     data_->insert(data_->end(), value, value + strlen(value) + 1);
   }
 
-  void PushData(const void* ptr, size_t num_bytes) {
-    const char* p = reinterpret_cast<const char*>(ptr);
-    data_->insert(data_->end(), p, p + num_bytes);
+  void PushData(const uint8_t* ptr, size_t num_bytes) {
+    data_->insert(data_->end(), ptr, ptr + num_bytes);
   }
 
-  template<typename Vector2>
-  void PushData(const Vector2& buffer) {
-    static_assert(std::is_same<typename std::add_const<typename Vector::value_type>::type,
-                               const uint8_t>::value, "Invalid value type");
-    data_->insert(data_->end(), buffer.begin(), buffer.end());
+  void PushData(const char* ptr, size_t num_bytes) {
+    data_->insert(data_->end(), ptr, ptr + num_bytes);
+  }
+
+  void PushData(const Vector* buffer) {
+    data_->insert(data_->end(), buffer->begin(), buffer->end());
   }
 
   void UpdateUint32(size_t offset, uint32_t value) {
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index a7461a5..3d24d19 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -100,12 +100,6 @@
       header_.sh_entsize = entsize;
     }
 
-    ~Section() OVERRIDE {
-      if (started_) {
-        CHECK(finished_);
-      }
-    }
-
     // Start writing of this section.
     void Start() {
       CHECK(!started_);
@@ -171,10 +165,15 @@
       }
     }
 
-    // Set desired allocation size for .bss section.
-    void SetSize(Elf_Word size) {
-      CHECK_EQ(header_.sh_type, (Elf_Word)SHT_NOBITS);
+    // Write this section as a "NOBITS" section (used for the .bss section).
+    // This means that the ELF file does not contain the initial data for this section,
+    // and it will be zero-initialized when the ELF file is loaded at runtime.
+    void WriteNoBitsSection(Elf_Word size) {
+      DCHECK_NE(header_.sh_flags & SHF_ALLOC, 0u);
+      Start();
+      header_.sh_type = SHT_NOBITS;
       header_.sh_size = size;
+      End();
     }
 
     // This function always succeeds to simplify code.
@@ -352,6 +351,12 @@
     other_sections_.push_back(std::move(s));
   }
 
+  // Set where the next section will be allocated in the virtual address space.
+  void SetVirtualAddress(Elf_Addr address) {
+    DCHECK_GE(address, virtual_address_);
+    virtual_address_ = address;
+  }
+
   void Start() {
     // Reserve space for ELF header and program headers.
     // We do not know the number of headers until later, so
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index e03614f..2e98b69 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -16,15 +16,19 @@
 
 #include "elf_writer_debug.h"
 
+#include <algorithm>
 #include <unordered_set>
 #include <vector>
+#include <cstdio>
 
 #include "base/casts.h"
 #include "base/stl_util.h"
+#include "linear_alloc.h"
 #include "compiled_method.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "dwarf/dedup_vector.h"
+#include "dwarf/expression.h"
 #include "dwarf/headers.h"
 #include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
@@ -37,6 +41,11 @@
 #include "stack_map.h"
 #include "utils.h"
 
+// liblzma.
+#include "XzEnc.h"
+#include "7zCrc.h"
+#include "XzCrc64.h"
+
 namespace art {
 namespace dwarf {
 
@@ -219,7 +228,8 @@
 template<typename ElfTypes>
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
                      const ArrayRef<const MethodDebugInfo>& method_infos,
-                     CFIFormat format) {
+                     CFIFormat format,
+                     bool write_oat_patches) {
   CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
@@ -235,6 +245,24 @@
     patch_locations.reserve(method_infos.size());
   }
 
+  // The methods can be written in any order.
+  // Let's therefore sort them in the lexicographical order of the opcodes.
+  // This has no effect on its own. However, if the final .debug_frame section is
+  // compressed, it reduces the size since similar opcode sequences are grouped.
+  std::vector<const MethodDebugInfo*> sorted_method_infos;
+  sorted_method_infos.reserve(method_infos.size());
+  for (size_t i = 0; i < method_infos.size(); i++) {
+    sorted_method_infos.push_back(&method_infos[i]);
+  }
+  std::sort(
+      sorted_method_infos.begin(),
+      sorted_method_infos.end(),
+      [](const MethodDebugInfo* lhs, const MethodDebugInfo* rhs) {
+        ArrayRef<const uint8_t> l = lhs->compiled_method_->GetCFIInfo();
+        ArrayRef<const uint8_t> r = rhs->compiled_method_->GetCFIInfo();
+        return std::lexicographical_compare(l.begin(), l.end(), r.begin(), r.end());
+      });
+
   // Write .eh_frame/.debug_frame section.
   auto* cfi_section = (format == DW_DEBUG_FRAME_FORMAT
                        ? builder->GetDebugFrame()
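
The size win from the sort above comes purely from locality: XZ/LZMA encodes repeats as cheap back-references, so placing near-identical opcode sequences next to each other shortens the compressed stream. A toy standalone illustration:

#include <algorithm>
#include <string>
#include <vector>

int main() {
  // Stand-ins for per-method CFI opcode sequences.
  std::vector<std::string> fdes = {"push;cfa16", "leaf", "push;cfa16", "leaf"};
  std::sort(fdes.begin(), fdes.end());
  // Now: "leaf", "leaf", "push;cfa16", "push;cfa16" - identical neighbors
  // let a dictionary coder emit short matches instead of fresh literals.
  return 0;
}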
@@ -253,11 +281,11 @@
     cfi_section->WriteFully(buffer.data(), buffer.size());
     buffer_address += buffer.size();
     buffer.clear();
-    for (const MethodDebugInfo& mi : method_infos) {
-      if (!mi.deduped_) {  // Only one FDE per unique address.
-        ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
+    for (const MethodDebugInfo* mi : sorted_method_infos) {
+      if (!mi->deduped_) {  // Only one FDE per unique address.
+        ArrayRef<const uint8_t> opcodes = mi->compiled_method_->GetCFIInfo();
         if (!opcodes.empty()) {
-          const Elf_Addr code_address = text_address + mi.low_pc_;
+          const Elf_Addr code_address = text_address + mi->low_pc_;
           if (format == DW_EH_FRAME_FORMAT) {
             binary_search_table.push_back(
                 dchecked_integral_cast<uint32_t>(code_address));
@@ -265,7 +293,7 @@
                 dchecked_integral_cast<uint32_t>(buffer_address));
           }
           WriteFDE(is64bit, cfi_address, cie_address,
-                   code_address, mi.high_pc_ - mi.low_pc_,
+                   code_address, mi->high_pc_ - mi->low_pc_,
                    opcodes, format, buffer_address, &buffer,
                    &patch_locations);
           cfi_section->WriteFully(buffer.data(), buffer.size());
@@ -306,8 +334,10 @@
     header_section->WriteFully(binary_search_table.data(), binary_search_table.size());
     header_section->End();
   } else {
-    builder->WritePatches(".debug_frame.oat_patches",
-                          ArrayRef<const uintptr_t>(patch_locations));
+    if (write_oat_patches) {
+      builder->WritePatches(".debug_frame.oat_patches",
+                            ArrayRef<const uintptr_t>(patch_locations));
+    }
   }
 }
 
@@ -498,8 +528,10 @@
         WriteName(dex->GetMethodName(dex_method));
         info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
         info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(mi->high_pc_-mi->low_pc_));
-        uint8_t frame_base[] = { DW_OP_call_frame_cfa };
-        info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
+        std::vector<uint8_t> expr_buffer;
+        Expression expr(&expr_buffer);
+        expr.WriteOpCallFrameCfa();
+        info_.WriteExprLoc(DW_AT_frame_base, expr);
         WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
 
         // Write parameters. DecodeDebugLocalInfo returns them as well, but it does not
@@ -588,6 +620,7 @@
       info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
       info_.WriteData1(DW_AT_language, DW_LANG_Java);
 
+      std::vector<uint8_t> expr_buffer;
       for (mirror::Class* type : types) {
         if (type->IsPrimitive()) {
           // For primitive types the definition and the declaration is the same.
@@ -603,20 +636,33 @@
           info_.StartTag(DW_TAG_array_type);
           std::string descriptor_string;
           WriteLazyType(element_type->GetDescriptor(&descriptor_string));
+          WriteLinkageName(type);
           info_.WriteUdata(DW_AT_data_member_location, data_offset);
           info_.StartTag(DW_TAG_subrange_type);
-          DCHECK_LT(length_offset, 32u);
-          uint8_t count[] = {
-            DW_OP_push_object_address,
-            static_cast<uint8_t>(DW_OP_lit0 + length_offset),
-            DW_OP_plus,
-            DW_OP_deref_size,
-            4  // Array length is always 32-bit wide.
-          };
-          info_.WriteExprLoc(DW_AT_count, &count, sizeof(count));
+          Expression count_expr(&expr_buffer);
+          count_expr.WriteOpPushObjectAddress();
+          count_expr.WriteOpPlusUconst(length_offset);
+          count_expr.WriteOpDerefSize(4);  // Array length is always 32-bit wide.
+          info_.WriteExprLoc(DW_AT_count, count_expr);
           info_.EndTag();  // DW_TAG_subrange_type.
           info_.EndTag();  // DW_TAG_array_type.
+        } else if (type->IsInterface()) {
+          // Skip.  Variables cannot have an interface as a dynamic type.
+          // We do not expose the interface information to the debugger in any way.
         } else {
+          // Declare the base class.  We cannot use the standard WriteLazyType
+          // since we want to avoid the DW_TAG_reference_type wrapping.
+          mirror::Class* base_class = type->GetSuperClass();
+          size_t base_class_declaration_offset = 0;
+          if (base_class != nullptr) {
+            std::string tmp_storage;
+            const char* base_class_desc = base_class->GetDescriptor(&tmp_storage);
+            base_class_declaration_offset = StartClassTag(base_class_desc);
+            info_.WriteFlag(DW_AT_declaration, true);
+            WriteLinkageName(base_class);
+            EndClassTag(base_class_desc);
+          }
+
           std::string descriptor_string;
           const char* desc = type->GetDescriptor(&descriptor_string);
           StartClassTag(desc);
@@ -625,11 +671,40 @@
             info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
           }
 
+          WriteLinkageName(type);
+
+          if (type->IsObjectClass()) {
+            // Generate an artificial member which is used to get the dynamic type of a variable.
+            // The run-time value of this field will correspond to the linkage name of some type.
+            // We need to do it only once in j.l.Object since all other types inherit it.
+            info_.StartTag(DW_TAG_member);
+            WriteName(".dynamic_type");
+            WriteLazyType(sizeof(uintptr_t) == 8 ? "J" : "I");
+            info_.WriteFlag(DW_AT_artificial, true);
+            // Create DWARF expression to get the value of the methods_ field.
+            Expression expr(&expr_buffer);
+            // The address of the object has been implicitly pushed on the stack.
+            // Dereference the klass_ field of Object (32-bit; possibly poisoned).
+            DCHECK_EQ(type->ClassOffset().Uint32Value(), 0u);
+            DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Class>), 4u);
+            expr.WriteOpDerefSize(4);
+            if (kPoisonHeapReferences) {
+              expr.WriteOpNeg();
+              // DWARF stack is pointer sized. Ensure that the high bits are clear.
+              expr.WriteOpConstu(0xFFFFFFFF);
+              expr.WriteOpAnd();
+            }
+            // Add offset to the methods_ field.
+            expr.WriteOpPlusUconst(mirror::Class::MethodsOffset().Uint32Value());
+            // Top of stack holds the location of the field now.
+            info_.WriteExprLoc(DW_AT_data_member_location, expr);
+            info_.EndTag();  // DW_TAG_member.
+          }
+
           // Base class.
-          mirror::Class* base_class = type->GetSuperClass();
           if (base_class != nullptr) {
             info_.StartTag(DW_TAG_inheritance);
-            WriteLazyType(base_class->GetDescriptor(&descriptor_string));
+            info_.WriteRef4(DW_AT_type, base_class_declaration_offset);
             info_.WriteUdata(DW_AT_data_member_location, 0);
             info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
             info_.EndTag();  // DW_TAG_inheritance.
@@ -684,6 +759,24 @@
       owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
     }
 
+    // The linkage name uniquely identifies a type.
+    // It is used to determine the dynamic type of objects.
+    // We use the methods_ field of the class since it is unique and it is not moved by the GC.
+    void WriteLinkageName(mirror::Class* type) SHARED_REQUIRES(Locks::mutator_lock_) {
+      auto* methods_ptr = type->GetMethodsPtr();
+      if (methods_ptr == nullptr) {
+        // Some types might have no methods.  Allocate an empty array instead.
+        LinearAlloc* allocator = Runtime::Current()->GetLinearAlloc();
+        void* storage = allocator->Alloc(Thread::Current(), sizeof(LengthPrefixedArray<ArtMethod>));
+        methods_ptr = new (storage) LengthPrefixedArray<ArtMethod>(0);
+        type->SetMethodsPtr(methods_ptr, 0, 0);
+        DCHECK(type->GetMethodsPtr() != nullptr);
+      }
+      char name[32];
+      snprintf(name, sizeof(name), "0x%" PRIXPTR, reinterpret_cast<uintptr_t>(methods_ptr));
+      info_.WriteString(DW_AT_linkage_name, name);
+    }
+
     // Write table into .debug_loc which describes location of dex register.
     // The dex register might be valid only at some points and it might
     // move between machine registers and stack.
@@ -713,12 +806,12 @@
       // Write .debug_loc entries.
       const InstructionSet isa = owner_->builder_->GetIsa();
       const bool is64bit = Is64BitInstructionSet(isa);
+      std::vector<uint8_t> expr_buffer;
       for (const VariableLocation& variable_location : variable_locations) {
         // Translate dex register location to DWARF expression.
         // Note that 64-bit value might be split to two distinct locations.
         // (for example, two 32-bit machine registers, or even stack and register)
-        uint8_t buffer[64];
-        uint8_t* pos = buffer;
+        Expression expr(&expr_buffer);
         DexRegisterLocation reg_lo = variable_location.reg_lo;
         DexRegisterLocation reg_hi = variable_location.reg_hi;
         for (int piece = 0; piece < (is64bitValue ? 2 : 1); piece++) {
@@ -727,15 +820,14 @@
           const int32_t value = reg_loc.GetValue();
           if (kind == Kind::kInStack) {
             const size_t frame_size = method_info->compiled_method_->GetFrameSizeInBytes();
-            *(pos++) = DW_OP_fbreg;
             // The stack offset is relative to SP. Make it relative to CFA.
-            pos = EncodeSignedLeb128(pos, value - frame_size);
+            expr.WriteOpFbreg(value - frame_size);
             if (piece == 0 && reg_hi.GetKind() == Kind::kInStack &&
                 reg_hi.GetValue() == value + 4) {
               break;  // the high word is correctly implied by the low word.
             }
           } else if (kind == Kind::kInRegister) {
-            pos = WriteOpReg(pos, GetDwarfCoreReg(isa, value).num());
+            expr.WriteOpReg(GetDwarfCoreReg(isa, value).num());
             if (piece == 0 && reg_hi.GetKind() == Kind::kInRegisterHigh &&
                 reg_hi.GetValue() == value) {
               break;  // the high word is correctly implied by the low word.
@@ -745,22 +837,21 @@
                 piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegister &&
                 reg_hi.GetValue() == value + 1 && value % 2 == 0) {
               // Translate S register pair to D register (e.g. S4+S5 to D2).
-              pos = WriteOpReg(pos, Reg::ArmDp(value / 2).num());
+              expr.WriteOpReg(Reg::ArmDp(value / 2).num());
               break;
             }
             if (isa == kMips || isa == kMips64) {
               // TODO: Find what the DWARF floating point register numbers are on MIPS.
               break;
             }
-            pos = WriteOpReg(pos, GetDwarfFpReg(isa, value).num());
+            expr.WriteOpReg(GetDwarfFpReg(isa, value).num());
             if (piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegisterHigh &&
                 reg_hi.GetValue() == reg_lo.GetValue()) {
               break;  // the high word is correctly implied by the low word.
             }
           } else if (kind == Kind::kConstant) {
-            *(pos++) = DW_OP_consts;
-            pos = EncodeSignedLeb128(pos, value);
-            *(pos++) = DW_OP_stack_value;
+            expr.WriteOpConsts(value);
+            expr.WriteOpStackValue();
           } else if (kind == Kind::kNone) {
             break;
           } else {
@@ -774,14 +865,11 @@
           if (is64bitValue) {
             // Write the marker which is needed by split 64-bit values.
             // This code is skipped by the special cases.
-            *(pos++) = DW_OP_piece;
-            pos = EncodeUnsignedLeb128(pos, 4);
+            expr.WriteOpPiece(4);
           }
         }
 
-        // Check that the buffer is large enough; keep half of it empty for safety.
-        DCHECK_LE(static_cast<size_t>(pos - buffer), sizeof(buffer) / 2);
-        if (pos > buffer) {
+        if (expr.size() > 0) {
           if (is64bit) {
             debug_loc.PushUint64(variable_location.low_pc - compilation_unit_low_pc);
             debug_loc.PushUint64(variable_location.high_pc - compilation_unit_low_pc);
@@ -790,8 +878,8 @@
             debug_loc.PushUint32(variable_location.high_pc - compilation_unit_low_pc);
           }
           // Write the expression.
-          debug_loc.PushUint16(pos - buffer);
-          debug_loc.PushData(buffer, pos - buffer);
+          debug_loc.PushUint16(expr.size());
+          debug_loc.PushData(expr.data());
         } else {
           // Do not generate .debug_loc if the location is not known.
         }
@@ -856,17 +944,6 @@
       }
     }
 
-    // Helper which writes DWARF expression referencing a register.
-    static uint8_t* WriteOpReg(uint8_t* buffer, uint32_t dwarf_reg_num) {
-      if (dwarf_reg_num < 32) {
-        *(buffer++) = DW_OP_reg0 + dwarf_reg_num;
-      } else {
-        *(buffer++) = DW_OP_regx;
-        buffer = EncodeUnsignedLeb128(buffer, dwarf_reg_num);
-      }
-      return buffer;
-    }
-
     // Convert dex type descriptor to DWARF.
     // Returns offset in the compilation unit.
     size_t WriteTypeDeclaration(const std::string& desc) {
@@ -1108,9 +1185,13 @@
         for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
           StackMap stack_map = code_info.GetStackMapAt(s, encoding);
           DCHECK(stack_map.IsValid());
-          const uint32_t pc = stack_map.GetNativePcOffset(encoding);
-          const int32_t dex = stack_map.GetDexPc(encoding);
-          src_mapping_table_from_stack_maps.push_back({pc, dex});
+          // Emit only locations where we have local-variable information.
+          // In particular, skip mappings inside the prologue.
+          if (stack_map.HasDexRegisterMap(encoding)) {
+            const uint32_t pc = stack_map.GetNativePcOffset(encoding);
+            const int32_t dex = stack_map.GetDexPc(encoding);
+            src_mapping_table_from_stack_maps.push_back({pc, dex});
+          }
         }
         std::sort(src_mapping_table_from_stack_maps.begin(),
                   src_mapping_table_from_stack_maps.end());
@@ -1284,8 +1365,9 @@
 }
 
 template <typename ElfTypes>
-void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
-                       const ArrayRef<const MethodDebugInfo>& method_infos) {
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const MethodDebugInfo>& method_infos,
+                              bool with_signature) {
   bool generated_mapping_symbol = false;
   auto* strtab = builder->GetStrTab();
   auto* symtab = builder->GetSymTab();
@@ -1305,22 +1387,31 @@
 
   strtab->Start();
   strtab->Write("");  // strtab should start with empty string.
+  std::string last_name;
+  size_t last_name_offset = 0;
   for (const MethodDebugInfo& info : method_infos) {
     if (info.deduped_) {
       continue;  // Add symbol only for the first instance.
     }
-    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
+    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, with_signature);
     if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
       name += " [DEDUPED]";
     }
+    // If we write method names without the signature, we might see the same name multiple times.
+    size_t name_offset = (name == last_name ? last_name_offset : strtab->Write(name));
 
     const auto* text = builder->GetText()->Exists() ? builder->GetText() : nullptr;
     const bool is_relative = (text != nullptr);
     uint32_t low_pc = info.low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     low_pc += info.compiled_method_->CodeDelta();
-    symtab->Add(strtab->Write(name), text, low_pc,
-                is_relative, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+    symtab->Add(name_offset,
+                text,
+                low_pc,
+                is_relative,
+                info.high_pc_ - info.low_pc_,
+                STB_GLOBAL,
+                STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
@@ -1333,6 +1424,9 @@
         generated_mapping_symbol = true;
       }
     }
+
+    last_name = std::move(name);
+    last_name_offset = name_offset;
   }
   strtab->End();
 
@@ -1348,13 +1442,83 @@
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format) {
   // Add methods to .symtab.
-  WriteDebugSymbols(builder, method_infos);
+  WriteDebugSymbols(builder, method_infos, true /* with_signature */);
   // Generate CFI (stack unwinding information).
-  WriteCFISection(builder, method_infos, cfi_format);
+  WriteCFISection(builder, method_infos, cfi_format, true /* write_oat_patches */);
   // Write DWARF .debug_* sections.
   WriteDebugSections(builder, method_infos);
 }
 
+static void XzCompress(const std::vector<uint8_t>* src, std::vector<uint8_t>* dst) {
+  // Configure the compression library.
+  CrcGenerateTable();
+  Crc64GenerateTable();
+  CLzma2EncProps lzma2Props;
+  Lzma2EncProps_Init(&lzma2Props);
+  lzma2Props.lzmaProps.level = 1;  // Fast compression.
+  Lzma2EncProps_Normalize(&lzma2Props);
+  CXzProps props;
+  XzProps_Init(&props);
+  props.lzma2Props = &lzma2Props;
+  // Implement the required interface for communication (written in C so no virtual methods).
+  struct XzCallbacks : public ISeqInStream, public ISeqOutStream, public ICompressProgress {
+    static SRes ReadImpl(void* p, void* buf, size_t* size) {
+      auto* ctx = static_cast<XzCallbacks*>(reinterpret_cast<ISeqInStream*>(p));
+      *size = std::min(*size, ctx->src_->size() - ctx->src_pos_);
+      memcpy(buf, ctx->src_->data() + ctx->src_pos_, *size);
+      ctx->src_pos_ += *size;
+      return SZ_OK;
+    }
+    static size_t WriteImpl(void* p, const void* buf, size_t size) {
+      auto* ctx = static_cast<XzCallbacks*>(reinterpret_cast<ISeqOutStream*>(p));
+      const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buf);
+      ctx->dst_->insert(ctx->dst_->end(), buffer, buffer + size);
+      return size;
+    }
+    static SRes ProgressImpl(void* , UInt64, UInt64) {
+      return SZ_OK;
+    }
+    size_t src_pos_;
+    const std::vector<uint8_t>* src_;
+    std::vector<uint8_t>* dst_;
+  };
+  XzCallbacks callbacks;
+  callbacks.Read = XzCallbacks::ReadImpl;
+  callbacks.Write = XzCallbacks::WriteImpl;
+  callbacks.Progress = XzCallbacks::ProgressImpl;
+  callbacks.src_pos_ = 0;
+  callbacks.src_ = src;
+  callbacks.dst_ = dst;
+  // Compress.
+  SRes res = Xz_Encode(&callbacks, &callbacks, &props, &callbacks);
+  CHECK_EQ(res, SZ_OK);
+}
+
+template <typename ElfTypes>
+void WriteMiniDebugInfo(ElfBuilder<ElfTypes>* parent_builder,
+                        const ArrayRef<const MethodDebugInfo>& method_infos) {
+  const InstructionSet isa = parent_builder->GetIsa();
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Mini-debug-info ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+  // Write .rodata and .text as NOBITS sections.
+  // This allows tools to detect virtual address relocation of the parent ELF file.
+  builder->SetVirtualAddress(parent_builder->GetRoData()->GetAddress());
+  builder->GetRoData()->WriteNoBitsSection(parent_builder->GetRoData()->GetSize());
+  builder->SetVirtualAddress(parent_builder->GetText()->GetAddress());
+  builder->GetText()->WriteNoBitsSection(parent_builder->GetText()->GetSize());
+  WriteDebugSymbols(builder.get(), method_infos, false /* with_signature */);
+  WriteCFISection(builder.get(), method_infos, DW_DEBUG_FRAME_FORMAT, false /* write_oat_patches */);
+  builder->End();
+  CHECK(builder->Good());
+  std::vector<uint8_t> compressed_buffer;
+  compressed_buffer.reserve(buffer.size() / 4);
+  XzCompress(&buffer, &compressed_buffer);
+  parent_builder->WriteSection(".gnu_debugdata", &compressed_buffer);
+}
+
 template <typename ElfTypes>
 static ArrayRef<const uint8_t> WriteDebugElfFileForMethodInternal(
     const dwarf::MethodDebugInfo& method_info) {
@@ -1427,6 +1591,12 @@
     ElfBuilder<ElfTypes64>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat cfi_format);
+template void WriteMiniDebugInfo<ElfTypes32>(
+    ElfBuilder<ElfTypes32>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos);
+template void WriteMiniDebugInfo<ElfTypes64>(
+    ElfBuilder<ElfTypes64>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos);
 
 }  // namespace dwarf
 }  // namespace art
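
The .gnu_debugdata section written above follows the MiniDebugInfo convention: an XZ-compressed inner ELF file carrying a symbol table and .debug_frame, which debuggers such as gdb can consume directly. A minimal usage sketch of the XzCompress helper (the helper is file-local; the buffer contents here are placeholders):

std::vector<uint8_t> inner_elf(16 * KB, 0u);  // Stand-in for the mini-debug-info ELF image.
std::vector<uint8_t> xz_stream;
xz_stream.reserve(inner_elf.size() / 4);      // Same sizing heuristic as WriteMiniDebugInfo.
XzCompress(&inner_elf, &xz_stream);           // CHECK-fails on compression error.
// xz_stream now holds a complete .xz container, ready to be written verbatim
// as the contents of the .gnu_debugdata section.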
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index e4bc856..e19da08 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -35,6 +35,10 @@
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format);
 
+template <typename ElfTypes>
+void WriteMiniDebugInfo(ElfBuilder<ElfTypes>* builder,
+                        const ArrayRef<const MethodDebugInfo>& method_infos);
+
 ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info);
 
 ArrayRef<const uint8_t> WriteDebugElfFileForClasses(const InstructionSet isa,
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 7b1bdd7..6bf080a 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -137,9 +137,7 @@
 void ElfWriterQuick<ElfTypes>::SetBssSize(size_t bss_size) {
   auto* bss = builder_->GetBss();
   if (bss_size != 0u) {
-    bss->Start();
-    bss->SetSize(bss_size);
-    bss->End();
+    bss->WriteNoBitsSection(bss_size);
   }
 }
 
@@ -152,8 +150,13 @@
 void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   if (compiler_options_->GetGenerateDebugInfo()) {
+    // Generate all the debug information we can.
     dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
   }
+  if (compiler_options_->GetGenerateMiniDebugInfo()) {
+    // Generate only some information and compress it.
+    dwarf::WriteMiniDebugInfo(builder_.get(), method_infos);
+  }
 }
 
 template <typename ElfTypes>
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6859605..a5a7796 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -95,25 +95,38 @@
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
-      OatWriter oat_writer(class_linker->GetBootClassPath(),
-                           0,
-                           0,
-                           0,
-                           compiler_driver_.get(),
-                           writer.get(),
-                           /*compiling_boot_image*/true,
-                           &timings,
-                           &key_value_store);
+      const std::vector<const DexFile*>& dex_files = class_linker->GetBootClassPath();
       std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
           compiler_driver_->GetInstructionSet(),
           &compiler_driver_->GetCompilerOptions(),
           oat_file.GetFile());
-      bool success = writer->PrepareImageAddressSpace();
-      ASSERT_TRUE(success);
-
       elf_writer->Start();
-
+      OatWriter oat_writer(/*compiling_boot_image*/true, &timings);
       OutputStream* rodata = elf_writer->StartRoData();
+      for (const DexFile* dex_file : dex_files) {
+        ArrayRef<const uint8_t> raw_dex_file(
+            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+            dex_file->GetHeader().file_size_);
+        oat_writer.AddRawDexFileSource(raw_dex_file,
+                                       dex_file->GetLocation().c_str(),
+                                       dex_file->GetLocationChecksum());
+      }
+      std::unique_ptr<MemMap> opened_dex_files_map;
+      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+      bool dex_files_ok = oat_writer.WriteAndOpenDexFiles(
+          rodata,
+          oat_file.GetFile(),
+          compiler_driver_->GetInstructionSet(),
+          compiler_driver_->GetInstructionSetFeatures(),
+          &key_value_store,
+          /* verify */ false,           // Dex files may be dex-to-dex-ed, don't verify.
+          &opened_dex_files_map,
+          &opened_dex_files);
+      ASSERT_TRUE(dex_files_ok);
+      oat_writer.PrepareLayout(compiler_driver_.get(), writer.get(), dex_files);
+      bool image_space_ok = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(image_space_ok);
+
       bool rodata_ok = oat_writer.WriteRodata(rodata);
       ASSERT_TRUE(rodata_ok);
       elf_writer->EndRoData(rodata);
@@ -123,12 +136,15 @@
       ASSERT_TRUE(text_ok);
       elf_writer->EndText(text);
 
+      bool header_ok = oat_writer.WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+      ASSERT_TRUE(header_ok);
+
       elf_writer->SetBssSize(oat_writer.GetBssSize());
       elf_writer->WriteDynamicSection();
       elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
       elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
 
-      success = elf_writer->End();
+      bool success = elf_writer->End();
 
       ASSERT_TRUE(success);
     }
@@ -140,7 +156,11 @@
   {
     std::vector<const char*> dup_oat_filename(1, dup_oat->GetPath().c_str());
     std::vector<const char*> dup_image_filename(1, image_file.GetFilename().c_str());
-    bool success_image = writer->Write(kInvalidImageFd, dup_image_filename, dup_oat_filename);
+    bool success_image = writer->Write(kInvalidFd,
+                                       dup_image_filename,
+                                       kInvalidFd,
+                                       dup_oat_filename,
+                                       dup_oat_filename[0]);
     ASSERT_TRUE(success_image);
     bool success_fixup = ElfWriter::Fixup(dup_oat.get(),
                                           writer->GetOatDataBegin(dup_oat_filename[0]));
@@ -162,7 +182,7 @@
     ASSERT_NE(0U, bitmap_section.Size());
 
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    ASSERT_TRUE(!heap->GetContinuousSpaces().empty());
+    ASSERT_TRUE(heap->HaveContinuousSpaces());
     gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
     ASSERT_FALSE(space->IsImageSpace());
     ASSERT_TRUE(space != nullptr);
@@ -277,11 +297,17 @@
                              oat_data_begin,
                              oat_data_end,
                              oat_file_end,
+                             /*boot_image_begin*/0U,
+                             /*boot_image_size*/0U,
+                             /*boot_oat_begin*/0U,
+                             /*boot_oat_size*/0U,
                              sizeof(void*),
                              /*compile_pic*/false,
+                             /*is_pic*/false,
                              ImageHeader::kDefaultStorageMode,
                              /*data_size*/0u);
     ASSERT_TRUE(image_header.IsValid());
+    ASSERT_TRUE(!image_header.IsAppImage());
 
     char* magic = const_cast<char*>(image_header.GetMagic());
     strcpy(magic, "");  // bad magic
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d0bb201..c8720ea 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -159,17 +159,45 @@
 
 bool ImageWriter::Write(int image_fd,
                         const std::vector<const char*>& image_filenames,
-                        const std::vector<const char*>& oat_filenames) {
+                        int oat_fd,
+                        const std::vector<const char*>& oat_filenames,
+                        const std::string& oat_location) {
+  // If image_fd or oat_fd are not kInvalidFd then we may have empty strings in image_filenames or
+  // oat_filenames.
   CHECK(!image_filenames.empty());
+  if (image_fd != kInvalidFd) {
+    CHECK_EQ(image_filenames.size(), 1u);
+  }
   CHECK(!oat_filenames.empty());
+  if (oat_fd != kInvalidFd) {
+    CHECK_EQ(oat_filenames.size(), 1u);
+  }
   CHECK_EQ(image_filenames.size(), oat_filenames.size());
 
   size_t oat_file_offset = 0;
 
   for (size_t i = 0; i < oat_filenames.size(); ++i) {
     const char* oat_filename = oat_filenames[i];
-    std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
-    if (oat_file.get() == nullptr) {
+    std::unique_ptr<File> oat_file;
+
+    if (oat_fd != -1) {
+      if (strlen(oat_filename) == 0u) {
+        oat_file.reset(new File(oat_fd, false));
+      } else {
+        oat_file.reset(new File(oat_fd, oat_filename, false));
+      }
+      int length = oat_file->GetLength();
+      if (length < 0) {
+        PLOG(ERROR) << "Oat file has negative length " << length;
+        return false;
+      } else {
+        // Leave the fd open since dex2oat still needs to write out the oat file with the fd.
+        oat_file->DisableAutoClose();
+      }
+    } else {
+      oat_file.reset(OS::OpenFileReadWrite(oat_filename));
+    }
+    if (oat_file == nullptr) {
       PLOG(ERROR) << "Failed to open oat file " << oat_filename;
       return false;
     }
@@ -181,7 +209,7 @@
       return false;
     }
     Runtime::Current()->GetOatFileManager().RegisterOatFile(
-      std::unique_ptr<const OatFile>(oat_file_));
+        std::unique_ptr<const OatFile>(oat_file_));
 
     const OatHeader& oat_header = oat_file_->GetOatHeader();
     ImageInfo& image_info = GetImageInfo(oat_filename);
@@ -220,8 +248,15 @@
 
     SetOatChecksumFromElfFile(oat_file.get());
 
-    if (oat_file->FlushCloseOrErase() != 0) {
-      LOG(ERROR) << "Failed to flush and close oat file " << oat_filename;
+    if (oat_fd != -1) {
+      // Leave fd open for caller.
+      if (oat_file->Flush() != 0) {
+        LOG(ERROR) << "Failed to flush oat file " << oat_filename << " for " << oat_location;
+        return false;
+      }
+    } else if (oat_file->FlushCloseOrErase() != 0) {
+      LOG(ERROR) << "Failed to flush and close oat file " << oat_filename
+                 << " for " << oat_location;
       return false;
     }
   }
@@ -238,16 +273,22 @@
     const char* oat_filename = oat_filenames[i];
     ImageInfo& image_info = GetImageInfo(oat_filename);
     std::unique_ptr<File> image_file;
-    if (image_fd != kInvalidImageFd) {
-      image_file.reset(new File(image_fd, image_filename, unix_file::kCheckSafeUsage));
+    if (image_fd != kInvalidFd) {
+      if (strlen(image_filename) == 0u) {
+        image_file.reset(new File(image_fd, unix_file::kCheckSafeUsage));
+      } else {
+        LOG(ERROR) << "image fd " << image_fd << " name " << image_filename;
+      }
     } else {
       image_file.reset(OS::CreateEmptyFile(image_filename));
     }
+
     if (image_file == nullptr) {
       LOG(ERROR) << "Failed to open image file " << image_filename;
       return false;
     }
-    if (fchmod(image_file->Fd(), 0644) != 0) {
+
+    if (!compile_app_image_ && fchmod(image_file->Fd(), 0644) != 0) {
       PLOG(ERROR) << "Failed to make image file world readable: " << image_filename;
       image_file->Erase();
       return EXIT_FAILURE;
@@ -701,6 +742,7 @@
     std::unordered_set<mirror::Class*>* visited) {
   DCHECK(early_exit != nullptr);
   DCHECK(visited != nullptr);
+  DCHECK(compile_app_image_);
   if (klass == nullptr) {
     return false;
   }
@@ -717,6 +759,13 @@
   visited->emplace(klass);
   bool result = IsBootClassLoaderNonImageClass(klass);
   bool my_early_exit = false;  // Only for ourselves, ignore caller.
+  // Remove classes that failed to verify since we don't want to have java.lang.VerifyError in the
+  // app image.
+  if (klass->GetStatus() == mirror::Class::kStatusError) {
+    result = true;
+  } else {
+    CHECK(klass->GetVerifyError() == nullptr) << PrettyClass(klass);
+  }
   if (!result) {
     // Check interfaces since these won't be visited through VisitReferences.
     mirror::IfTable* if_table = klass->GetIfTable();
@@ -727,6 +776,12 @@
           visited);
     }
   }
+  if (klass->IsObjectArrayClass()) {
+    result = result || ContainsBootClassLoaderNonImageClassInternal(
+        klass->GetComponentType(),
+        &my_early_exit,
+        visited);
+  }
   // Check static fields and their classes.
   size_t num_static_fields = klass->NumReferenceStaticFields();
   if (num_static_fields != 0 && klass->IsResolved()) {
@@ -780,7 +835,9 @@
   if (compile_app_image_) {
     // For app images, we need to prune boot loader classes that are not in the boot image since
     // these may have already been loaded when the app image is loaded.
-    return !ContainsBootClassLoaderNonImageClass(klass);
+    // Keep classes in the boot image space since we don't want to re-resolve these.
+    return Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass) ||
+        !ContainsBootClassLoaderNonImageClass(klass);
   }
   std::string temp;
   return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
@@ -843,25 +900,25 @@
     for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* method =
           mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
-      if (method != nullptr) {
-        auto* declaring_class = method->GetDeclaringClass();
-        // Miranda methods may be held live by a class which was not an image class but have a
-        // declaring class which is an image class. Set it to the resolution method to be safe and
-        // prevent dangling pointers.
-        if (method->IsMiranda() || !KeepClass(declaring_class)) {
-          mirror::DexCache::SetElementPtrSize(resolved_methods,
-                                              i,
-                                              resolution_method,
-                                              target_ptr_size_);
-        } else {
-          // Check that the class is still in the classes table.
-          DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
-              << PrettyClass(declaring_class) << " not in class linker table";
-        }
+      DCHECK(method != nullptr) << "Expected resolution method instead of null method";
+      mirror::Class* declaring_class = method->GetDeclaringClass();
+      // Miranda methods may be held live by a class which was not an image class but have a
+      // declaring class which is an image class. Set it to the resolution method to be safe and
+      // prevent dangling pointers.
+      if (method->IsMiranda() || !KeepClass(declaring_class)) {
+        mirror::DexCache::SetElementPtrSize(resolved_methods,
+                                            i,
+                                            resolution_method,
+                                            target_ptr_size_);
+      } else {
+        // Check that the class is still in the classes table.
+        DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
+            << PrettyClass(declaring_class) << " not in class linker table";
       }
     }
+    ArtField** resolved_fields = dex_cache->GetResolvedFields();
     for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
-      ArtField* field = dex_cache->GetResolvedField(i, target_ptr_size_);
+      ArtField* field = mirror::DexCache::GetElementPtrSize(resolved_fields, i, target_ptr_size_);
       if (field != nullptr && !KeepClass(field->GetDeclaringClass())) {
         dex_cache->SetResolvedField(i, nullptr, target_ptr_size_);
       }
@@ -906,6 +963,32 @@
   }
 }
 
+mirror::String* ImageWriter::FindInternedString(mirror::String* string) {
+  Thread* const self = Thread::Current();
+  for (auto& pair : image_info_map_) {
+    const ImageInfo& image_info = pair.second;
+    mirror::String* const found = image_info.intern_table_->LookupStrong(self, string);
+    DCHECK(image_info.intern_table_->LookupWeak(self, string) == nullptr)
+        << string->ToModifiedUtf8();
+    if (found != nullptr) {
+      return found;
+    }
+  }
+  if (compile_app_image_) {
+    Runtime* const runtime = Runtime::Current();
+    mirror::String* found = runtime->GetInternTable()->LookupStrong(self, string);
+    // If we found it in the runtime intern table, it could either be in the boot image or
+    // interned during app image compilation. If it was in the boot image, return that; otherwise
+    // return null since it belongs to another image space.
+    if (found != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(found)) {
+      return found;
+    }
+    DCHECK(runtime->GetInternTable()->LookupWeak(self, string) == nullptr)
+        << string->ToModifiedUtf8();
+  }
+  return nullptr;
+}
+
 void ImageWriter::CalculateObjectBinSlots(Object* obj) {
   DCHECK(obj != nullptr);
   // if it is a string, we want to intern it if its not interned.
@@ -915,13 +998,16 @@
 
     // we must be an interned string that was forward referenced and already assigned
     if (IsImageBinSlotAssigned(obj)) {
-      DCHECK_EQ(obj, image_info.intern_table_->InternStrongImageString(obj->AsString()));
+      DCHECK_EQ(obj, FindInternedString(obj->AsString()));
       return;
     }
-    // InternImageString allows us to intern while holding the heap bitmap lock. This is safe since
-    // we are guaranteed to not have GC during image writing.
-    mirror::String* const interned = image_info.intern_table_->InternStrongImageString(
-        obj->AsString());
+    // Need to check if the string is already interned in another image info so that the intern
+    // tables of two different images do not contain the same string.
+    mirror::String* interned = FindInternedString(obj->AsString());
+    if (interned == nullptr) {
+      // Not in another image space; insert into our table.
+      interned = image_info.intern_table_->InternStrongImageString(obj->AsString());
+    }
     if (obj != interned) {
       if (!IsImageBinSlotAssigned(interned)) {
         // interned obj is after us, allocate its location early
@@ -1066,6 +1152,11 @@
       // Visit and assign offsets for fields and field arrays.
       auto* as_klass = h_obj->AsClass();
       mirror::DexCache* dex_cache = as_klass->GetDexCache();
+      DCHECK_NE(as_klass->GetStatus(), mirror::Class::kStatusError);
+      if (compile_app_image_) {
+        // Extra sanity check: no boot class loader classes should be left!
+        CHECK(!IsBootClassLoaderClass(as_klass)) << PrettyClass(as_klass);
+      }
       LengthPrefixedArray<ArtField>* fields[] = {
           as_klass->GetSFieldsPtr(), as_klass->GetIFieldsPtr(),
       };
@@ -1405,6 +1496,13 @@
               << " Oat data end=" << reinterpret_cast<uintptr_t>(oat_data_end)
               << " Oat file end=" << reinterpret_cast<uintptr_t>(oat_file_end);
   }
+  // Store boot image info for app image so that we can relocate.
+  uint32_t boot_image_begin = 0;
+  uint32_t boot_image_end = 0;
+  uint32_t boot_oat_begin = 0;
+  uint32_t boot_oat_end = 0;
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  heap->GetBootImagesSize(&boot_image_begin, &boot_image_end, &boot_oat_begin, &boot_oat_end);
 
   // Create the header, leave 0 for data size since we will fill this in as we are writing the
   // image.
@@ -1417,8 +1515,13 @@
                                                PointerToLowMemUInt32(image_info.oat_data_begin_),
                                                PointerToLowMemUInt32(oat_data_end),
                                                PointerToLowMemUInt32(oat_file_end),
+                                               boot_image_begin,
+                                               boot_image_end - boot_image_begin,
+                                               boot_oat_begin,
+                                               boot_oat_end - boot_oat_begin,
                                                target_ptr_size_,
                                                compile_pic_,
+                                               /*is_pic*/compile_app_image_,
                                                image_storage_mode_,
                                                /*data_size*/0u);
 }
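The four new header fields record where the boot image and boot oat file were mapped at compile time, making the app image self-describing for relocation. A hedged sketch of the kind of range check this enables in a loader (helper name hypothetical):

```cpp
#include <cstdint>

// Sketch only: with boot_image_begin and the stored size, a loader can decide
// whether a compile-time address points into the boot image (and thus follows
// the boot image's relocation) or into this image. The unsigned subtraction
// makes it a single-comparison range check.
inline bool InBootImage(uint32_t address, uint32_t boot_image_begin, uint32_t boot_image_size) {
  return (address - boot_image_begin) < boot_image_size;
}
```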
@@ -1763,6 +1866,9 @@
   orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this, oat_filename));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
+
+  // Remove the clinitThreadId. This is required for image determinism.
+  copy->SetClinitThreadId(static_cast<pid_t>(0));
 }
 
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
@@ -1805,13 +1911,14 @@
       if (klass == class_linker->GetClassRoot(ClassLinker::kJavaLangDexCache)) {
         FixupDexCache(down_cast<mirror::DexCache*>(orig), down_cast<mirror::DexCache*>(copy));
       } else if (klass->IsClassLoaderClass()) {
+        mirror::ClassLoader* copy_loader = down_cast<mirror::ClassLoader*>(copy);
         // If src is a ClassLoader, set the class table to null so that it gets recreated by the
         // ClassLoader.
-        down_cast<mirror::ClassLoader*>(copy)->SetClassTable(nullptr);
+        copy_loader->SetClassTable(nullptr);
         // Also set allocator to null to be safe. The allocator is created when we create the class
         // table. We also never expect to unload things in the image since they are held live as
         // roots.
-        down_cast<mirror::ClassLoader*>(copy)->SetAllocator(nullptr);
+        copy_loader->SetAllocator(nullptr);
       }
     }
     FixupVisitor visitor(this, copy);
@@ -1889,6 +1996,10 @@
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
+
+  // Remove the DexFile pointers. They will be fixed up when the runtime loads the oat file. Leaving
+  // compiler pointers in here will make the output non-deterministic.
+  copy_dex_cache->SetDexFile(nullptr);
 }
 
 const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const {
@@ -1896,7 +2007,7 @@
   // If we are compiling an app image, we need to use the stubs of the boot image.
   if (compile_app_image_) {
     // Use the current image pointers.
-    std::vector<gc::space::ImageSpace*> image_spaces =
+    const std::vector<gc::space::ImageSpace*>& image_spaces =
         Runtime::Current()->GetHeap()->GetBootImageSpaces();
     DCHECK(!image_spaces.empty());
     const OatFile* oat_file = image_spaces[0]->GetOatFile();
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index ad69038..622eb19 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -49,7 +49,7 @@
 
 class ClassTable;
 
-static constexpr int kInvalidImageFd = -1;
+static constexpr int kInvalidFd = -1;
 
 // Write a Space built during compilation for use during execution.
 class ImageWriter FINAL {
@@ -103,11 +103,15 @@
 
   uint8_t* GetOatFileBegin(const char* oat_filename) const;
 
-  // If image_fd is not kInvalidImageFd, then we use that for the file. Otherwise we open
+  // If image_fd is not kInvalidFd, then we use that for the image file. Otherwise we open
   // the names in image_filenames.
+  // If oat_fd is not kInvalidFd, then we use that for the oat file. Otherwise we open
+  // the names in oat_filenames.
   bool Write(int image_fd,
              const std::vector<const char*>& image_filenames,
-             const std::vector<const char*>& oat_filenames)
+             int oat_fd,
+             const std::vector<const char*>& oat_filenames,
+             const std::string& oat_location)
       REQUIRES(!Locks::mutator_lock_);
 
   uintptr_t GetOatDataBegin(const char* oat_filename) {
@@ -447,6 +451,10 @@
   const ImageInfo& GetConstImageInfo(const char* oat_filename) const;
   const ImageInfo& GetImageInfo(size_t index) const;
 
+  // Find an already strong-interned string in the other images or in the boot image. Used to
+  // remove duplicates in the multi-image and app image cases.
+  mirror::String* FindInternedString(mirror::String* string) SHARED_REQUIRES(Locks::mutator_lock_);
+
   const CompilerDriver& compiler_driver_;
 
   // Beginning target image address for the first image.
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3a3275a..6774758 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -112,7 +112,8 @@
       /* init_failure_output */ nullptr,
       /* abort_on_hard_verifier_failure */ false,
       /* dump_cfg_file_name */ "",
-      /* dump_cfg_append */ false));
+      /* dump_cfg_append */ false,
+      /* force_determinism */ false));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
@@ -178,7 +179,7 @@
 
   if (compiler_options_->GetGenerateDebugInfo()) {
 #ifdef __ANDROID__
-    const char* prefix = GetAndroidData();
+    const char* prefix = "/data/misc/trace";
 #else
     const char* prefix = "/tmp";
 #endif
@@ -187,7 +188,8 @@
     std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map";
     perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
     if (perf_file_ == nullptr) {
-      LOG(FATAL) << "Could not create perf file at " << perf_filename;
+      LOG(ERROR) << "Could not create perf file at " << perf_filename
+                 << ". Are you on a user build? Perf only works on userdebug/eng builds.";
     }
   }
 }
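The perf map written here uses the plain-text format perf(1) expects for JIT-ed code: one line per method, with the code start address and size in hex followed by the symbol name, matching the stream code in the next hunk. A small sketch of formatting one entry (helper name hypothetical):

```cpp
#include <cstddef>
#include <cstdint>
#include <sstream>
#include <string>

// Format one perf-<pid>.map line: "START SIZE name\n", START and SIZE in hex.
std::string PerfMapLine(uintptr_t code_start, size_t code_size, const std::string& name) {
  std::ostringstream stream;
  stream << std::hex << code_start << " " << code_size << " " << name << "\n";
  return stream.str();
}
```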
@@ -222,7 +224,7 @@
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
-    if (success && compiler_options_->GetGenerateDebugInfo()) {
+    if (success && perf_file_ != nullptr) {
       const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
       std::ostringstream stream;
       stream << std::hex
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 52a2382..e920460 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -95,7 +95,7 @@
 
   // Assembler that holds generated instructions
   std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set, instruction_set_features));
-  jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GetGenerateDebugInfo());
+  jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 7a2b74e..cff2f47 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -38,6 +38,7 @@
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
+#include "utils/test_dex_file_builder.h"
 
 namespace art {
 
@@ -125,25 +126,81 @@
 
   bool WriteElf(File* file,
                 const std::vector<const DexFile*>& dex_files,
-                SafeMap<std::string, std::string>& key_value_store) {
+                SafeMap<std::string, std::string>& key_value_store,
+                bool verify) {
     TimingLogger timings("WriteElf", false, false);
-    OatWriter oat_writer(dex_files,
-                         42U,
-                         4096U,
-                         0,
-                         compiler_driver_.get(),
-                         nullptr,
-                         /*compiling_boot_image*/false,
-                         &timings,
-                         &key_value_store);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const DexFile* dex_file : dex_files) {
+      ArrayRef<const uint8_t> raw_dex_file(
+          reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+          dex_file->GetHeader().file_size_);
+      if (!oat_writer.AddRawDexFileSource(raw_dex_file,
+                                          dex_file->GetLocation().c_str(),
+                                          dex_file->GetLocationChecksum())) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store, verify);
+  }
+
+  bool WriteElf(File* file,
+                const std::vector<const char*>& dex_filenames,
+                SafeMap<std::string, std::string>& key_value_store,
+                bool verify) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const char* dex_filename : dex_filenames) {
+      if (!oat_writer.AddDexFileSource(dex_filename, dex_filename)) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store, verify);
+  }
+
+  bool WriteElf(File* file,
+                ScopedFd&& zip_fd,
+                const char* location,
+                SafeMap<std::string, std::string>& key_value_store,
+                bool verify) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    if (!oat_writer.AddZippedDexFilesSource(std::move(zip_fd), location)) {
+      return false;
+    }
+    return DoWriteElf(file, oat_writer, key_value_store, verify);
+  }
+
+  bool DoWriteElf(File* file,
+                  OatWriter& oat_writer,
+                  SafeMap<std::string, std::string>& key_value_store,
+                  bool verify) {
     std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
         compiler_driver_->GetInstructionSet(),
         &compiler_driver_->GetCompilerOptions(),
         file);
-
     elf_writer->Start();
-
     OutputStream* rodata = elf_writer->StartRoData();
+    std::unique_ptr<MemMap> opened_dex_files_map;
+    std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+    if (!oat_writer.WriteAndOpenDexFiles(rodata,
+                                         file,
+                                         compiler_driver_->GetInstructionSet(),
+                                         compiler_driver_->GetInstructionSetFeatures(),
+                                         &key_value_store,
+                                         verify,
+                                         &opened_dex_files_map,
+                                         &opened_dex_files)) {
+      return false;
+    }
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    std::vector<const DexFile*> dex_files;
+    for (const std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+      dex_files.push_back(dex_file.get());
+      ScopedObjectAccess soa(Thread::Current());
+      class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
+    }
+    oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files);
     if (!oat_writer.WriteRodata(rodata)) {
       return false;
     }
@@ -155,6 +212,10 @@
     }
     elf_writer->EndText(text);
 
+    if (!oat_writer.WriteHeader(elf_writer->GetStream(), 42U, 4096U, 0)) {
+      return false;
+    }
+
     elf_writer->SetBssSize(oat_writer.GetBssSize());
     elf_writer->WriteDynamicSection();
     elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
@@ -163,10 +224,124 @@
     return elf_writer->End();
   }
 
+  void TestDexFileInput(bool verify);
+  void TestZipFileInput(bool verify);
+
   std::unique_ptr<const InstructionSetFeatures> insn_features_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 };
 
+class ZipBuilder {
+ public:
+  explicit ZipBuilder(File* zip_file) : zip_file_(zip_file) { }
+
+  bool AddFile(const char* location, const void* data, size_t size) {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    ZipFileHeader file_header;
+    file_header.crc32 = crc32(0u, reinterpret_cast<const Bytef*>(data), size);
+    file_header.compressed_size = size;
+    file_header.uncompressed_size = size;
+    file_header.filename_length = strlen(location);
+
+    if (!zip_file_->WriteFully(&file_header, sizeof(file_header)) ||
+        !zip_file_->WriteFully(location, file_header.filename_length) ||
+        !zip_file_->WriteFully(data, size)) {
+      return false;
+    }
+
+    CentralDirectoryFileHeader cdfh;
+    cdfh.crc32 = file_header.crc32;
+    cdfh.compressed_size = size;
+    cdfh.uncompressed_size = size;
+    cdfh.filename_length = file_header.filename_length;
+    cdfh.relative_offset_of_local_file_header = offset;
+    file_data_.push_back(FileData { cdfh, location });
+    return true;
+  }
+
+  bool Finish() {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    size_t central_directory_size = 0u;
+    for (const FileData& file_data : file_data_) {
+      if (!zip_file_->WriteFully(&file_data.cdfh, sizeof(file_data.cdfh)) ||
+          !zip_file_->WriteFully(file_data.location, file_data.cdfh.filename_length)) {
+        return false;
+      }
+      central_directory_size += sizeof(file_data.cdfh) + file_data.cdfh.filename_length;
+    }
+    EndOfCentralDirectoryRecord eocd_record;
+    eocd_record.number_of_central_directory_records_on_this_disk = file_data_.size();
+    eocd_record.total_number_of_central_directory_records = file_data_.size();
+    eocd_record.size_of_central_directory = central_directory_size;
+    eocd_record.offset_of_start_of_central_directory = offset;
+    return
+        zip_file_->WriteFully(&eocd_record, sizeof(eocd_record)) &&
+        zip_file_->Flush() == 0;
+  }
+
+ private:
+  struct PACKED(1) ZipFileHeader {
+    uint32_t signature = 0x04034b50;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+  };
+
+  struct PACKED(1) CentralDirectoryFileHeader {
+    uint32_t signature = 0x02014b50;
+    uint16_t version_made_by = 10;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+    uint16_t file_comment_length = 0u;          // No file comment.
+    uint16_t disk_number_where_file_starts = 0u;
+    uint16_t internal_file_attributes = 0u;
+    uint32_t external_file_attributes = 0u;
+    uint32_t relative_offset_of_local_file_header;
+  };
+
+  struct PACKED(1) EndOfCentralDirectoryRecord {
+    uint32_t signature = 0x06054b50;
+    uint16_t number_of_this_disk = 0u;
+    uint16_t disk_where_central_directory_starts = 0u;
+    uint16_t number_of_central_directory_records_on_this_disk;
+    uint16_t total_number_of_central_directory_records;
+    uint32_t size_of_central_directory;
+    uint32_t offset_of_start_of_central_directory;
+    uint16_t comment_length = 0u;               // No file comment.
+  };
+
+  struct FileData {
+    CentralDirectoryFileHeader cdfh;
+    const char* location;
+  };
+
+  File* zip_file_;
+  std::vector<FileData> file_data_;
+};
+
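ZipBuilder produces the minimal stored-only (compression_method == 0) zip layout: each AddFile() call appends a local file header, the entry name, and the raw bytes; Finish() then appends one central directory record per entry followed by the end-of-central-directory record. A usage sketch under the same assumptions as the tests below (dex1_data/dex1_size and friends are placeholders):

```cpp
// Call order matters: all AddFile() calls first, then Finish() exactly once.
ScratchFile zip_file;
ZipBuilder builder(zip_file.GetFile());
CHECK(builder.AddFile("classes.dex", dex1_data, dex1_size));   // local header + data
CHECK(builder.AddFile("classes2.dex", dex2_data, dex2_size));  // appended after the first
CHECK(builder.Finish());                                       // central directory + EOCD
```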
 TEST_F(OatTest, WriteRead) {
   TimingLogger timings("OatTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -187,7 +362,7 @@
   ScratchFile tmp;
   SafeMap<std::string, std::string> key_value_store;
   key_value_store.Put(OatHeader::kImageLocationKey, "lue.art");
-  bool success = WriteElf(tmp.GetFile(), class_linker->GetBootClassPath(), key_value_store);
+  bool success = WriteElf(tmp.GetFile(), class_linker->GetBootClassPath(), key_value_store, false);
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
@@ -313,7 +488,7 @@
   ScratchFile tmp;
   SafeMap<std::string, std::string> key_value_store;
   key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
-  bool success = WriteElf(tmp.GetFile(), dex_files, key_value_store);
+  bool success = WriteElf(tmp.GetFile(), dex_files, key_value_store, false);
   ASSERT_TRUE(success);
 
   std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(),
@@ -327,4 +502,242 @@
   EXPECT_LT(static_cast<size_t>(oat_file->Size()), static_cast<size_t>(tmp.GetFile()->GetLength()));
 }
 
+static void MaybeModifyDexFileToFail(bool verify, std::unique_ptr<const DexFile>& data) {
+  // If in verify mode (= make-the-verifier-fail mode), corrupt the checksum so that we fail
+  // early. We would fail anyway because of the missing map, but that failure path may lead to
+  // out-of-bounds reads.
+  if (verify) {
+    const_cast<DexFile::Header*>(&data->GetHeader())->checksum_++;
+  }
+}
+
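Bumping checksum_ is enough because dex file verification checks the header checksum before any deeper parsing. A minimal sketch of that rule, assuming the standard dex layout (an adler32 over everything after the 8-byte magic and 4-byte checksum fields):

```cpp
#include <cstddef>
#include <cstdint>
#include <zlib.h>

// Recompute the dex checksum the way a verifier would; the increment above
// guarantees this comparison fails before any out-of-bounds-prone parsing.
bool DexChecksumMatches(const uint8_t* data, size_t size, uint32_t stored_checksum) {
  static constexpr size_t kSkip = 8u /* magic */ + 4u /* checksum field */;
  uLong adler = adler32(0L, Z_NULL, 0);
  adler = adler32(adler, data + kSkip, static_cast<uInt>(size - kSkip));
  return static_cast<uint32_t>(adler) == stored_checksum;
}
```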
+void OatTest::TestDexFileInput(bool verify) {
+  TimingLogger timings("OatTest::DexFileInput", false, false);
+
+  std::vector<const char*> input_filenames;
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "int", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()I", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file1_data);
+
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file1.GetFilename().c_str());
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file2_data);
+
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file2.GetFilename().c_str());
+
+  ScratchFile oat_file;
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store, verify);
+
+  // In verify mode, we expect failure.
+  if (verify) {
+    ASSERT_FALSE(success);
+    return;
+  }
+
+  ASSERT_TRUE(success);
+
+  std::string error_msg;
+  std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                         oat_file.GetFilename(),
+                                                         nullptr,
+                                                         nullptr,
+                                                         false,
+                                                         nullptr,
+                                                         &error_msg));
+  ASSERT_TRUE(opened_oat_file != nullptr);
+  ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+  std::unique_ptr<const DexFile> opened_dex_file1 =
+      opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+  std::unique_ptr<const DexFile> opened_dex_file2 =
+      opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+  ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                      &opened_dex_file1->GetHeader(),
+                      dex_file1_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file1_data->GetLocation(), opened_dex_file1->GetLocation());
+
+  ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                      &opened_dex_file2->GetHeader(),
+                      dex_file2_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file2_data->GetLocation(), opened_dex_file2->GetLocation());
+}
+
+TEST_F(OatTest, DexFileInputCheckOutput) {
+  TestDexFileInput(false);
+}
+
+TEST_F(OatTest, DexFileInputCheckVerifier) {
+  TestDexFileInput(true);
+}
+
+void OatTest::TestZipFileInput(bool verify) {
+  TimingLogger timings("OatTest::ZipFileInput", false, false);
+
+  ScratchFile zip_file;
+  ZipBuilder zip_builder(zip_file.GetFile());
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "long", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()D", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file1_data);
+
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes.dex",
+                                &dex_file1_data->GetHeader(),
+                                dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file2_data);
+
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes2.dex",
+                                &dex_file2_data->GetHeader(),
+                                dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  success = zip_builder.Finish();
+  ASSERT_TRUE(success) << strerror(errno);
+
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  {
+    // Test using the AddDexFileSource() interface with the zip file.
+    std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() };  // NOLINT [readability/braces] [4]
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store, verify);
+
+    if (verify) {
+      ASSERT_FALSE(success);
+    } else {
+      ASSERT_TRUE(success);
+
+      std::string error_msg;
+      std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                             oat_file.GetFilename(),
+                                                             nullptr,
+                                                             nullptr,
+                                                             false,
+                                                             nullptr,
+                                                             &error_msg));
+      ASSERT_TRUE(opened_oat_file != nullptr);
+      ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+      std::unique_ptr<const DexFile> opened_dex_file1 =
+          opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+      std::unique_ptr<const DexFile> opened_dex_file2 =
+          opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+      ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                          &opened_dex_file1->GetHeader(),
+                          dex_file1_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+                opened_dex_file1->GetLocation());
+
+      ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                          &opened_dex_file2->GetHeader(),
+                          dex_file2_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+                opened_dex_file2->GetLocation());
+    }
+  }
+
+  {
+    // Test using the AddZippedDexFilesSource() interface with the zip file handle.
+    ScopedFd zip_fd(dup(zip_file.GetFd()));
+    ASSERT_NE(-1, zip_fd.get());
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(),
+                       std::move(zip_fd),
+                       zip_file.GetFilename().c_str(),
+                       key_value_store,
+                       verify);
+    if (verify) {
+      ASSERT_FALSE(success);
+    } else {
+      ASSERT_TRUE(success);
+
+      std::string error_msg;
+      std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                             oat_file.GetFilename(),
+                                                             nullptr,
+                                                             nullptr,
+                                                             false,
+                                                             nullptr,
+                                                             &error_msg));
+      ASSERT_TRUE(opened_oat_file != nullptr);
+      ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+      std::unique_ptr<const DexFile> opened_dex_file1 =
+          opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+      std::unique_ptr<const DexFile> opened_dex_file2 =
+          opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+      ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                          &opened_dex_file1->GetHeader(),
+                          dex_file1_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+                opened_dex_file1->GetLocation());
+
+      ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                          &opened_dex_file2->GetHeader(),
+                          dex_file2_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+                opened_dex_file2->GetLocation());
+    }
+  }
+}
+
+TEST_F(OatTest, ZipFileInputCheckOutput) {
+  TestZipFileInput(false);
+}
+
+TEST_F(OatTest, ZipFileInputCheckVerifier) {
+  TestZipFileInput(true);
+}
+
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 025e35e..90ac499 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -16,12 +16,14 @@
 
 #include "oat_writer.h"
 
+#include <unistd.h>
 #include <zlib.h>
 
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method-inl.h"
 #include "base/allocator.h"
 #include "base/bit_vector.h"
+#include "base/file_magic.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -49,9 +51,77 @@
 #include "type_lookup_table.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
+#include "zip_archive.h"
 
 namespace art {
 
+namespace {  // anonymous namespace
+
+typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader;
+
+const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) {
+  return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data);
+}
+
+}  // anonymous namespace
+
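The aligned(1) typedef exists because AddRawDexFileSource() may be handed a buffer at an arbitrary byte offset; dereferencing a normally aligned DexFile::Header pointer there would be undefined behavior (and a fault on strict-alignment targets). A sketch of the byte-safe read it enables, using the helper defined above:

```cpp
// Read the declared file size out of possibly unaligned raw dex bytes; the
// aligned(1) type tells the compiler to emit alignment-safe loads.
uint32_t RawDexFileSize(const uint8_t* raw_data) {
  return AsUnalignedDexFileHeader(raw_data)->file_size_;
}
```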
+// Defines the location of the raw dex file to write.
+class OatWriter::DexFileSource {
+ public:
+  explicit DexFileSource(ZipEntry* zip_entry)
+      : type_(kZipEntry), source_(zip_entry) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(File* raw_file)
+      : type_(kRawFile), source_(raw_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(const uint8_t* dex_file)
+      : type_(kRawData), source_(dex_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  bool IsZipEntry() const { return type_ == kZipEntry; }
+  bool IsRawFile() const { return type_ == kRawFile; }
+  bool IsRawData() const { return type_ == kRawData; }
+
+  ZipEntry* GetZipEntry() const {
+    DCHECK(IsZipEntry());
+    DCHECK(source_ != nullptr);
+    return static_cast<ZipEntry*>(const_cast<void*>(source_));
+  }
+
+  File* GetRawFile() const {
+    DCHECK(IsRawFile());
+    DCHECK(source_ != nullptr);
+    return static_cast<File*>(const_cast<void*>(source_));
+  }
+
+  const uint8_t* GetRawData() const {
+    DCHECK(IsRawData());
+    DCHECK(source_ != nullptr);
+    return static_cast<const uint8_t*>(source_);
+  }
+
+  void Clear() {
+    type_ = kNone;
+    source_ = nullptr;
+  }
+
+ private:
+  enum Type {
+    kNone,
+    kZipEntry,
+    kRawFile,
+    kRawData,
+  };
+
+  Type type_;
+  const void* source_;
+};
+
 class OatWriter::OatClass {
  public:
   OatClass(size_t offset,
@@ -116,11 +186,30 @@
 
 class OatWriter::OatDexFile {
  public:
-  OatDexFile(size_t offset, const DexFile& dex_file);
+  OatDexFile(const char* dex_file_location,
+             DexFileSource source,
+             CreateTypeLookupTable create_type_lookup_table);
   OatDexFile(OatDexFile&& src) = default;
 
+  const char* GetLocation() const {
+    return dex_file_location_data_;
+  }
+
+  void ReserveTypeLookupTable(OatWriter* oat_writer);
+  void ReserveClassOffsets(OatWriter* oat_writer);
+
   size_t SizeOf() const;
-  bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+  bool Write(OatWriter* oat_writer, OutputStream* out) const;
+  bool WriteClassOffsets(OatWriter* oat_writer, OutputStream* out);
+
+  // The source of the dex file.
+  DexFileSource source_;
+
+  // Whether to create the type lookup table.
+  CreateTypeLookupTable create_type_lookup_table_;
+
+  // Dex file size. Initialized when writing the dex file.
+  size_t dex_file_size_;
 
   // Offset of start of OatDexFile from beginning of OatHeader. It is
   // used to validate file position when writing.
@@ -128,11 +217,13 @@
 
   // Data to write.
   uint32_t dex_file_location_size_;
-  const uint8_t* dex_file_location_data_;
+  const char* dex_file_location_data_;
   uint32_t dex_file_location_checksum_;
   uint32_t dex_file_offset_;
+  uint32_t class_offsets_offset_;
   uint32_t lookup_table_offset_;
-  TypeLookupTable* lookup_table_;  // Owned by the dex file.
+
+  // Data to write to a separate section.
   dchecked_vector<uint32_t> class_offsets_;
 
  private:
@@ -151,26 +242,20 @@
   DCHECK_EQ(static_cast<off_t>(file_offset + offset_), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " offset_=" << offset_
 
-OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
-                     uint32_t image_file_location_oat_checksum,
-                     uintptr_t image_file_location_oat_begin,
-                     int32_t image_patch_delta,
-                     const CompilerDriver* compiler,
-                     ImageWriter* image_writer,
-                     bool compiling_boot_image,
-                     TimingLogger* timings,
-                     SafeMap<std::string, std::string>* key_value_store)
-  : compiler_driver_(compiler),
-    image_writer_(image_writer),
+OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings)
+  : write_state_(WriteState::kAddingDexFileSources),
+    timings_(timings),
+    raw_dex_files_(),
+    zip_archives_(),
+    zipped_dex_files_(),
+    zipped_dex_file_locations_(),
+    compiler_driver_(nullptr),
+    image_writer_(nullptr),
     compiling_boot_image_(compiling_boot_image),
-    dex_files_(&dex_files),
+    dex_files_(nullptr),
     size_(0u),
     bss_size_(0u),
     oat_data_offset_(0u),
-    image_file_location_oat_checksum_(image_file_location_oat_checksum),
-    image_file_location_oat_begin_(image_file_location_oat_begin),
-    image_patch_delta_(image_patch_delta),
-    key_value_store_(key_value_store),
     oat_header_(nullptr),
     size_dex_file_alignment_(0),
     size_executable_offset_alignment_(0),
@@ -197,55 +282,193 @@
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
     size_oat_dex_file_offset_(0),
+    size_oat_dex_file_class_offsets_offset_(0),
     size_oat_dex_file_lookup_table_offset_(0),
-    size_oat_dex_file_class_offsets_(0),
     size_oat_lookup_table_alignment_(0),
     size_oat_lookup_table_(0),
+    size_oat_class_offsets_alignment_(0),
+    size_oat_class_offsets_(0),
     size_oat_class_type_(0),
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
     method_offset_map_() {
-  CHECK(key_value_store != nullptr);
-  if (compiling_boot_image) {
-    CHECK(image_writer != nullptr);
+}
+
+bool OatWriter::AddDexFileSource(const char* filename,
+                                 const char* location,
+                                 CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  uint32_t magic;
+  std::string error_msg;
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
+  if (fd.get() == -1) {
+    PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "'";
+    return false;
+  } else if (IsDexMagic(magic)) {
+    // The file is open for reading, not writing, so it's OK to let the File destructor
+    // close it without checking for explicit Close(), so pass checkUsage = false.
+    raw_dex_files_.emplace_back(new File(fd.release(), location, /* checkUsage */ false));
+    oat_dex_files_.emplace_back(location,
+                                DexFileSource(raw_dex_files_.back().get()),
+                                create_type_lookup_table);
+  } else if (IsZipMagic(magic)) {
+    if (!AddZippedDexFilesSource(std::move(fd), location, create_type_lookup_table)) {
+      return false;
+    }
+  } else {
+    LOG(ERROR) << "Expected valid zip or dex file: '" << filename << "'";
+    return false;
+  }
+  return true;
+}
+
+// Add dex file source(s) from a zip file specified by a file handle.
+bool OatWriter::AddZippedDexFilesSource(ScopedFd&& zip_fd,
+                                        const char* location,
+                                        CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  std::string error_msg;
+  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.release(), location, &error_msg));
+  ZipArchive* zip_archive = zip_archives_.back().get();
+  if (zip_archive == nullptr) {
+    LOG(ERROR) << "Failed to open zip from file descriptor for '" << location << "': "
+        << error_msg;
+    return false;
+  }
+  for (size_t i = 0; ; ++i) {
+    std::string entry_name = DexFile::GetMultiDexClassesDexName(i);
+    std::unique_ptr<ZipEntry> entry(zip_archive->Find(entry_name.c_str(), &error_msg));
+    if (entry == nullptr) {
+      break;
+    }
+    zipped_dex_files_.push_back(std::move(entry));
+    zipped_dex_file_locations_.push_back(DexFile::GetMultiDexLocation(i, location));
+    const char* full_location = zipped_dex_file_locations_.back().c_str();
+    oat_dex_files_.emplace_back(full_location,
+                                DexFileSource(zipped_dex_files_.back().get()),
+                                create_type_lookup_table);
+  }
+  if (zipped_dex_file_locations_.empty()) {
+    LOG(ERROR) << "No dex files in zip file '" << location << "': " << error_msg;
+    return false;
+  }
+  return true;
+}
+
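The loop above relies on the multidex naming convention: entry 0 is classes.dex, entry i (for i > 0) is classes&lt;i+1&gt;.dex, and iteration stops at the first missing entry. A hypothetical standalone restatement of the rule that DexFile::GetMultiDexClassesDexName() encodes:

```cpp
#include <cstddef>
#include <string>

// classes.dex, classes2.dex, classes3.dex, ... (illustrative only).
std::string MultiDexEntryName(size_t index) {
  return index == 0 ? "classes.dex" : "classes" + std::to_string(index + 1) + ".dex";
}
```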
+// Add dex file source from raw memory.
+bool OatWriter::AddRawDexFileSource(const ArrayRef<const uint8_t>& data,
+                                    const char* location,
+                                    uint32_t location_checksum,
+                                    CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  if (data.size() < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Provided data is shorter than dex file header. size: "
+               << data.size() << " File: " << location;
+    return false;
+  }
+  if (!ValidateDexFileHeader(data.data(), location)) {
+    return false;
+  }
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(data.data());
+  if (data.size() < header->file_size_) {
+    LOG(ERROR) << "Truncated dex file data. Data size: " << data.size()
+               << " file size from header: " << header->file_size_ << " File: " << location;
+    return false;
+  }
+
+  oat_dex_files_.emplace_back(location, DexFileSource(data.data()), create_type_lookup_table);
+  oat_dex_files_.back().dex_file_location_checksum_ = location_checksum;
+  return true;
+}
+
+dchecked_vector<const char*> OatWriter::GetSourceLocations() const {
+  dchecked_vector<const char*> locations;
+  locations.reserve(oat_dex_files_.size());
+  for (const OatDexFile& oat_dex_file : oat_dex_files_) {
+    locations.push_back(oat_dex_file.GetLocation());
+  }
+  return locations;
+}
+
+bool OatWriter::WriteAndOpenDexFiles(
+    OutputStream* rodata,
+    File* file,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features,
+    SafeMap<std::string, std::string>* key_value_store,
+    bool verify,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  CHECK(write_state_ == WriteState::kAddingDexFileSources);
+
+  size_t offset = InitOatHeader(instruction_set,
+                                instruction_set_features,
+                                dchecked_integral_cast<uint32_t>(oat_dex_files_.size()),
+                                key_value_store);
+  offset = InitOatDexFiles(offset);
+  size_ = offset;
+
+  std::unique_ptr<MemMap> dex_files_map;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  if (!WriteDexFiles(rodata, file)) {
+    return false;
+  }
+  // Reserve space for type lookup tables and update type_lookup_table_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveTypeLookupTable(this);
+  }
+  size_t size_after_type_lookup_tables = size_;
+  // Reserve space for class offsets and update class_offsets_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveClassOffsets(this);
+  }
+  if (!WriteOatDexFiles(rodata) ||
+      !ExtendForTypeLookupTables(rodata, file, size_after_type_lookup_tables) ||
+      !OpenDexFiles(file, verify, &dex_files_map, &dex_files) ||
+      !WriteTypeLookupTables(dex_files_map.get(), dex_files)) {
+    return false;
+  }
+
+  *opened_dex_files_map = std::move(dex_files_map);
+  *opened_dex_files = std::move(dex_files);
+  write_state_ = WriteState::kPrepareLayout;
+  return true;
+}
+
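Taken together, these changes turn OatWriter into an explicit state machine whose stages must be driven in order; the sequence below (arguments abbreviated, variable names as in OatTest::DoWriteElf above) sketches the intended lifecycle:

```cpp
// kAddingDexFileSources -> kPrepareLayout -> kWriteRoData -> kWriteText
// -> kWriteHeader -> kDone; each call CHECKs the expected state.
OatWriter writer(/*compiling_boot_image*/ false, &timings);
writer.AddDexFileSource(filename, location);
writer.WriteAndOpenDexFiles(rodata, file, isa, features, &key_value_store,
                            /*verify*/ false, &dex_files_map, &opened_dex_files);
writer.PrepareLayout(compiler_driver, /*image_writer*/ nullptr, dex_files);
writer.WriteRodata(rodata);
writer.WriteCode(text);
writer.WriteHeader(elf_stream, image_checksum, image_oat_begin, image_patch_delta);
```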
+void OatWriter::PrepareLayout(const CompilerDriver* compiler,
+                              ImageWriter* image_writer,
+                              const std::vector<const DexFile*>& dex_files) {
+  CHECK(write_state_ == WriteState::kPrepareLayout);
+
+  dex_files_ = &dex_files;
+
+  compiler_driver_ = compiler;
+  image_writer_ = image_writer;
+  if (compiling_boot_image_) {
+    CHECK(image_writer_ != nullptr);
   }
   InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
+  CHECK_EQ(instruction_set, oat_header_->GetInstructionSet());
   const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
   relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
                                                       &method_offset_map_);
 
-  size_t offset;
+  uint32_t offset = size_;
   {
-    TimingLogger::ScopedTiming split("InitOatHeader", timings);
-    offset = InitOatHeader();
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatDexFiles", timings);
-    offset = InitOatDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitDexFiles", timings);
-    offset = InitDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitLookupTables", timings);
-    offset = InitLookupTables(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatClasses", timings);
+    TimingLogger::ScopedTiming split("InitOatClasses", timings_);
     offset = InitOatClasses(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatMaps", timings);
+    TimingLogger::ScopedTiming split("InitOatMaps", timings_);
     offset = InitOatMaps(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCode", timings);
+    TimingLogger::ScopedTiming split("InitOatCode", timings_);
     offset = InitOatCode(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings);
+    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings_);
     offset = InitOatCodeDexFiles(offset);
   }
   size_ = offset;
@@ -255,7 +478,7 @@
     size_t bss_start = RoundUp(size_, kPageSize);
     size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
     bss_size_ = 0u;
-    for (const DexFile* dex_file : dex_files) {
+    for (const DexFile* dex_file : *dex_files_) {
       dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_);
       DexCacheArraysLayout layout(pointer_size, dex_file);
       bss_size_ += layout.Size();
@@ -265,9 +488,10 @@
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
   if (compiling_boot_image_) {
     CHECK_EQ(image_writer_ != nullptr,
-             key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+             oat_header_->GetStoreValueByKey(OatHeader::kImageLocationKey) == nullptr);
   }
-  CHECK_ALIGNED(image_patch_delta_, kPageSize);
+
+  write_state_ = WriteState::kWriteRoData;
 }
 
 OatWriter::~OatWriter() {
@@ -502,8 +726,18 @@
 
       // Deduplicate code arrays if we are not producing debuggable code.
       bool deduped = false;
+      MethodReference method_ref(dex_file_, it.GetMemberIndex());
+      auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref);
       if (debuggable_) {
-        quick_code_offset = NewQuickCodeOffset(compiled_method, it, thumb_offset);
+        if (method_lb != writer_->method_offset_map_.map.end() &&
+            !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) {
+          // Duplicate methods: we want the same code for both of them so that the oat writer
+          // puts the same code in both ArtMethods and we do not get different oat code at
+          // runtime.
+          quick_code_offset = method_lb->second;
+          deduped = true;
+        } else {
+          quick_code_offset = NewQuickCodeOffset(compiled_method, it, thumb_offset);
+        }
       } else {
         auto lb = dedupe_map_.lower_bound(compiled_method);
         if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(compiled_method, lb->first)) {
@@ -516,14 +750,12 @@
       }
 
       if (code_size != 0) {
-        MethodReference method_ref(dex_file_, it.GetMemberIndex());
-        auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref);
         if (method_lb != writer_->method_offset_map_.map.end() &&
             !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) {
           // TODO: Should this be a hard failure?
           LOG(WARNING) << "Multiple definitions of "
               << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file)
-              << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : "");
+              << " offsets " << method_lb->second << " " << quick_code_offset;
         } else {
           writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset);
         }
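The lower_bound()/key_comp() pairing above is the standard one-pass find-or-insert idiom for ordered maps: lower_bound() finds the first element not less than the key, a reversed key_comp() test establishes equality, and on a miss the same iterator doubles as an insertion hint (what PutBefore() does here). In isolation:

```cpp
#include <map>

// One tree walk answers "is the key present?" and, on a miss, yields the
// hint that makes the subsequent insertion amortized O(1).
template <typename K, typename V>
V& FindOrInsert(std::map<K, V>& map, const K& key, const V& value) {
  auto lb = map.lower_bound(key);
  if (lb != map.end() && !map.key_comp()(key, lb->first)) {
    return lb->second;  // Present: lb->first is neither less nor greater than key.
  }
  return map.emplace_hint(lb, key, value)->second;
}
```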
@@ -575,7 +807,7 @@
         }
       }
 
-      if (writer_->compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
+      if (writer_->compiler_driver_->GetCompilerOptions().GenerateAnyDebugInfo()) {
         // Record debug information for this function if we are doing that.
         const uint32_t quick_code_start = quick_code_offset -
             writer_->oat_header_->GetExecutableOffset() - thumb_offset;
@@ -735,30 +967,40 @@
     }
 
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
-    InvokeType invoke_type = it.GetMethodInvokeType(dex_file_->GetClassDef(class_def_index_));
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(
         Thread::Current(), *dex_file_)));
-    ArtMethod* method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
-        *dex_file_,
-        it.GetMemberIndex(),
-        dex_cache,
-        ScopedNullHandle<mirror::ClassLoader>(),
-        nullptr,
-        invoke_type);
-    if (method == nullptr) {
-      LOG(INTERNAL_FATAL) << "Unexpected failure to resolve a method: "
-                          << PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
-      soa.Self()->AssertPendingException();
-      mirror::Throwable* exc = soa.Self()->GetException();
-      std::string dump = exc->Dump();
-      LOG(FATAL) << dump;
-      UNREACHABLE();
+    ArtMethod* method;
+    if (writer_->HasBootImage()) {
+      const InvokeType invoke_type = it.GetMethodInvokeType(
+          dex_file_->GetClassDef(class_def_index_));
+      method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+          *dex_file_,
+          it.GetMemberIndex(),
+          dex_cache,
+          ScopedNullHandle<mirror::ClassLoader>(),
+          nullptr,
+          invoke_type);
+      if (method == nullptr) {
+        LOG(INTERNAL_FATAL) << "Unexpected failure to resolve a method: "
+            << PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
+        soa.Self()->AssertPendingException();
+        mirror::Throwable* exc = soa.Self()->GetException();
+        std::string dump = exc->Dump();
+        LOG(FATAL) << dump;
+        UNREACHABLE();
+      }
+    } else {
+      // Should already have been resolved by the compiler; just peek into the dex cache.
+      // It may not be resolved if the class failed to verify; in that case, don't set the
+      // entrypoint. This is not fatal since the dex cache will contain a resolution method.
+      method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), linker->GetImagePointerSize());
     }
-
-    if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0) {
+    if (method != nullptr &&
+        compiled_method != nullptr &&
+        compiled_method->GetQuickCode().size() != 0) {
       method->SetEntryPointFromQuickCompiledCodePtrSize(
           reinterpret_cast<void*>(offsets.code_offset_), pointer_size_);
     }
@@ -1134,59 +1376,26 @@
   return true;
 }
 
-size_t OatWriter::InitOatHeader() {
-  oat_header_.reset(OatHeader::Create(compiler_driver_->GetInstructionSet(),
-                                      compiler_driver_->GetInstructionSetFeatures(),
-                                      dchecked_integral_cast<uint32_t>(dex_files_->size()),
-                                      key_value_store_));
-  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum_);
-  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin_);
-
+size_t OatWriter::InitOatHeader(InstructionSet instruction_set,
+                                const InstructionSetFeatures* instruction_set_features,
+                                uint32_t num_dex_files,
+                                SafeMap<std::string, std::string>* key_value_store) {
+  TimingLogger::ScopedTiming split("InitOatHeader", timings_);
+  oat_header_.reset(OatHeader::Create(instruction_set,
+                                      instruction_set_features,
+                                      num_dex_files,
+                                      key_value_store));
+  size_oat_header_ += sizeof(OatHeader);
+  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
   return oat_header_->GetHeaderSize();
 }
 
 size_t OatWriter::InitOatDexFiles(size_t offset) {
-  // create the OatDexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    const DexFile* dex_file = (*dex_files_)[i];
-    CHECK(dex_file != nullptr);
-    oat_dex_files_.emplace_back(offset, *dex_file);
-    offset += oat_dex_files_.back().SizeOf();
-  }
-  return offset;
-}
-
-size_t OatWriter::InitDexFiles(size_t offset) {
-  // calculate the offsets within OatDexFiles to the DexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    // dex files are required to be 4 byte aligned
-    size_t original_offset = offset;
-    offset = RoundUp(offset, 4);
-    size_dex_file_alignment_ += offset - original_offset;
-
-    // set offset in OatDexFile to DexFile
-    oat_dex_files_[i].dex_file_offset_ = offset;
-
-    const DexFile* dex_file = (*dex_files_)[i];
-
-    // Initialize type lookup table
-    oat_dex_files_[i].lookup_table_ = dex_file->GetTypeLookupTable();
-
-    offset += dex_file->GetHeader().file_size_;
-  }
-  return offset;
-}
-
-size_t OatWriter::InitLookupTables(size_t offset) {
+  TimingLogger::ScopedTiming split("InitOatDexFiles", timings_);
+  // Initialize the offsets of the OatDexFile structures.
   for (OatDexFile& oat_dex_file : oat_dex_files_) {
-    if (oat_dex_file.lookup_table_ != nullptr) {
-      uint32_t aligned_offset = RoundUp(offset, 4);
-      oat_dex_file.lookup_table_offset_ = aligned_offset;
-      size_oat_lookup_table_alignment_ += aligned_offset - offset;
-      offset = aligned_offset + oat_dex_file.lookup_table_->RawDataLength();
-    } else {
-      oat_dex_file.lookup_table_offset_ = 0;
-    }
+    oat_dex_file.offset_ = offset;
+    offset += oat_dex_file.SizeOf();
   }
   return offset;
 }
@@ -1239,7 +1448,6 @@
   oat_header_->SetExecutableOffset(offset);
   size_executable_offset_alignment_ = offset - old_offset;
   if (compiler_driver_->IsBootImage()) {
-    CHECK_EQ(image_patch_delta_, 0);
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
     #define DO_TRAMPOLINE(field, fn_name) \
@@ -1264,7 +1472,6 @@
     oat_header_->SetQuickImtConflictTrampolineOffset(0);
     oat_header_->SetQuickResolutionTrampolineOffset(0);
     oat_header_->SetQuickToInterpreterBridgeOffset(0);
-    oat_header_->SetImagePatchDelta(image_patch_delta_);
   }
   return offset;
 }
@@ -1279,7 +1486,7 @@
     } while (false)
 
   VISIT(InitCodeMethodVisitor);
-  if (compiler_driver_->IsBootImage()) {
+  if (HasImage()) {
     VISIT(InitImageMethodVisitor);
   }
 
@@ -1289,22 +1496,15 @@
 }
 
 bool OatWriter::WriteRodata(OutputStream* out) {
-  if (!GetOatDataOffset(out)) {
+  CHECK(write_state_ == WriteState::kWriteRoData);
+
+  if (!WriteClassOffsets(out)) {
+    LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
     return false;
   }
-  const size_t file_offset = oat_data_offset_;
 
-  // Reserve space for header. It will be written last - after updating the checksum.
-  size_t header_size = oat_header_->GetHeaderSize();
-  if (out->Seek(header_size, kSeekCurrent) == static_cast<off_t>(-1)) {
-    PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation();
-    return false;
-  }
-  size_oat_header_ += sizeof(OatHeader);
-  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
-
-  if (!WriteTables(out, file_offset)) {
-    LOG(ERROR) << "Failed to write oat tables to " << out->GetLocation();
+  if (!WriteClasses(out)) {
+    LOG(ERROR) << "Failed to write classes to " << out->GetLocation();
     return false;
   }
 
@@ -1313,6 +1513,7 @@
     LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation();
     return false;
   }
+  size_t file_offset = oat_data_offset_;
   size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset;
   relative_offset = WriteMaps(out, file_offset, relative_offset);
   if (relative_offset == 0) {
@@ -1332,11 +1533,13 @@
   }
   DCHECK_OFFSET();
 
+  write_state_ = WriteState::kWriteText;
   return true;
 }
 
 bool OatWriter::WriteCode(OutputStream* out) {
-  size_t header_size = oat_header_->GetHeaderSize();
+  CHECK(write_state_ == WriteState::kWriteText);
+
   const size_t file_offset = oat_data_offset_;
   size_t relative_offset = oat_header_->GetExecutableOffset();
   DCHECK_OFFSET();
@@ -1390,10 +1593,12 @@
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
     DO_STAT(size_oat_dex_file_offset_);
+    DO_STAT(size_oat_dex_file_class_offsets_offset_);
     DO_STAT(size_oat_dex_file_lookup_table_offset_);
-    DO_STAT(size_oat_dex_file_class_offsets_);
     DO_STAT(size_oat_lookup_table_alignment_);
     DO_STAT(size_oat_lookup_table_);
+    DO_STAT(size_oat_class_offsets_alignment_);
+    DO_STAT(size_oat_class_offsets_);
     DO_STAT(size_oat_class_type_);
     DO_STAT(size_oat_class_status_);
     DO_STAT(size_oat_class_method_bitmaps_);
@@ -1408,88 +1613,90 @@
   CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset));
   CHECK_EQ(size_, relative_offset);
 
-  // Finalize the header checksum.
+  write_state_ = WriteState::kWriteHeader;
+  return true;
+}
+
+bool OatWriter::WriteHeader(OutputStream* out,
+                            uint32_t image_file_location_oat_checksum,
+                            uintptr_t image_file_location_oat_begin,
+                            int32_t image_patch_delta) {
+  CHECK(write_state_ == WriteState::kWriteHeader);
+
+  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum);
+  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin);
+  if (compiler_driver_->IsBootImage()) {
+    CHECK_EQ(image_patch_delta, 0);
+    CHECK_EQ(oat_header_->GetImagePatchDelta(), 0);
+  } else {
+    CHECK_ALIGNED(image_patch_delta, kPageSize);
+    oat_header_->SetImagePatchDelta(image_patch_delta);
+  }
   oat_header_->UpdateChecksumWithHeaderData();
 
-  // Write the header now that the checksum is final.
+  const size_t file_offset = oat_data_offset_;
+
+  off_t current_offset = out->Seek(0, kSeekCurrent);
+  if (current_offset == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to get current offset from " << out->GetLocation();
+    return false;
+  }
   if (out->Seek(file_offset, kSeekSet) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
     return false;
   }
   DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
+
+  // Flush all other data before writing the header.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing oat header to " << out->GetLocation();
+    return false;
+  }
+  // Write the header.
+  size_t header_size = oat_header_->GetHeaderSize();
   if (!out->WriteFully(oat_header_.get(), header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
   }
-  if (out->Seek(oat_end_file_offset, kSeekSet) == static_cast<off_t>(-1)) {
-    PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation();
+  // Flush the header data.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush after writing oat header to " << out->GetLocation();
     return false;
   }
-  DCHECK_EQ(oat_end_file_offset, out->Seek(0, kSeekCurrent));
 
+  if (out->Seek(current_offset, kSeekSet) == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to seek back after writing oat header to " << out->GetLocation();
+    return false;
+  }
+  DCHECK_EQ(current_offset, out->Seek(0, kSeekCurrent));
+
+  write_state_ = WriteState::kDone;
   return true;
 }
 
-bool OatWriter::WriteTables(OutputStream* out, const size_t file_offset) {
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    if (!oat_dex_files_[i].Write(this, out, file_offset)) {
-      PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation();
-      return false;
-    }
-  }
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    uint32_t expected_offset = file_offset + oat_dex_files_[i].dex_file_offset_;
-    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-      const DexFile* dex_file = (*dex_files_)[i];
-      PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
-                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
-      return false;
-    }
-    const DexFile* dex_file = (*dex_files_)[i];
-    if (!out->WriteFully(&dex_file->GetHeader(), dex_file->GetHeader().file_size_)) {
-      PLOG(ERROR) << "Failed to write dex file " << dex_file->GetLocation()
-                  << " to " << out->GetLocation();
-      return false;
-    }
-    size_dex_file_ += dex_file->GetHeader().file_size_;
-  }
-  if (!WriteLookupTables(out, file_offset)) {
-    return false;
-  }
-  for (size_t i = 0; i != oat_classes_.size(); ++i) {
-    if (!oat_classes_[i].Write(this, out, file_offset)) {
-      PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
-      return false;
-    }
-  }
-  return true;
-}
-
-bool OatWriter::WriteLookupTables(OutputStream* out, const size_t file_offset) {
-  for (size_t i = 0; i < oat_dex_files_.size(); ++i) {
-    const uint32_t lookup_table_offset = oat_dex_files_[i].lookup_table_offset_;
-    const TypeLookupTable* table = oat_dex_files_[i].lookup_table_;
-    DCHECK_EQ(lookup_table_offset == 0, table == nullptr);
-    if (lookup_table_offset == 0) {
-      continue;
-    }
-    const uint32_t expected_offset = file_offset + lookup_table_offset;
-    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-      const DexFile* dex_file = (*dex_files_)[i];
-      PLOG(ERROR) << "Failed to seek to lookup table section. Actual: " << actual_offset
-                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
-      return false;
-    }
-    if (table != nullptr) {
-      if (!WriteData(out, table->RawData(), table->RawDataLength())) {
-        const DexFile* dex_file = (*dex_files_)[i];
-        PLOG(ERROR) << "Failed to write lookup table for " << dex_file->GetLocation()
-                    << " to " << out->GetLocation();
+bool OatWriter::WriteClassOffsets(OutputStream* out) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (oat_dex_file.class_offsets_offset_ != 0u) {
+      uint32_t expected_offset = oat_data_offset_ + oat_dex_file.class_offsets_offset_;
+      off_t actual_offset = out->Seek(expected_offset, kSeekSet);
+      if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+        PLOG(ERROR) << "Failed to seek to oat class offsets section. Actual: " << actual_offset
+                    << " Expected: " << expected_offset << " File: " << oat_dex_file.GetLocation();
         return false;
       }
-      size_oat_lookup_table_ += table->RawDataLength();
+      if (!oat_dex_file.WriteClassOffsets(this, out)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+bool OatWriter::WriteClasses(OutputStream* out) {
+  for (OatClass& oat_class : oat_classes_) {
+    if (!oat_class.Write(this, out, oat_data_offset_)) {
+      PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
+      return false;
     }
   }
   return true;
@@ -1585,6 +1792,458 @@
   return true;
 }
 
+bool OatWriter::ReadDexFileHeader(File* file, OatDexFile* oat_dex_file) {
+  // Read the dex file header and perform minimal verification.
+  uint8_t raw_header[sizeof(DexFile::Header)];
+  if (!file->ReadFully(&raw_header, sizeof(DexFile::Header))) {
+    PLOG(ERROR) << "Failed to read dex file header. Actual: "
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ValidateDexFileHeader(raw_header, oat_dex_file->GetLocation())) {
+    return false;
+  }
+
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+  oat_dex_file->dex_file_size_ = header->file_size_;
+  oat_dex_file->dex_file_location_checksum_ = header->checksum_;
+  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+  return true;
+}
+
+bool OatWriter::ValidateDexFileHeader(const uint8_t* raw_header, const char* location) {
+  if (!DexFile::IsMagicValid(raw_header)) {
+    LOG(ERROR) << "Invalid magic number in dex file header. " << " File: " << location;
+    return false;
+  }
+  if (!DexFile::IsVersionValid(raw_header)) {
+    LOG(ERROR) << "Invalid version number in dex file header. " << " File: " << location;
+    return false;
+  }
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+  if (header->file_size_ < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Dex file header specifies file size insufficient to contain the header."
+               << " File: " << location;
+    return false;
+  }
+  return true;
+}
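
Aside: ReadDexFileHeader() and ValidateDexFileHeader() above read fields through
AsUnalignedDexFileHeader() because the raw buffer is not guaranteed to satisfy the
header struct's alignment. A minimal standalone sketch of the underlying
unaligned-load technique, using field offsets from the published dex format (the
struct and names below are illustrative, not ART's):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // A few fields mirrored from the dex header; offsets per the dex format spec.
    struct DexHeaderFields {
      uint32_t checksum;         // byte offset 8
      uint32_t file_size;        // byte offset 32
      uint32_t class_defs_size;  // byte offset 96
    };

    DexHeaderFields ReadDexHeaderFields(const uint8_t* raw) {
      DexHeaderFields f;
      // memcpy is the portable unaligned load; dereferencing a cast pointer
      // would be undefined behavior if `raw` were not suitably aligned.
      std::memcpy(&f.checksum, raw + 8, sizeof(f.checksum));
      std::memcpy(&f.file_size, raw + 32, sizeof(f.file_size));
      std::memcpy(&f.class_defs_size, raw + 96, sizeof(f.class_defs_size));
      return f;
    }

    int main() {
      uint8_t raw[112] = {};  // the classic dex header is 112 bytes
      raw[32] = 0x70;         // file_size = 0x70; dex data is little-endian
      DexHeaderFields f = ReadDexHeaderFields(raw);
      std::printf("file_size=%u class_defs=%u\n", f.file_size, f.class_defs_size);
      return 0;
    }
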
+
+bool OatWriter::WriteDexFiles(OutputStream* rodata, File* file) {
+  TimingLogger::ScopedTiming split("WriteDexFiles", timings_);
+
+  // Get the elf file offset of the oat file.
+  if (!GetOatDataOffset(rodata)) {
+    return false;
+  }
+
+  // Write dex files.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (!WriteDexFile(rodata, file, &oat_dex_file)) {
+      return false;
+    }
+  }
+
+  // Close sources.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.source_.Clear();  // Get rid of the reference, it's about to be invalidated.
+  }
+  zipped_dex_files_.clear();
+  zip_archives_.clear();
+  raw_dex_files_.clear();
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file) {
+  if (!SeekToDexFile(rodata, file, oat_dex_file)) {
+    return false;
+  }
+  if (oat_dex_file->source_.IsZipEntry()) {
+    if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetZipEntry())) {
+      return false;
+    }
+  } else if (oat_dex_file->source_.IsRawFile()) {
+    if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetRawFile())) {
+      return false;
+    }
+  } else {
+    DCHECK(oat_dex_file->source_.IsRawData());
+    if (!WriteDexFile(rodata, oat_dex_file, oat_dex_file->source_.GetRawData())) {
+      return false;
+    }
+  }
+
+  // Update current size and account for the written data.
+  DCHECK_EQ(size_, oat_dex_file->dex_file_offset_);
+  size_ += oat_dex_file->dex_file_size_;
+  size_dex_file_ += oat_dex_file->dex_file_size_;
+  return true;
+}
+
+bool OatWriter::SeekToDexFile(OutputStream* out, File* file, OatDexFile* oat_dex_file) {
+  // Dex files are required to be 4-byte aligned.
+  size_t original_offset = size_;
+  size_t offset = RoundUp(original_offset, 4);
+  size_dex_file_alignment_ += offset - original_offset;
+
+  // Seek to the start of the dex file and flush any pending operations in the stream.
+  // Verify that, after flushing the stream, the file is at the same offset as the stream.
+  uint32_t start_offset = oat_data_offset_ + offset;
+  off_t actual_offset = out->Seek(start_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Stream/file position mismatch! Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  size_ = offset;
+  oat_dex_file->dex_file_offset_ = offset;
+  return true;
+}
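
Aside: the RoundUp() bookkeeping in SeekToDexFile() is the usual power-of-two
alignment idiom, with the padding tracked separately so that it shows up in the
size statistics. A tiny self-contained illustration (RoundUp here is a local
stand-in for ART's utility of the same name):

    #include <cstddef>
    #include <cstdio>

    static size_t RoundUp(size_t x, size_t n) {
      return (x + n - 1) & ~(n - 1);  // n must be a power of two
    }

    int main() {
      size_t size = 0x1236;                       // current end of written data
      size_t aligned = RoundUp(size, 4);          // 0x1238
      size_t alignment_padding = aligned - size;  // 2 bytes of padding
      std::printf("offset=0x%zx padding=%zu\n", aligned, alignment_padding);
      return 0;
    }
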
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             ZipEntry* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+  // Extract the dex file and get the extracted size.
+  std::string error_msg;
+  if (!dex_file->ExtractToFile(*file, &error_msg)) {
+    LOG(ERROR) << "Failed to extract dex file from ZIP entry: " << error_msg
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  off_t extracted_end = lseek(file->Fd(), 0, SEEK_CUR);
+  if (extracted_end == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed get end offset after writing dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (extracted_end < static_cast<off_t>(start_offset)) {
+    LOG(ERROR) << "Dex file end position is before start position! End: " << extracted_end
+               << " Start: " << start_offset
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  uint64_t extracted_size = static_cast<uint64_t>(extracted_end - start_offset);
+  if (extracted_size < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Extracted dex file is shorter than dex file header. size: "
+               << extracted_size << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Read the dex file header and extract required data to OatDexFile.
+  off_t actual_offset = lseek(file->Fd(), start_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek back to dex file header. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(file, oat_dex_file)) {
+    return false;
+  }
+  if (extracted_size < oat_dex_file->dex_file_size_) {
+    LOG(ERROR) << "Extracted truncated dex file. Extracted size: " << extracted_size
+               << " file size from header: " << oat_dex_file->dex_file_size_
+               << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Override the checksum from header with the CRC from ZIP entry.
+  oat_dex_file->dex_file_location_checksum_ = dex_file->GetCrc32();
+
+  // Seek both file and stream to the end offset.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  actual_offset = lseek(file->Fd(), end_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // If we extracted more than the size specified in the header, truncate the file.
+  if (extracted_size > oat_dex_file->dex_file_size_) {
+    if (file->SetLength(end_offset) != 0) {
+      PLOG(ERROR) << "Failed to truncate excessive dex file length."
+                  << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             File* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+  off_t input_offset = lseek(dex_file->Fd(), 0, SEEK_SET);
+  if (input_offset != static_cast<off_t>(0)) {
+    PLOG(ERROR) << "Failed to seek to dex file header. Actual: " << input_offset
+                << " Expected: 0"
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(dex_file, oat_dex_file)) {
+    return false;
+  }
+
+  // Copy the input dex file using sendfile().
+  if (!file->Copy(dex_file, 0, oat_dex_file->dex_file_size_)) {
+    PLOG(ERROR) << "Failed to copy dex file to oat file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // Check file position and seek the stream to the end offset.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  off_t actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Unexpected file position after copying dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  return true;
+}
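
Aside: File::Copy() above is documented to use sendfile(), which copies between
file descriptors inside the kernel, with no userspace buffer. A hedged,
self-contained sketch of such a copy loop (Linux-specific; the file names are
placeholders and error handling is simplified):

    #include <sys/sendfile.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <cerrno>
    #include <cstdio>

    // Copy `count` bytes from in_fd (starting at offset 0) to out_fd at its
    // current file offset, retrying on partial copies and EINTR.
    static bool CopyBytes(int out_fd, int in_fd, size_t count) {
      off_t in_offset = 0;
      while (count > 0) {
        ssize_t copied = sendfile(out_fd, in_fd, &in_offset, count);
        if (copied < 0) {
          if (errno == EINTR) continue;
          std::perror("sendfile");
          return false;
        }
        if (copied == 0) return false;  // unexpected EOF on the input
        count -= static_cast<size_t>(copied);
      }
      return true;
    }

    int main() {
      int in_fd = open("input.dex", O_RDONLY);
      int out_fd = open("output.oat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
      struct stat st;
      if (in_fd < 0 || out_fd < 0 || fstat(in_fd, &st) != 0) return 1;
      bool ok = CopyBytes(out_fd, in_fd, static_cast<size_t>(st.st_size));
      close(in_fd);
      close(out_fd);
      return ok ? 0 : 1;
    }
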
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             OatDexFile* oat_dex_file,
+                             const uint8_t* dex_file) {
+  // Note: The raw data has already been checked to contain the header
+  // and all the data that the header specifies as the file size.
+  DCHECK(dex_file != nullptr);
+  DCHECK(ValidateDexFileHeader(dex_file, oat_dex_file->GetLocation()));
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(dex_file);
+
+  if (!rodata->WriteFully(dex_file, header->file_size_)) {
+    PLOG(ERROR) << "Failed to write dex file " << oat_dex_file->GetLocation()
+                << " to " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after writing dex file."
+                << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Update dex file size and resize class offsets in the OatDexFile.
+  // Note: For raw data, the checksum is passed directly to AddRawDexFileSource().
+  oat_dex_file->dex_file_size_ = header->file_size_;
+  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+  return true;
+}
+
+bool OatWriter::WriteOatDexFiles(OutputStream* rodata) {
+  TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_);
+
+  // Seek to the start of OatDexFiles, i.e. to the end of the OatHeader.  If there are
+  // no OatDexFiles, no data is actually written to .rodata before WriteHeader() and
+  // this Seek() ensures that we reserve the space for OatHeader in .rodata.
+  DCHECK(oat_dex_files_.empty() || oat_dex_files_[0u].offset_ == oat_header_->GetHeaderSize());
+  uint32_t expected_offset = oat_data_offset_ + oat_header_->GetHeaderSize();
+  off_t actual_offset = rodata->Seek(expected_offset, kSeekSet);
+  if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+    PLOG(ERROR) << "Failed to seek to OatDexFile table section. Actual: " << actual_offset
+                << " Expected: " << expected_offset << " File: " << rodata->GetLocation();
+    return false;
+  }
+
+  for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+
+    DCHECK_EQ(oat_data_offset_ + oat_dex_file->offset_,
+              static_cast<size_t>(rodata->Seek(0, kSeekCurrent)));
+
+    // Write OatDexFile.
+    if (!oat_dex_file->Write(this, rodata)) {
+      PLOG(ERROR) << "Failed to write oat dex information to " << rodata->GetLocation();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset) {
+  TimingLogger::ScopedTiming split("ExtendForTypeLookupTables", timings_);
+
+  int64_t new_length = oat_data_offset_ + dchecked_integral_cast<int64_t>(offset);
+  if (file->SetLength(new_length) != 0) {
+    PLOG(ERROR) << "Failed to extend file for type lookup tables. new_length: " << new_length
+        << "File: " << file->GetPath();
+    return false;
+  }
+  off_t actual_offset = rodata->Seek(new_length, kSeekSet);
+  if (actual_offset != static_cast<off_t>(new_length)) {
+    PLOG(ERROR) << "Failed to seek stream after extending file for type lookup tables."
+                << " Actual: " << actual_offset << " Expected: " << new_length
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after extending for type lookup tables."
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  return true;
+}
+
+bool OatWriter::OpenDexFiles(
+    File* file,
+    bool verify,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  TimingLogger::ScopedTiming split("OpenDexFiles", timings_);
+
+  if (oat_dex_files_.empty()) {
+    // Nothing to do.
+    return true;
+  }
+
+  size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+  size_t length = size_ - map_offset;
+  std::string error_msg;
+  std::unique_ptr<MemMap> dex_files_map(MemMap::MapFile(length,
+                                                        PROT_READ | PROT_WRITE,
+                                                        MAP_SHARED,
+                                                        file->Fd(),
+                                                        oat_data_offset_ + map_offset,
+                                                        /* low_4gb */ false,
+                                                        file->GetPath().c_str(),
+                                                        &error_msg));
+  if (dex_files_map == nullptr) {
+    LOG(ERROR) << "Failed to mmap() dex files from oat file. File: " << file->GetPath()
+               << " error: " << error_msg;
+    return false;
+  }
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    // Make sure no one messed with input files while we were copying data.
+    // At the very least we need consistent file size and number of class definitions.
+    const uint8_t* raw_dex_file =
+        dex_files_map->Begin() + oat_dex_file.dex_file_offset_ - map_offset;
+    if (!ValidateDexFileHeader(raw_dex_file, oat_dex_file.GetLocation())) {
+      // Note: ValidateDexFileHeader() already logged an error message.
+      LOG(ERROR) << "Failed to verify written dex file header!"
+          << " Output: " << file->GetPath() << " ~ " << std::hex << map_offset
+          << " ~ " << static_cast<const void*>(raw_dex_file);
+      return false;
+    }
+    const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
+    if (header->file_size_ != oat_dex_file.dex_file_size_) {
+      LOG(ERROR) << "File size mismatch in written dex file header! Expected: "
+          << oat_dex_file.dex_file_size_ << " Actual: " << header->file_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+    if (header->class_defs_size_ != oat_dex_file.class_offsets_.size()) {
+      LOG(ERROR) << "Class defs size mismatch in written dex file header! Expected: "
+          << oat_dex_file.class_offsets_.size() << " Actual: " << header->class_defs_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+
+    // Now, open the dex file.
+    dex_files.emplace_back(DexFile::Open(raw_dex_file,
+                                         oat_dex_file.dex_file_size_,
+                                         oat_dex_file.GetLocation(),
+                                         oat_dex_file.dex_file_location_checksum_,
+                                         /* oat_dex_file */ nullptr,
+                                         verify,
+                                         &error_msg));
+    if (dex_files.back() == nullptr) {
+      LOG(ERROR) << "Failed to open dex file from oat file. File: " << oat_dex_file.GetLocation()
+                 << " Error: " << error_msg;
+      return false;
+    }
+  }
+
+  *opened_dex_files_map = std::move(dex_files_map);
+  *opened_dex_files = std::move(dex_files);
+  return true;
+}
+
+bool OatWriter::WriteTypeLookupTables(
+    MemMap* opened_dex_files_map,
+    const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) {
+  TimingLogger::ScopedTiming split("WriteTypeLookupTables", timings_);
+
+  DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size());
+  for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+    if (oat_dex_file->lookup_table_offset_ != 0u) {
+      DCHECK(oat_dex_file->create_type_lookup_table_ == CreateTypeLookupTable::kCreate);
+      DCHECK_NE(oat_dex_file->class_offsets_.size(), 0u);
+      size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+      size_t lookup_table_offset = oat_dex_file->lookup_table_offset_;
+      uint8_t* lookup_table = opened_dex_files_map->Begin() + (lookup_table_offset - map_offset);
+      opened_dex_files[i]->CreateTypeLookupTable(lookup_table);
+    }
+  }
+
+  DCHECK_EQ(opened_dex_files_map == nullptr, opened_dex_files.empty());
+  if (opened_dex_files_map != nullptr && !opened_dex_files_map->Sync()) {
+    PLOG(ERROR) << "Failed to Sync() type lookup tables. Map: " << opened_dex_files_map->GetName();
+    return false;
+  }
+
+  return true;
+}
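
Aside: WriteTypeLookupTables() generates the tables directly into a MAP_SHARED
mapping of the output file and then Sync()s the map. A minimal self-contained
sketch of that write-through-the-mapping-then-msync technique (the path and
sizes are illustrative):

    #include <sys/mman.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <cstdio>
    #include <cstring>

    int main() {
      int fd = open("/tmp/mapped_write_demo", O_RDWR | O_CREAT | O_TRUNC, 0644);
      if (fd < 0 || ftruncate(fd, 4096) != 0) return 1;
      void* map = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      if (map == MAP_FAILED) return 1;
      std::memcpy(map, "lookup table bytes", 18);  // write through the mapping
      if (msync(map, 4096, MS_SYNC) != 0) {        // flush the pages to the file
        std::perror("msync");
        return 1;
      }
      munmap(map, 4096);
      close(fd);
      return 0;
    }
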
+
 bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
   static const uint8_t kPadding[] = {
       0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
@@ -1611,15 +2270,20 @@
   }
 }
 
-OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) {
-  offset_ = offset;
-  const std::string& location(dex_file.GetLocation());
-  dex_file_location_size_ = location.size();
-  dex_file_location_data_ = reinterpret_cast<const uint8_t*>(location.data());
-  dex_file_location_checksum_ = dex_file.GetLocationChecksum();
-  dex_file_offset_ = 0;
-  lookup_table_offset_ = 0;
-  class_offsets_.resize(dex_file.NumClassDefs());
+OatWriter::OatDexFile::OatDexFile(const char* dex_file_location,
+                                  DexFileSource source,
+                                  CreateTypeLookupTable create_type_lookup_table)
+    : source_(source),
+      create_type_lookup_table_(create_type_lookup_table),
+      dex_file_size_(0),
+      offset_(0),
+      dex_file_location_size_(strlen(dex_file_location)),
+      dex_file_location_data_(dex_file_location),
+      dex_file_location_checksum_(0u),
+      dex_file_offset_(0u),
+      class_offsets_offset_(0u),
+      lookup_table_offset_(0u),
+      class_offsets_() {
 }
 
 size_t OatWriter::OatDexFile::SizeOf() const {
@@ -1627,24 +2291,54 @@
           + dex_file_location_size_
           + sizeof(dex_file_location_checksum_)
           + sizeof(dex_file_offset_)
-          + sizeof(lookup_table_offset_)
-          + (sizeof(class_offsets_[0]) * class_offsets_.size());
+          + sizeof(class_offsets_offset_)
+          + sizeof(lookup_table_offset_);
 }
 
-bool OatWriter::OatDexFile::Write(OatWriter* oat_writer,
-                                  OutputStream* out,
-                                  const size_t file_offset) const {
+void OatWriter::OatDexFile::ReserveTypeLookupTable(OatWriter* oat_writer) {
+  DCHECK_EQ(lookup_table_offset_, 0u);
+  if (create_type_lookup_table_ == CreateTypeLookupTable::kCreate && !class_offsets_.empty()) {
+    size_t table_size = TypeLookupTable::RawDataLength(class_offsets_.size());
+    if (table_size != 0u) {
+      // Type tables are required to be 4-byte aligned.
+      size_t original_offset = oat_writer->size_;
+      size_t offset = RoundUp(original_offset, 4);
+      oat_writer->size_oat_lookup_table_alignment_ += offset - original_offset;
+      lookup_table_offset_ = offset;
+      oat_writer->size_ = offset + table_size;
+      oat_writer->size_oat_lookup_table_ += table_size;
+    }
+  }
+}
+
+void OatWriter::OatDexFile::ReserveClassOffsets(OatWriter* oat_writer) {
+  DCHECK_EQ(class_offsets_offset_, 0u);
+  if (!class_offsets_.empty()) {
+    // Class offsets are required to be 4-byte aligned.
+    size_t original_offset = oat_writer->size_;
+    size_t offset = RoundUp(original_offset, 4);
+    oat_writer->size_oat_class_offsets_alignment_ += offset - original_offset;
+    class_offsets_offset_ = offset;
+    oat_writer->size_ = offset + GetClassOffsetsRawSize();
+  }
+}
+
+bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
+  const size_t file_offset = oat_writer->oat_data_offset_;
   DCHECK_OFFSET_();
+
   if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) {
     PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
+
   if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) {
     PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
+
   if (!oat_writer->WriteData(out,
                              &dex_file_location_checksum_,
                              sizeof(dex_file_location_checksum_))) {
@@ -1652,21 +2346,35 @@
     return false;
   }
   oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
+
   if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) {
     PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
+
+  if (!oat_writer->WriteData(out, &class_offsets_offset_, sizeof(class_offsets_offset_))) {
+    PLOG(ERROR) << "Failed to write class offsets offset to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_dex_file_class_offsets_offset_ += sizeof(class_offsets_offset_);
+
   if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) {
     PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
+
+  return true;
+}
+
+bool OatWriter::OatDexFile::WriteClassOffsets(OatWriter* oat_writer, OutputStream* out) {
   if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) {
-    PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation();
+    PLOG(ERROR) << "Failed to write oat class offsets for " << GetLocation()
+                << " to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_dex_file_class_offsets_ += GetClassOffsetsRawSize();
+  oat_writer->size_oat_class_offsets_ += GetClassOffsetsRawSize();
   return true;
 }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5feb5fc..14c6d50 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -27,7 +27,9 @@
 #include "method_reference.h"
 #include "mirror/class.h"
 #include "oat.h"
+#include "os.h"
 #include "safe_map.h"
+#include "ScopedFd.h"
 #include "utils/array_ref.h"
 
 namespace art {
@@ -39,6 +41,7 @@
 class OutputStream;
 class TimingLogger;
 class TypeLookupTable;
+class ZipEntry;
 
 namespace dwarf {
 struct MethodDebugInfo;
@@ -61,6 +64,11 @@
 // ...
 // TypeLookupTable[D]
 //
+// ClassOffsets[0]   one table of OatClass offsets (one entry per class def) for each OatDexFile.
+// ClassOffsets[1]
+// ...
+// ClassOffsets[D]
+//
 // OatClass[0]       one variable sized OatClass for each of C DexFile::ClassDefs
 // OatClass[1]       contains OatClass entries with class status, offsets to code, etc.
 // ...
@@ -93,15 +101,68 @@
 //
 class OatWriter {
  public:
-  OatWriter(const std::vector<const DexFile*>& dex_files,
-            uint32_t image_file_location_oat_checksum,
-            uintptr_t image_file_location_oat_begin,
-            int32_t image_patch_delta,
-            const CompilerDriver* compiler,
-            ImageWriter* image_writer,
-            bool compiling_boot_image,
-            TimingLogger* timings,
-            SafeMap<std::string, std::string>* key_value_store);
+  enum class CreateTypeLookupTable {
+    kCreate,
+    kDontCreate,
+    kDefault = kCreate
+  };
+
+  OatWriter(bool compiling_boot_image, TimingLogger* timings);
+
+  // To produce a valid oat file, the user must first add sources with any combination of
+  //   - AddDexFileSource(),
+  //   - AddZippedDexFilesSource(),
+  //   - AddRawDexFileSource().
+  // Then the user must call, in order (see the sketch after this comment),
+  //   - WriteAndOpenDexFiles()
+  //   - PrepareLayout(),
+  //   - WriteRodata(),
+  //   - WriteCode(),
+  //   - WriteHeader().
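
A self-contained mock of this staged protocol, using assert() where
oat_writer.cc uses CHECK() on write_state_; the class and main() below are
placeholders for illustration, not ART code:

    #include <cassert>

    class StagedWriter {
     public:
      enum class State { kAddingSources, kPrepareLayout, kRoData, kText, kHeader, kDone };

      void AddDexFileSource()     { assert(state_ == State::kAddingSources); }
      void WriteAndOpenDexFiles() { assert(state_ == State::kAddingSources);
                                    state_ = State::kPrepareLayout; }
      void PrepareLayout()        { assert(state_ == State::kPrepareLayout);
                                    state_ = State::kRoData; }
      void WriteRodata()          { assert(state_ == State::kRoData); state_ = State::kText; }
      void WriteCode()            { assert(state_ == State::kText); state_ = State::kHeader; }
      void WriteHeader()          { assert(state_ == State::kHeader); state_ = State::kDone; }

     private:
      State state_ = State::kAddingSources;
    };

    int main() {
      StagedWriter writer;
      writer.AddDexFileSource();  // any number of sources, in any combination
      writer.WriteAndOpenDexFiles();
      writer.PrepareLayout();
      writer.WriteRodata();
      writer.WriteCode();
      writer.WriteHeader();  // calling these out of order would assert
      return 0;
    }
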
+
+  // Add dex file source(s) from a file, either a plain dex file or
+  // a zip file with one or more dex files.
+  bool AddDexFileSource(
+      const char* filename,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source(s) from a zip file specified by a file handle.
+  bool AddZippedDexFilesSource(
+      ScopedFd&& zip_fd,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source from raw memory.
+  bool AddRawDexFileSource(
+      const ArrayRef<const uint8_t>& data,
+      const char* location,
+      uint32_t location_checksum,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  dchecked_vector<const char*> GetSourceLocations() const;
+
+  // Write raw dex files to the .rodata section and open them from the oat file. The verify
+  // setting dictates whether the dex file verifier checks the dex files; verification is
+  // generally desired and should be disabled only for tests.
+  bool WriteAndOpenDexFiles(OutputStream* rodata,
+                            File* file,
+                            InstructionSet instruction_set,
+                            const InstructionSetFeatures* instruction_set_features,
+                            SafeMap<std::string, std::string>* key_value_store,
+                            bool verify,
+                            /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                            /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  // Prepare layout of remaining data.
+  void PrepareLayout(const CompilerDriver* compiler,
+                     ImageWriter* image_writer,
+                     const std::vector<const DexFile*>& dex_files);
+  // Write the rest of .rodata section (ClassOffsets[], OatClass[], maps).
+  bool WriteRodata(OutputStream* out);
+  // Write the code to the .text section.
+  bool WriteCode(OutputStream* out);
+  // Write the oat header. This finalizes the oat file.
+  bool WriteHeader(OutputStream* out,
+                   uint32_t image_file_location_oat_checksum,
+                   uintptr_t image_file_location_oat_begin,
+                   int32_t image_patch_delta);
 
   // Returns whether the oat file has an associated image.
   bool HasImage() const {
@@ -130,9 +191,6 @@
     return ArrayRef<const uintptr_t>(absolute_patch_locations_);
   }
 
-  bool WriteRodata(OutputStream* out);
-  bool WriteCode(OutputStream* out);
-
   ~OatWriter();
 
   ArrayRef<const dwarf::MethodDebugInfo> GetMethodDebugInfo() const {
@@ -144,6 +202,7 @@
   }
 
  private:
+  class DexFileSource;
   class OatClass;
   class OatDexFile;
 
@@ -174,29 +233,66 @@
   // with a given DexMethodVisitor.
   bool VisitDexMethods(DexMethodVisitor* visitor);
 
-  size_t InitOatHeader();
+  size_t InitOatHeader(InstructionSet instruction_set,
+                       const InstructionSetFeatures* instruction_set_features,
+                       uint32_t num_dex_files,
+                       SafeMap<std::string, std::string>* key_value_store);
   size_t InitOatDexFiles(size_t offset);
-  size_t InitLookupTables(size_t offset);
-  size_t InitDexFiles(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatMaps(size_t offset);
   size_t InitOatCode(size_t offset);
   size_t InitOatCodeDexFiles(size_t offset);
 
-  bool WriteTables(OutputStream* out, const size_t file_offset);
-  bool WriteLookupTables(OutputStream* out, const size_t file_offset);
+  bool WriteClassOffsets(OutputStream* out);
+  bool WriteClasses(OutputStream* out);
   size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
 
   bool GetOatDataOffset(OutputStream* out);
+  bool ReadDexFileHeader(File* file, OatDexFile* oat_dex_file);
+  bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
+  bool WriteDexFiles(OutputStream* rodata, File* file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool SeekToDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, ZipEntry* dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, File* dex_file);
+  bool WriteDexFile(OutputStream* rodata, OatDexFile* oat_dex_file, const uint8_t* dex_file);
+  bool WriteOatDexFiles(OutputStream* rodata);
+  bool ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset);
+  bool OpenDexFiles(File* file,
+                    bool verify,
+                    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  bool WriteTypeLookupTables(MemMap* opened_dex_files_map,
+                             const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
   bool WriteData(OutputStream* out, const void* data, size_t size);
 
+  enum class WriteState {
+    kAddingDexFileSources,
+    kPrepareLayout,
+    kWriteRoData,
+    kWriteText,
+    kWriteHeader,
+    kDone
+  };
+
+  WriteState write_state_;
+  TimingLogger* timings_;
+
+  std::vector<std::unique_ptr<File>> raw_dex_files_;
+  std::vector<std::unique_ptr<ZipArchive>> zip_archives_;
+  std::vector<std::unique_ptr<ZipEntry>> zipped_dex_files_;
+
+  // Using std::list<> which doesn't move elements around on push/emplace_back().
+  // We need this because we keep plain pointers to the strings' c_str().
+  std::list<std::string> zipped_dex_file_locations_;
+
   dchecked_vector<dwarf::MethodDebugInfo> method_info_;
 
-  const CompilerDriver* const compiler_driver_;
-  ImageWriter* const image_writer_;
+  const CompilerDriver* compiler_driver_;
+  ImageWriter* image_writer_;
   const bool compiling_boot_image_;
 
   // note OatFile does not take ownership of the DexFiles
@@ -215,13 +311,7 @@
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
 
-  // dependencies on the image.
-  uint32_t image_file_location_oat_checksum_;
-  uintptr_t image_file_location_oat_begin_;
-  int32_t image_patch_delta_;
-
   // data to write
-  SafeMap<std::string, std::string>* key_value_store_;
   std::unique_ptr<OatHeader> oat_header_;
   dchecked_vector<OatDexFile> oat_dex_files_;
   dchecked_vector<OatClass> oat_classes_;
@@ -257,10 +347,12 @@
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
   uint32_t size_oat_dex_file_offset_;
+  uint32_t size_oat_dex_file_class_offsets_offset_;
   uint32_t size_oat_dex_file_lookup_table_offset_;
-  uint32_t size_oat_dex_file_class_offsets_;
   uint32_t size_oat_lookup_table_alignment_;
   uint32_t size_oat_lookup_table_;
+  uint32_t size_oat_class_offsets_alignment_;
+  uint32_t size_oat_class_offsets_;
   uint32_t size_oat_class_type_;
   uint32_t size_oat_class_status_;
   uint32_t size_oat_class_method_bitmaps_;
@@ -269,7 +361,7 @@
   std::unique_ptr<linker::RelativePatcher> relative_patcher_;
 
   // The locations of absolute patches relative to the start of the executable section.
-  std::vector<uintptr_t> absolute_patch_locations_;
+  dchecked_vector<uintptr_t> absolute_patch_locations_;
 
   // Map method reference to assigned offset.
   // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
deleted file mode 100644
index f0cafc8..0000000
--- a/compiler/optimizing/boolean_simplifier.cc
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "boolean_simplifier.h"
-
-namespace art {
-
-void HBooleanSimplifier::TryRemovingNegatedCondition(HBasicBlock* block) {
-  DCHECK(block->EndsWithIf());
-
-  // Check if the condition is a Boolean negation.
-  HIf* if_instruction = block->GetLastInstruction()->AsIf();
-  HInstruction* boolean_not = if_instruction->InputAt(0);
-  if (!boolean_not->IsBooleanNot()) {
-    return;
-  }
-
-  // Make BooleanNot's input the condition of the If and swap branches.
-  if_instruction->ReplaceInput(boolean_not->InputAt(0), 0);
-  block->SwapSuccessors();
-
-  // Remove the BooleanNot if it is now unused.
-  if (!boolean_not->HasUses()) {
-    boolean_not->GetBlock()->RemoveInstruction(boolean_not);
-  }
-}
-
-// Returns true if 'block1' and 'block2' are empty, merge into the same single
-// successor and the successor can only be reached from them.
-static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
-  if (!block1->IsSingleGoto() || !block2->IsSingleGoto()) return false;
-  HBasicBlock* succ1 = block1->GetSuccessors()[0];
-  HBasicBlock* succ2 = block2->GetSuccessors()[0];
-  return succ1 == succ2 && succ1->GetPredecessors().size() == 2u;
-}
-
-// Returns true if the outcome of the branching matches the boolean value of
-// the branching condition.
-static bool PreservesCondition(HInstruction* input_true, HInstruction* input_false) {
-  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsOne()
-      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsZero();
-}
-
-// Returns true if the outcome of the branching is exactly opposite of the
-// boolean value of the branching condition.
-static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false) {
-  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsZero()
-      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne();
-}
-
-void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
-  DCHECK(block->EndsWithIf());
-
-  // Find elements of the pattern.
-  HIf* if_instruction = block->GetLastInstruction()->AsIf();
-  HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
-  HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
-  if (!BlocksDoMergeTogether(true_block, false_block)) {
-    return;
-  }
-  HBasicBlock* merge_block = true_block->GetSuccessors()[0];
-  if (!merge_block->HasSinglePhi()) {
-    return;
-  }
-  HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
-  HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
-  HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
-
-  // Check if the selection negates/preserves the value of the condition and
-  // if so, generate a suitable replacement instruction.
-  HInstruction* if_condition = if_instruction->InputAt(0);
-
-  // Don't change FP compares.  The definition of compares involving NaNs forces
-  // the compares to be done as written by the user.
-  if (if_condition->IsCondition() &&
-      Primitive::IsFloatingPointType(if_condition->InputAt(0)->GetType())) {
-    return;
-  }
-
-  HInstruction* replacement;
-  if (NegatesCondition(true_value, false_value)) {
-    replacement = graph_->InsertOppositeCondition(if_condition, if_instruction);
-  } else if (PreservesCondition(true_value, false_value)) {
-    replacement = if_condition;
-  } else {
-    return;
-  }
-
-  // Replace the selection outcome with the new instruction.
-  phi->ReplaceWith(replacement);
-  merge_block->RemovePhi(phi);
-
-  // Delete the true branch and merge the resulting chain of blocks
-  // 'block->false_block->merge_block' into one.
-  true_block->DisconnectAndDelete();
-  block->MergeWith(false_block);
-  block->MergeWith(merge_block);
-
-  // No need to update any dominance information, as we are simplifying
-  // a simple diamond shape, where the join block is merged with the
-  // entry block. Any following blocks would have had the join block
-  // as a dominator, and `MergeWith` handles changing that to the
-  // entry block.
-}
-
-void HBooleanSimplifier::Run() {
-  // Iterate in post order in the unlikely case that removing one occurrence of
-  // the selection pattern empties a branch block of another occurrence.
-  // Otherwise the order does not matter.
-  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    if (!block->EndsWithIf()) continue;
-
-    // If condition is negated, remove the negation and swap the branches.
-    TryRemovingNegatedCondition(block);
-
-    // If this is a boolean-selection diamond pattern, replace its result with
-    // the condition value (or its negation) and simplify the graph.
-    TryRemovingBooleanSelection(block);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
deleted file mode 100644
index e12a12c..0000000
--- a/compiler/optimizing/boolean_simplifier.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This optimization recognizes two common patterns:
-//  (a) Boolean selection: Casting a boolean to an integer or negating it is
-//      carried out with an If statement selecting from zero/one integer
-//      constants. Because Boolean values are represented as zero/one, the
-//      pattern can be replaced with the condition instruction itself or its
-//      negation, depending on the layout.
-//  (b) Negated condition: Instruction simplifier may replace an If's condition
-//      with a boolean value. If this value is the result of a Boolean negation,
-//      the true/false branches can be swapped and negation removed.
-
-// Example: Negating a boolean value
-//     B1:
-//       z1   ParameterValue
-//       i2   IntConstant 0
-//       i3   IntConstant 1
-//       v4   Goto B2
-//     B2:
-//       z5   NotEquals [ z1 i2 ]
-//       v6   If [ z5 ] then B3 else B4
-//     B3:
-//       v7   Goto B5
-//     B4:
-//       v8   Goto B5
-//     B5:
-//       i9   Phi [ i3 i2 ]
-//       v10  Return [ i9 ]
-// turns into
-//     B1:
-//       z1   ParameterValue
-//       i2   IntConstant 0
-//       v4   Goto B2
-//     B2:
-//       z11  Equals [ z1 i2 ]
-//       v10  Return [ z11 ]
-//     B3, B4, B5: removed
-
-// Note: in order to recognize empty blocks, this optimization must be run
-// after the instruction simplifier has removed redundant suspend checks.
-
-#ifndef ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
-#define ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
-
-#include "optimization.h"
-
-namespace art {
-
-class HBooleanSimplifier : public HOptimization {
- public:
-  explicit HBooleanSimplifier(HGraph* graph)
-    : HOptimization(graph, kBooleanSimplifierPassName) {}
-
-  void Run() OVERRIDE;
-
-  static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
-
- private:
-  void TryRemovingNegatedCondition(HBasicBlock* block);
-  void TryRemovingBooleanSelection(HBasicBlock* block);
-
-  DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index b9df686..6dc5320 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -29,13 +29,13 @@
   BoundsCheckElimination(HGraph* graph,
                          const SideEffectsAnalysis& side_effects,
                          HInductionVarAnalysis* induction_analysis)
-      : HOptimization(graph, kBoundsCheckEliminiationPassName),
+      : HOptimization(graph, kBoundsCheckEliminationPassName),
         side_effects_(side_effects),
         induction_analysis_(induction_analysis) {}
 
   void Run() OVERRIDE;
 
-  static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
+  static constexpr const char* kBoundsCheckEliminationPassName = "BCE";
 
  private:
   const SideEffectsAnalysis& side_effects_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2725792..c2d9edd 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1285,11 +1285,9 @@
 }
 
 void CodeGeneratorARM::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
-  if (Primitive::Is64BitType(dst_type)) {
-    Move64(dst, src);
-  } else {
-    Move32(dst, src);
-  }
+  HParallelMove move(GetGraph()->GetArena());
+  move.AddMove(src, dst, dst_type, nullptr);
+  GetMoveResolver()->EmitNativeCode(&move);
 }
 
 void CodeGeneratorARM::AddLocationAsTemp(Location location, LocationSummary* locations) {
@@ -1612,6 +1610,32 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderARM::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  Label false_target;
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
+
 void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   new (GetGraph()->GetArena()) LocationSummary(info);
 }
@@ -1632,7 +1656,7 @@
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
       }
       break;
@@ -1641,7 +1665,7 @@
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
       locations->SetInAt(1, Location::RequiresFpuRegister());
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
       break;
@@ -1649,14 +1673,14 @@
     default:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
   }
 }
 
 void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
-  if (!cond->NeedsMaterialization()) {
+  if (cond->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -4973,6 +4997,8 @@
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
       __ Mov(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    } else if (destination.IsFpuRegister()) {
+      __ vmovsr(destination.AsFpuRegister<SRegister>(), source.AsRegister<Register>());
     } else {
       DCHECK(destination.IsStackSlot());
       __ StoreToOffset(kStoreWord, source.AsRegister<Register>(),
@@ -4990,7 +5016,9 @@
       __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
     }
   } else if (source.IsFpuRegister()) {
-    if (destination.IsFpuRegister()) {
+    if (destination.IsRegister()) {
+      __ vmovrs(destination.AsRegister<Register>(), source.AsFpuRegister<SRegister>());
+    } else if (destination.IsFpuRegister()) {
       __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>());
     } else {
       DCHECK(destination.IsStackSlot());
@@ -5014,6 +5042,10 @@
     if (destination.IsRegisterPair()) {
       __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
       __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+    } else if (destination.IsFpuRegisterPair()) {
+      __ vmovdrr(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+                 source.AsRegisterPairLow<Register>(),
+                 source.AsRegisterPairHigh<Register>());
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
       DCHECK(ExpectedPairLayout(source));
@@ -5021,7 +5053,11 @@
           kStoreWordPair, source.AsRegisterPairLow<Register>(), SP, destination.GetStackIndex());
     }
   } else if (source.IsFpuRegisterPair()) {
-    if (destination.IsFpuRegisterPair()) {
+    if (destination.IsRegisterPair()) {
+      __ vmovrrd(destination.AsRegisterPairLow<Register>(),
+                 destination.AsRegisterPairHigh<Register>(),
+                 FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
+    } else if (destination.IsFpuRegisterPair()) {
       __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
                FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
     } else {
@@ -5381,7 +5417,7 @@
   Register cls = locations->InAt(1).AsRegister<Register>();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(0) :
       Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -5398,7 +5434,7 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -5416,7 +5452,7 @@
       Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ cmp(out, ShifterOperand(cls));
@@ -5435,7 +5471,7 @@
       __ cmp(out, ShifterOperand(cls));
       __ b(&success, EQ);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ b(&done);
@@ -5454,7 +5490,7 @@
       __ b(&exact_check, EQ);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -5567,7 +5603,7 @@
   Register cls = locations->InAt(1).AsRegister<Register>();
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
-  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(1) :
       Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -5593,7 +5629,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -5611,7 +5647,7 @@
       Label loop, compare_classes;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -5623,7 +5659,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ b(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -5640,7 +5677,7 @@
       __ b(&done, EQ);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back to the beginning of the loop.
@@ -5651,7 +5688,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5664,7 +5702,7 @@
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -5677,7 +5715,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ b(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -5686,7 +5725,8 @@
       __ CompareAndBranchIfZero(temp, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5897,23 +5937,24 @@
 void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                                                    Location out,
                                                                    uint32_t offset,
-                                                                   Location temp) {
+                                                                   Location maybe_temp) {
   Register out_reg = out.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
-      // Save the value of `out` into `temp` before overwriting it
+      // Save the value of `out` into `maybe_temp` before overwriting it
       // in the following move operation, as we will need it for the
       // read barrier below.
-      __ Mov(temp.AsRegister<Register>(), out_reg);
+      __ Mov(maybe_temp.AsRegister<Register>(), out_reg);
       // /* HeapReference<Object> */ out = *(out + offset)
       __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
-      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
     }
   } else {
     // Plain load with no read barrier.
@@ -5927,15 +5968,16 @@
                                                                     Location out,
                                                                     Location obj,
                                                                     uint32_t offset,
-                                                                    Location temp) {
+                                                                    Location maybe_temp) {
   Register out_reg = out.AsRegister<Register>();
   Register obj_reg = obj.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6593,6 +6635,29 @@
   }
 }
 
+void LocationsBuilderARM::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kArmPointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
+  }
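+  // `method_offset` now selects the ArtMethod* slot in the input class's
+  // embedded vtable or IMT (the IMT index is taken modulo kImtSize); the
+  // word load below reads that pointer from `class + method_offset`.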
+  __ LoadFromOffset(kLoadWord,
+                    locations->Out().AsRegister<Register>(),
+                    locations->InAt(0).AsRegister<Register>(),
+                    method_offset);
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index d45ea97..558c9cf 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -240,23 +240,29 @@
   //   out <- *(out + offset)
   //
   // while honoring heap poisoning and/or read barriers (if any).
-  // Register `temp` is used when generating a read barrier.
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location temp);
+                                        Location maybe_temp);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
   //   out <- *(obj + offset)
   //
   // while honoring heap poisoning and/or read barriers (if any).
-  // Register `temp` is used when generating a Baker's read barrier.
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
                                          uint32_t offset,
-                                         Location temp);
+                                         Location maybe_temp);
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2cb2741..a59024e 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -584,6 +584,56 @@
   }
 }
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type);
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
  public:
@@ -605,7 +655,7 @@
     // to be instrumented, e.g.:
     //
     //   __ Ldr(out, HeapOperand(out, class_offset));
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -621,7 +671,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
     // The read barrier instrumentation does not support the
     // HArm64IntermediateAddress instruction yet.
     DCHECK(!(instruction_->IsArrayGet() &&
@@ -769,14 +821,18 @@
 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Primitive::Type type = Primitive::kPrimNot;
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1338,7 +1394,8 @@
 
 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
                                      CPURegister dst,
-                                     const MemOperand& src) {
+                                     const MemOperand& src,
+                                     bool needs_null_check) {
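+  // Callers pass `needs_null_check` as false when the (implicit) null check is
+  // known to be handled elsewhere, so pc info is only recorded on request.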
   MacroAssembler* masm = GetVIXLAssembler();
   BlockPoolsScope block_pools(masm);
   UseScratchRegisterScope temps(masm);
@@ -1354,20 +1411,28 @@
   switch (type) {
     case Primitive::kPrimBoolean:
       __ Ldarb(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimByte:
       __ Ldarb(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimChar:
       __ Ldarh(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimShort:
       __ Ldarh(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimInt:
@@ -1375,7 +1440,9 @@
     case Primitive::kPrimLong:
       DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
       __ Ldar(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
@@ -1384,7 +1451,9 @@
 
       Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
       __ Ldar(temp, base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Fmov(FPRegister(dst), temp);
       break;
     }
@@ -1505,7 +1574,7 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
-void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
   BarrierType type = BarrierAll;
 
   switch (kind) {
@@ -1641,33 +1710,62 @@
 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
                                                    const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+  LocationSummary* locations = instruction->GetLocations();
+  Location base_loc = locations->InAt(0);
+  Location out = locations->Out();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   Primitive::Type field_type = field_info.GetFieldType();
   BlockPoolsScope block_pools(GetVIXLAssembler());
 
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
   bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
-  if (field_info.IsVolatile()) {
-    if (use_acquire_release) {
-      // NB: LoadAcquire will record the pc info if needed.
-      codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
+  if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Object FieldGet with Baker's read barrier case.
+    MacroAssembler* masm = GetVIXLAssembler();
+    UseScratchRegisterScope temps(masm);
+    // /* HeapReference<Object> */ out = *(base + offset)
+    Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
+    Register temp = temps.AcquireW();
+    // Note that potential implicit null checks are handled in this
+    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
+    codegen_->GenerateFieldLoadWithBakerReadBarrier(
+        instruction,
+        out,
+        base,
+        offset,
+        temp,
+        /* needs_null_check */ true,
+        field_info.IsVolatile() && use_acquire_release);
+    if (field_info.IsVolatile() && !use_acquire_release) {
+      // For IRIW sequential consistency kLoadAny is not sufficient.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    // General case.
+    if (field_info.IsVolatile()) {
+      if (use_acquire_release) {
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorARM64::LoadAcquire call; it records the pc info if needed.
+        codegen_->LoadAcquire(
+            instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
+      } else {
+        codegen_->Load(field_type, OutputCPURegister(instruction), field);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        // For IRIW sequential consistency kLoadAny is not sufficient.
+        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+      }
     } else {
       codegen_->Load(field_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
-      // For IRIW sequential consistency kLoadAny is not sufficient.
-      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
     }
-  } else {
-    codegen_->Load(field_type, OutputCPURegister(instruction), field);
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    LocationSummary* locations = instruction->GetLocations();
-    Location base = locations->InAt(0);
-    Location out = locations->Out();
-    uint32_t offset = field_info.GetFieldOffset().Uint32Value();
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset);
+    if (field_type == Primitive::kPrimNot) {
+      // If read barriers are enabled, emit read barriers other than
+      // Baker's using a slow path (and also unpoison the loaded
+      // reference, if heap poisoning is enabled).
+      codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+    }
   }
 }
 
@@ -1713,10 +1811,10 @@
         codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       } else {
-        GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
         codegen_->Store(field_type, source, HeapOperand(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
       }
     } else {
       codegen_->Store(field_type, source, HeapOperand(obj, offset));
@@ -2021,50 +2119,62 @@
   LocationSummary* locations = instruction->GetLocations();
   Location index = locations->InAt(1);
   uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
-  MemOperand source = HeapOperand(obj);
-  CPURegister dest = OutputCPURegister(instruction);
+  Location out = locations->Out();
 
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
-  if (index.IsConstant()) {
-    offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
-    source = HeapOperand(obj, offset);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Object ArrayGet with Baker's read barrier case.
+    Register temp = temps.AcquireW();
+    // The read barrier instrumentation does not support the
+    // HArm64IntermediateAddress instruction yet.
+    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+    // Note that a potential implicit null check is handled in the
+    // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+    codegen_->GenerateArrayLoadWithBakerReadBarrier(
+        instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
   } else {
-    Register temp = temps.AcquireSameSizeAs(obj);
-    if (instruction->GetArray()->IsArm64IntermediateAddress()) {
-      // The read barrier instrumentation does not support the
-      // HArm64IntermediateAddress instruction yet.
-      DCHECK(!kEmitCompilerReadBarrier);
-      // We do not need to compute the intermediate address from the array: the
-      // input instruction has done it already. See the comment in
-      // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
-        DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
-      }
-      temp = obj;
-    } else {
-      __ Add(temp, obj, offset);
-    }
-    source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
-  }
-
-  codegen_->Load(type, dest, source);
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-  if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    Location obj_loc = locations->InAt(0);
-    Location out = locations->Out();
+    // General case.
+    MemOperand source = HeapOperand(obj);
     if (index.IsConstant()) {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+      source = HeapOperand(obj, offset);
     } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
+      Register temp = temps.AcquireSameSizeAs(obj);
+      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+        // The read barrier instrumentation does not support the
+        // HArm64IntermediateAddress instruction yet.
+        DCHECK(!kEmitCompilerReadBarrier);
+        // We do not need to compute the intermediate address from the array: the
+        // input instruction has done it already. See the comment in
+        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+        if (kIsDebugBuild) {
+          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+          DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
+        }
+        temp = obj;
+      } else {
+        __ Add(temp, obj, offset);
+      }
+      source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+    }
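+    // Both paths above address the element at obj + data_offset +
+    // (index << ComponentSizeShift(type)); the constant-index path folds it all
+    // into `offset`, while the register path scales the index in the memory
+    // operand (starting from a precomputed intermediate address if available).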
+
+    codegen_->Load(type, OutputCPURegister(instruction), source);
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+    if (type == Primitive::kPrimNot) {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      Location obj_loc = locations->InAt(0);
+      if (index.IsConstant()) {
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
+      } else {
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
+      }
     }
   }
 }
@@ -2194,12 +2304,12 @@
           //   __ Mov(temp2, temp);
           //   // /* HeapReference<Class> */ temp = temp->component_type_
           //   __ Ldr(temp, HeapOperand(temp, component_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = value->klass_
           //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
           //
           //   __ Cmp(temp, temp2);
@@ -2400,13 +2510,13 @@
     locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
   }
 
-  if (instruction->NeedsMaterialization()) {
+  if (!instruction->IsEmittedAtUseSite()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
-  if (!instruction->NeedsMaterialization()) {
+  if (instruction->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -2820,7 +2930,8 @@
         non_fallthrough_target = true_target;
       }
 
-      if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+      if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
+          rhs.IsImmediate() && (rhs.immediate() == 0)) {
         switch (arm64_cond) {
           case eq:
             __ Cbz(lhs, non_fallthrough_target);
@@ -2893,6 +3004,32 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderARM64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  vixl::Label false_target;
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
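+  // `out` shares a register with the first input (the false value): if the
+  // condition is false we branch past the move and keep it; if true we fall
+  // through and overwrite `out` with the second input (the true value).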
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
+
 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   new (GetGraph()->GetArena()) LocationSummary(info);
 }
@@ -2921,6 +3058,14 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -2947,21 +3092,22 @@
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -2977,10 +3123,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ Ldr(out, HeapOperand(obj.W(), class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       __ Cmp(out, cls);
       __ Cset(out, eq);
@@ -2995,17 +3140,8 @@
       // object to avoid doing a comparison we know will fail.
       vixl::Label loop, success;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = WRegisterFrom(temp_loc);
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ Ldr(out, HeapOperand(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Cmp(out, cls);
@@ -3023,17 +3159,8 @@
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = WRegisterFrom(temp_loc);
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ Ldr(out, HeapOperand(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ Cbnz(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -3051,17 +3178,8 @@
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = WRegisterFrom(temp_loc);
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ Ldr(out, HeapOperand(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -3100,6 +3218,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
                                                                       /* is_fatal */ false);
@@ -3152,30 +3277,29 @@
   locations->SetInAt(1, Location::RequiresRegister());
   // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
   locations->AddTemp(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
-     locations->AddTemp(Location::RequiresRegister());
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
   Location temp_loc = locations->GetTemp(0);
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   Register temp = WRegisterFrom(temp_loc);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -3194,8 +3318,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ Ldr(temp, HeapOperand(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -3212,18 +3335,8 @@
       // object to avoid doing a comparison we know will fail.
       vixl::Label loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = WRegisterFrom(temp2_loc);
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ Ldr(temp, HeapOperand(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -3235,8 +3348,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -3252,18 +3365,8 @@
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = WRegisterFrom(temp2_loc);
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ Ldr(temp, HeapOperand(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back to the beginning of the loop.
@@ -3274,8 +3377,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -3287,19 +3390,8 @@
       __ B(eq, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = WRegisterFrom(temp2_loc);
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ Ldr(temp, HeapOperand(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -3312,8 +3404,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -3322,8 +3414,8 @@
       __ Cbz(temp, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -3340,6 +3432,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ B(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -3465,7 +3564,7 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
-  // On arm64 we support all dispatch types.
+  // On ARM64 we support all dispatch types.
   return desired_dispatch_info;
 }
 
@@ -3742,32 +3841,17 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ Add(out.X(), current_method.X(), declaring_class_offset);
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ Ldr(out, MemOperand(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
     MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ Add(out.X(), out.X(), cache_offset);
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ Ldr(out, MemOperand(out.X(), cache_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(
+        cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -3830,30 +3914,14 @@
   Register out = OutputRegister(load);
   Register current_method = InputRegisterAt(load, 0);
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ Add(out.X(), current_method.X(), declaring_class_offset);
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ Ldr(out, MemOperand(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ Add(out.X(), out.X(), cache_offset);
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ Ldr(out, MemOperand(out.X(), cache_offset));
-  }
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
   if (!load->IsInDexCache()) {
     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
@@ -4222,7 +4290,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
@@ -4607,14 +4675,288 @@
   }
 }
 
-void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction,
-                                             Location out,
-                                             Location ref,
-                                             Location obj,
-                                             uint32_t offset,
-                                             Location index) {
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                     Location out,
+                                                                     uint32_t offset,
+                                                                     Location maybe_temp) {
+  Primitive::Type type = Primitive::kPrimNot;
+  Register out_reg = RegisterFrom(out, type);
+  if (kEmitCompilerReadBarrier) {
+    Register temp_reg = RegisterFrom(maybe_temp, type);
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      out_reg,
+                                                      offset,
+                                                      temp_reg,
+                                                      /* needs_null_check */ false,
+                                                      /* use_load_acquire */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Mov(temp_reg, out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ Ldr(out_reg, HeapOperand(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ Ldr(out_reg, HeapOperand(out_reg, offset));
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                      Location out,
+                                                                      Location obj,
+                                                                      uint32_t offset,
+                                                                      Location maybe_temp) {
+  Primitive::Type type = Primitive::kPrimNot;
+  Register out_reg = RegisterFrom(out, type);
+  Register obj_reg = RegisterFrom(obj, type);
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      Register temp_reg = RegisterFrom(maybe_temp, type);
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      obj_reg,
+                                                      offset,
+                                                      temp_reg,
+                                                      /* needs_null_check */ false,
+                                                      /* use_load_acquire */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                            Location root,
+                                                            vixl::Register obj,
+                                                            uint32_t offset) {
+  Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ Ldr(root_reg, MemOperand(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCodeARM64* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      MacroAssembler* masm = GetVIXLAssembler();
+      UseScratchRegisterScope temps(masm);
+      Register temp = temps.AcquireW();
+      // temp = Thread::Current()->GetIsGcMarking()
+      __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value()));
+      __ Cbnz(temp, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ Add(root_reg.X(), obj.X(), offset);
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ Ldr(root_reg, MemOperand(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
+
+void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                               Location ref,
+                                                               vixl::Register obj,
+                                                               uint32_t offset,
+                                                               Register temp,
+                                                               bool needs_null_check,
+                                                               bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                               Location ref,
+                                                               vixl::Register obj,
+                                                               uint32_t data_offset,
+                                                               Location index,
+                                                               Register temp,
+                                                               bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // Array cells are never volatile variables, therefore array loads
+  // never use Load-Acquire instructions on ARM64.
+  const bool use_load_acquire = false;
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                   Location ref,
+                                                                   vixl::Register obj,
+                                                                   uint32_t offset,
+                                                                   Location index,
+                                                                   Register temp,
+                                                                   bool needs_null_check,
+                                                                   bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  // If `index` is a valid location, then we are emitting an array
+  // load, so we shouldn't be using a Load Acquire instruction.
+  // In other words: `index.IsValid()` => `!use_load_acquire`.
+  DCHECK(!index.IsValid() || !use_load_acquire);
+
+  MacroAssembler* masm = GetVIXLAssembler();
+  UseScratchRegisterScope temps(masm);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  Primitive::Type type = Primitive::kPrimNot;
+  Register ref_reg = RegisterFrom(ref, type);
+  DCHECK(obj.IsW());
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ Ldr(temp, HeapOperand(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
+  __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Introduce a dependency on the high bits of rb_state, which shall
+  // be all zeroes, to prevent load-load reordering, and without using
+  // a memory barrier (which would be more expensive).
+  // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
+  Register temp2 = temps.AcquireW();
+  __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
+  // obj is unchanged by this operation, but its value now depends on
+  // temp2, which depends on temp.
+  __ Add(obj, obj, Operand(temp2));
+  temps.Release(temp2);
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    temp2 = temps.AcquireW();
+    // /* HeapReference<Object> */ ref =
+    //     *(obj + offset + index * sizeof(HeapReference<Object>))
+    MemOperand source = HeapOperand(obj);
+    if (index.IsConstant()) {
+      uint32_t computed_offset =
+          offset + (Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type));
+      source = HeapOperand(obj, computed_offset);
+    } else {
+      __ Add(temp2, obj, offset);
+      source = HeapOperand(temp2, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+    }
+    Load(type, ref_reg, source);
+    temps.Release(temp2);
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    MemOperand field = HeapOperand(obj, offset);
+    if (use_load_acquire) {
+      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
+    } else {
+      Load(type, ref_reg, field);
+    }
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ Cmp(temp, ReadBarrier::gray_ptr_);
+  __ B(eq, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
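For readers following the lock-word dance, here is the same algorithm as straight-line C++. This is a sketch under assumed bit-layout constants (the real values live in art::LockWord and art::ReadBarrier), and a C++ acquire fence stands in for the Bic/Add address-dependency trick, which has no source-level equivalent:

    #include <atomic>
    #include <cstdint>

    namespace {
    constexpr uint32_t kRbStateShift = 28;  // assumed; see LockWord::kReadBarrierStateShift
    constexpr uint32_t kRbStateMask  = 3;   // assumed; see LockWord::kReadBarrierStateMask
    constexpr uint32_t kGrayPtr      = 1;   // assumed; see ReadBarrier::gray_ptr_
    void* Mark(void* ref) { return ref; }   // stub for the mark entrypoint
    }  // namespace

    static void* LoadReference(char* obj, uint32_t monitor_offset, uint32_t field_offset) {
      // Load the lock word first and extract the read barrier state.
      uint32_t monitor = *reinterpret_cast<uint32_t*>(obj + monitor_offset);
      uint32_t rb_state = (monitor >> kRbStateShift) & kRbStateMask;
      // The emitted code uses a fake address dependency instead of a fence.
      std::atomic_thread_fence(std::memory_order_acquire);
      void* ref = *reinterpret_cast<void**>(obj + field_offset);
      if (rb_state == kGrayPtr) {  // Cmp + B.eq into the slow path
        ref = Mark(ref);
      }
      return ref;
    }

The Bic/Add pair is the interesting design choice: clearing the state bits out of rb_state yields a register that is provably zero yet data-dependent on the first load, and adding it to obj makes the second load address-dependent on the first, an ordering ARM's memory model gives for free.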
+
+void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                 Location out,
+                                                 Location ref,
+                                                 Location obj,
+                                                 uint32_t offset,
+                                                 Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -4628,61 +4970,68 @@
       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ B(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                  Location out,
-                                                  Location ref,
-                                                  Location obj,
-                                                  uint32_t offset,
-                                                  Location index) {
+void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location ref,
+                                                      Location obj,
+                                                      uint32_t offset,
+                                                      Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
   }
 }
 
-void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                    Location out,
-                                                    Location root) {
+void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                        Location out,
+                                                        Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCodeARM64* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ B(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
+void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kArm64PointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
+  }
+  __ Ldr(XRegisterFrom(locations->Out()),
+         MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
+}
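VisitClassTableGet reads a method pointer out of the class object itself: vtable calls index the embedded vtable directly, while interface calls first fold the method index into the fixed-size embedded IMT, hence the modulo. A sketch of the offset arithmetic, with illustrative constants (the real offsets come from mirror::Class, and the kImtSize value is an assumption here):

    #include <cstdint>

    namespace {
    constexpr uint32_t kImtSize     = 64;     // assumed; see mirror::Class::kImtSize
    constexpr uint32_t kPointerSize = 8;      // arm64
    constexpr uint32_t kVTableBase  = 0x100;  // illustrative embedded-table offsets
    constexpr uint32_t kImTableBase = 0x80;
    }  // namespace

    static uint32_t MethodOffset(bool is_vtable, uint32_t index) {
      return is_vtable
          ? kVTableBase + index * kPointerSize                 // EmbeddedVTableEntryOffset
          : kImTableBase + (index % kImtSize) * kPointerSize;  // EmbeddedImTableEntryOffset
    }

The single Ldr at the end then loads the ArtMethod* from that offset off the class register.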
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 8eb9fcc..a9d1bbd 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -208,14 +208,53 @@
 
  private:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleCondition(HCondition* instruction);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               vixl::Register obj,
+                               uint32_t offset);
+
   void HandleShift(HBinaryOperation* instr);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -337,6 +376,8 @@
   // Emit a write barrier.
   void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   // Register allocation.
 
   void SetupBlockedRegisters() const OVERRIDE;
@@ -386,9 +427,12 @@
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
 
   void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
-  void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
+  void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+  void LoadAcquire(HInstruction* instruction,
+                   vixl::CPURegister dst,
+                   const vixl::MemOperand& src,
+                   bool needs_null_check);
+  void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
 
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -423,7 +467,27 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::Register obj,
+                                             uint32_t offset,
+                                             vixl::Register temp,
+                                             bool needs_null_check,
+                                             bool use_load_acquire);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             vixl::Register temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -440,23 +504,25 @@
   // When `index` is provided (i.e. for array accesses), the offset
   // value passed to artReadBarrierSlow is adjusted to take `index`
   // into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -466,9 +532,20 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 vixl::Register temp,
+                                                 bool needs_null_check,
+                                                 bool use_load_acquire);
+
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
                                           vixl::Literal<uint64_t>*,
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5bd136a..85ffd66 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -614,6 +614,31 @@
     Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false);
   } else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) {
     Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true);
+  } else if ((loc1.IsRegister() && loc2.IsStackSlot()) ||
+             (loc1.IsStackSlot() && loc2.IsRegister())) {
+    Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>()
+                                     : loc2.AsRegister<Register>();
+    intptr_t offset = loc1.IsStackSlot() ? loc1.GetStackIndex()
+                                         : loc2.GetStackIndex();
+    __ Move(TMP, reg);
+    __ LoadFromOffset(kLoadWord, reg, SP, offset);
+    __ StoreToOffset(kStoreWord, TMP, SP, offset);
+  } else if ((loc1.IsRegisterPair() && loc2.IsDoubleStackSlot()) ||
+             (loc1.IsDoubleStackSlot() && loc2.IsRegisterPair())) {
+    Register reg_l = loc1.IsRegisterPair() ? loc1.AsRegisterPairLow<Register>()
+                                           : loc2.AsRegisterPairLow<Register>();
+    Register reg_h = loc1.IsRegisterPair() ? loc1.AsRegisterPairHigh<Register>()
+                                           : loc2.AsRegisterPairHigh<Register>();
+    intptr_t offset_l = loc1.IsDoubleStackSlot() ? loc1.GetStackIndex()
+                                                 : loc2.GetStackIndex();
+    intptr_t offset_h = loc1.IsDoubleStackSlot() ? loc1.GetHighStackIndex(kMipsWordSize)
+                                                 : loc2.GetHighStackIndex(kMipsWordSize);
+    __ Move(TMP, reg_l);
+    __ Move(AT, reg_h);
+    __ LoadFromOffset(kLoadWord, reg_l, SP, offset_l);
+    __ LoadFromOffset(kLoadWord, reg_h, SP, offset_h);
+    __ StoreToOffset(kStoreWord, TMP, SP, offset_l);
+    __ StoreToOffset(kStoreWord, AT, SP, offset_h);
   } else {
     LOG(FATAL) << "Swap between " << loc1 << " and " << loc2 << " is unsupported";
   }
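Both new cases follow the classic three-move swap through scratch registers (TMP for the low word, AT for the high word): park the register value, pull the slot into the register, then store the parked value into the slot. A minimal sketch of the idiom, with an array standing in for the stack:

    #include <cstdint>

    // Swap one 32-bit "register" with one stack slot via a scratch
    // temporary, mirroring the Move/LoadFromOffset/StoreToOffset triple.
    static void SwapRegWithSlot(uint32_t& reg, uint32_t* stack, uint32_t slot) {
      uint32_t tmp = reg;    // __ Move(TMP, reg)
      reg = stack[slot];     // __ LoadFromOffset(kLoadWord, reg, SP, offset)
      stack[slot] = tmp;     // __ StoreToOffset(kStoreWord, TMP, SP, offset)
    }

The register-pair case simply runs the idiom twice, which is why it needs both scratch registers at once.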
@@ -2245,13 +2270,13 @@
       locations->SetInAt(1, Location::RequiresFpuRegister());
       break;
   }
-  if (instruction->NeedsMaterialization()) {
+  if (!instruction->IsEmittedAtUseSite()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
-  if (!instruction->NeedsMaterialization()) {
+  if (instruction->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -3381,6 +3406,32 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  MipsLabel false_target;
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
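The select lowering leans on the SameAsFirstInput constraint: the output already holds the false value, so the code only has to conditionally overwrite it with the true value, using a single forward branch and no second move. Sketched in C++ (input 0 is the false value, input 1 the true value, input 2 the condition):

    // out starts as the false value (SameAsFirstInput) and is
    // overwritten only when the condition holds.
    static int Select(bool cond, int false_value, int true_value) {
      int out = false_value;
      if (cond) {             // GenerateTestAndBranch to &false_target
        out = true_value;     // MoveLocation(Out(), InAt(1))
      }
      return out;             // __ Bind(&false_target)
    }

The same shape is repeated below for mips64, x86 and x86-64.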
+
 void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   new (GetGraph()->GetArena()) LocationSummary(info);
 }
@@ -5236,6 +5287,14 @@
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
 }
 
+void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips";
+}
+
+void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips";
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 0505486..3c928de 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -964,11 +964,15 @@
   return Location::NoLocation();
 }
 
-void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
+void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object,
+                                     GpuRegister value,
+                                     bool value_can_be_null) {
   Mips64Label done;
   GpuRegister card = AT;
   GpuRegister temp = TMP;
-  __ Beqzc(value, &done);
+  if (value_can_be_null) {
+    __ Beqzc(value, &done);
+  }
   __ LoadFromOffset(kLoadDoubleword,
                     card,
                     TR,
@@ -976,7 +980,9 @@
   __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift);
   __ Daddu(temp, card, temp);
   __ Sb(card, temp, 0);
-  __ Bind(&done);
+  if (value_can_be_null) {
+    __ Bind(&done);
+  }
 }
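The value_can_be_null parameter lets callers that can prove the stored reference is non-null skip the Beqzc guard entirely. The marking itself is the standard card-table store, in which the biased table base doubles as the dirty byte value. A sketch, with the card shift as an assumed constant (see gc::accounting::CardTable):

    #include <cstdint>

    namespace {
    constexpr unsigned kCardShift = 10;  // assumed; CardTable::kCardShift
    }

    static void MarkGCCard(uint8_t* biased_begin, uintptr_t object,
                           uintptr_t value, bool value_can_be_null) {
      if (value_can_be_null && value == 0) {
        return;  // the Beqzc is only emitted when the value may be null
      }
      uint8_t* card = biased_begin + (object >> kCardShift);
      // The low byte of the biased base is the "dirty" marker (the Sb above).
      *card = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_begin));
    }

Dropping the branch matters because field and array stores are hot; the GetValueCanBeNull() plumbing in the callers below exists purely to feed this decision.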
 
 void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
@@ -1601,7 +1607,7 @@
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         if (needs_write_barrier) {
           DCHECK_EQ(value_type, Primitive::kPrimNot);
-          codegen_->MarkGCCard(obj, value);
+          codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
@@ -1866,13 +1872,13 @@
       locations->SetInAt(1, Location::RequiresFpuRegister());
       break;
   }
-  if (instruction->NeedsMaterialization()) {
+  if (!instruction->IsEmittedAtUseSite()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) {
-  if (!instruction->NeedsMaterialization()) {
+  if (instruction->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -2742,6 +2748,32 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  Mips64Label false_target;
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
+
 void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   new (GetGraph()->GetArena()) LocationSummary(info);
 }
@@ -2827,7 +2859,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
-                                                    const FieldInfo& field_info) {
+                                                    const FieldInfo& field_info,
+                                                    bool value_can_be_null) {
   Primitive::Type type = field_info.GetFieldType();
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
@@ -2869,7 +2902,7 @@
   if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
     DCHECK(locations->InAt(1).IsRegister());
     GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
-    codegen_->MarkGCCard(obj, src);
+    codegen_->MarkGCCard(obj, src, value_can_be_null);
   }
 }
 
@@ -2886,7 +2919,7 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
-  HandleFieldSet(instruction, instruction->GetFieldInfo());
+  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
 void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -3810,7 +3843,7 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
-  HandleFieldSet(instruction, instruction->GetFieldInfo());
+  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
 void LocationsBuilderMIPS64::VisitUnresolvedInstanceFieldGet(
@@ -4260,5 +4293,14 @@
   }
 }
 
+void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+}
+
+void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+}
+
 }  // namespace mips64
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 140ff95..08e5615 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -227,7 +227,9 @@
   void HandleBinaryOp(HBinaryOperation* operation);
   void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
-  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleFieldSet(HInstruction* instruction,
+                      const FieldInfo& field_info,
+                      bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -285,7 +287,7 @@
   Mips64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
   const Mips64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
 
-  void MarkGCCard(GpuRegister object, GpuRegister value);
+  void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null);
 
   // Register allocation.
 
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
index 644a3fb..96fe2a1 100644
--- a/compiler/optimizing/code_generator_utils.cc
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -96,7 +96,7 @@
 }
 
 bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input) {
-  return !cond_input->IsCondition() || cond_input->AsCondition()->NeedsMaterialization();
+  return !cond_input->IsCondition() || !cond_input->IsEmittedAtUseSite();
 }
 
 }  // namespace art
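This one-line change captures the renaming that runs through the whole patch: the old NeedsMaterialization() query is exactly the negation of the new IsEmittedAtUseSite(). A tiny illustrative equivalence, using a hypothetical stand-in type rather than the real art::HInstruction:

    #include <cassert>

    struct Instr {
      bool emitted_at_use_site = false;
      bool IsEmittedAtUseSite() const { return emitted_at_use_site; }
      // Retired query, kept here only to document the rewrite rule.
      bool NeedsMaterialization() const { return !IsEmittedAtUseSite(); }
    };

    int main() {
      Instr cond;
      assert(cond.NeedsMaterialization() == !cond.IsEmittedAtUseSite());
      return 0;
    }

Every !instruction->NeedsMaterialization() in the code generators therefore becomes instruction->IsEmittedAtUseSite(), and vice versa.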
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f7ccdd8..18d70da 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1218,11 +1218,14 @@
 }
 
 void CodeGeneratorX86::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
-  if (Primitive::Is64BitType(dst_type)) {
-    Move64(dst, src);
+  HParallelMove move(GetGraph()->GetArena());
+  if (dst_type == Primitive::kPrimLong && !src.IsConstant() && !src.IsFpuRegister()) {
+    move.AddMove(src.ToLow(), dst.ToLow(), Primitive::kPrimInt, nullptr);
+    move.AddMove(src.ToHigh(), dst.ToHigh(), Primitive::kPrimInt, nullptr);
   } else {
-    Move32(dst, src);
+    move.AddMove(src, dst, dst_type, nullptr);
   }
+  GetMoveResolver()->EmitNativeCode(&move);
 }
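MoveLocation now funnels everything through the parallel-move resolver instead of hand-written Move32/Move64 helpers; the only special case left is splitting a 64-bit core move into two independent 32-bit half-moves so the resolver can order or temp-swap them safely. Behaviorally, the split amounts to:

    #include <cstdint>

    // The kPrimLong case above: low and high words are registered as
    // two separate kPrimInt moves and resolved independently.
    static void Move64AsTwo32(const uint32_t src[2], uint32_t dst[2]) {
      dst[0] = src[0];  // move.AddMove(src.ToLow(),  dst.ToLow(),  kPrimInt)
      dst[1] = src[1];  // move.AddMove(src.ToHigh(), dst.ToHigh(), kPrimInt)
    }

Constants and FPU sources stay as a single move because ToLow()/ToHigh() only make sense for pair locations.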
 
 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
@@ -1559,10 +1562,36 @@
 
 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
-  GenerateTestAndBranch(deoptimize,
-                        /* condition_input_index */ 0,
-                        slow_path->GetEntryLabel(),
-                        /* false_target */ static_cast<Label*>(nullptr));
+  GenerateTestAndBranch<Label>(deoptimize,
+                               /* condition_input_index */ 0,
+                               slow_path->GetEntryLabel(),
+                               /* false_target */ nullptr);
+}
+
+void LocationsBuilderX86::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  Primitive::Type select_type = select->GetType();
+  HInstruction* cond = select->GetCondition();
+
+  if (Primitive::IsFloatingPointType(select_type)) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetInAt(1, Location::Any());
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    locations->SetInAt(2, Location::Any());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  NearLabel false_target;
+  GenerateTestAndBranch<NearLabel>(
+      select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
 }
 
 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -1628,7 +1657,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister());
       }
       break;
@@ -1637,7 +1666,7 @@
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
       locations->SetInAt(1, Location::RequiresFpuRegister());
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister());
       }
       break;
@@ -1645,7 +1674,7 @@
     default:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
         // We need a byte register.
         locations->SetOut(Location::RegisterLocation(ECX));
       }
@@ -1654,7 +1683,7 @@
 }
 
 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
-  if (!cond->NeedsMaterialization()) {
+  if (cond->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -2657,7 +2686,11 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::Any());
+      if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
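The pattern here (repeated for sub, mul and div below) is operand folding: when the right-hand side is an HX86LoadFromConstantTable emitted at its use site, it gets no location of its own, and the visitor instead addresses the constant pool directly through LiteralFloatAddress/LiteralDoubleAddress. A sketch of the decision, with hypothetical names mirroring the diff rather than the real API:

    struct Rhs {
      bool is_constant_table_load;
      bool emitted_at_use_site;
    };

    // True when the operand is folded into the using instruction and
    // must therefore not be given a register or stack location.
    static bool FoldedIntoUser(const Rhs& rhs) {
      return rhs.is_constant_table_load && rhs.emitted_at_use_site;
    }

The DCHECK in each locations function pins down the invariant: by the time code generation runs, any constant-table RHS reaching this path must have been marked as emitted at its use site.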
@@ -2721,7 +2754,7 @@
         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ addss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
@@ -2738,7 +2771,7 @@
         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ addsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
                    const_area->GetConstant()->AsDoubleConstant()->GetValue(),
@@ -2769,7 +2802,11 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::Any());
+      if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2819,7 +2856,7 @@
         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ subss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
@@ -2836,7 +2873,7 @@
         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ subsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
@@ -2879,7 +2916,11 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::Any());
+      if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -3000,7 +3041,7 @@
         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ mulss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
@@ -3018,7 +3059,7 @@
         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
@@ -3372,7 +3413,11 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::Any());
+      if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -3399,7 +3444,7 @@
         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ divss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
@@ -3416,7 +3461,7 @@
         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ divsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
                    const_area->GetConstant()->AsDoubleConstant()->GetValue(),
@@ -3957,6 +4002,27 @@
 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
 }
 
+void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kX86PointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
+  }
+  __ movl(locations->Out().AsRegister<Register>(),
+          Address(locations->InAt(0).AsRegister<Register>(), method_offset));
+}
+
 void LocationsBuilderX86::VisitNot(HNot* not_) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
@@ -5465,13 +5531,31 @@
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    } else if (destination.IsFpuRegister()) {
+      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
     } else {
       DCHECK(destination.IsStackSlot());
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
     }
+  } else if (source.IsRegisterPair()) {
+    size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt);
+    // Create stack space for 2 elements.
+    __ subl(ESP, Immediate(2 * elem_size));
+    __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
+    __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
+    __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+    // And remove the temporary stack space we allocated.
+    __ addl(ESP, Immediate(2 * elem_size));
   } else if (source.IsFpuRegister()) {
-    if (destination.IsFpuRegister()) {
+    if (destination.IsRegister()) {
+      __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
+    } else if (destination.IsFpuRegister()) {
       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
+    } else if (destination.IsRegisterPair()) {
+      XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
+      __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
+      __ psrlq(src_reg, Immediate(32));
+      __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
     } else if (destination.IsStackSlot()) {
       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
     } else {
@@ -5488,7 +5572,11 @@
       MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex());
     }
   } else if (source.IsDoubleStackSlot()) {
-    if (destination.IsFpuRegister()) {
+    if (destination.IsRegisterPair()) {
+      __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
+      __ movl(destination.AsRegisterPairHigh<Register>(),
+              Address(ESP, source.GetHighStackIndex(kX86WordSize)));
+    } else if (destination.IsFpuRegister()) {
       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
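The new FPU <-> core-register-pair cases are the interesting ones. Going from a pair to an xmm has no direct instruction, so the code spills both halves to two fresh stack slots and reloads them with one movsd; going from an xmm to a pair uses the movd/psrlq/movd sequence, which clobbers the source register in the process. A plain C++ model of the two directions:

    #include <cstdint>

    // xmm -> register pair: movd low half, shift, movd high half.
    static void XmmToPair(uint64_t xmm, uint32_t* lo, uint32_t* hi) {
      *lo = static_cast<uint32_t>(xmm);   // movd pair_lo, xmm
      xmm >>= 32;                         // psrlq xmm, 32 (clobbers the source)
      *hi = static_cast<uint32_t>(xmm);   // movd pair_hi, xmm
    }

    // register pair -> xmm: the stack round-trip joins the two halves,
    // since no single x86 instruction builds an xmm from two GPRs.
    static uint64_t PairToXmm(uint32_t lo, uint32_t hi) {
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }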
@@ -5878,7 +5966,7 @@
   Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(0) :
       Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -5896,7 +5984,7 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -5920,7 +6008,7 @@
       NearLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5950,7 +6038,7 @@
       }
       __ j(kEqual, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5975,7 +6063,7 @@
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -6091,7 +6179,7 @@
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
-  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(1) :
       Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -6118,7 +6206,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -6141,7 +6229,7 @@
       NearLabel loop, compare_classes;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -6154,7 +6242,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6181,7 +6270,7 @@
       __ j(kEqual, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -6193,7 +6282,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6211,7 +6301,7 @@
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -6225,7 +6315,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6233,7 +6324,8 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6421,23 +6513,24 @@
 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                                                    Location out,
                                                                    uint32_t offset,
-                                                                   Location temp) {
+                                                                   Location maybe_temp) {
   Register out_reg = out.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
-      // Save the value of `out` into `temp` before overwriting it
+      // Save the value of `out` into `maybe_temp` before overwriting it
       // in the following move operation, as we will need it for the
       // read barrier below.
-      __ movl(temp.AsRegister<Register>(), out_reg);
+      __ movl(maybe_temp.AsRegister<Register>(), out_reg);
       // /* HeapReference<Object> */ out = *(out + offset)
       __ movl(out_reg, Address(out_reg, offset));
-      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
     }
   } else {
     // Plain load with no read barrier.
@@ -6451,15 +6544,16 @@
                                                                     Location out,
                                                                     Location obj,
                                                                     uint32_t offset,
-                                                                    Location temp) {
+                                                                    Location maybe_temp) {
   Register out_reg = out.AsRegister<Register>();
   Register obj_reg = obj.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6859,7 +6953,7 @@
   locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
 
   // If we don't need to be materialized, we only need the inputs to be set.
-  if (!insn->NeedsMaterialization()) {
+  if (insn->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -6879,7 +6973,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
-  if (!insn->NeedsMaterialization()) {
+  if (insn->IsEmittedAtUseSite()) {
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 43e9543..0aef478 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -233,23 +233,29 @@
   //   out <- *(out + offset)
   //
   // while honoring heap poisoning and/or read barriers (if any).
-  // Register `temp` is used when generating a read barrier.
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location temp);
+                                        Location maybe_temp);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
   //   out <- *(obj + offset)
   //
   // while honoring heap poisoning and/or read barriers (if any).
-  // Register `temp` is used when generating a Baker's read barrier.
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
                                          uint32_t offset,
-                                         Location temp);
+                                         Location maybe_temp);
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2ce2d91..86ffb0f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1558,10 +1558,36 @@
 
 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
-  GenerateTestAndBranch(deoptimize,
-                        /* condition_input_index */ 0,
-                        slow_path->GetEntryLabel(),
-                        /* false_target */ static_cast<Label*>(nullptr));
+  GenerateTestAndBranch<Label>(deoptimize,
+                               /* condition_input_index */ 0,
+                               slow_path->GetEntryLabel(),
+                               /* false_target */ nullptr);
+}
+
+void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  NearLabel false_target;
+  GenerateTestAndBranch<NearLabel>(select,
+                                   /* condition_input_index */ 2,
+                                   /* true_target */ nullptr,
+                                   &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
 }
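// Sketch (plain C++, not ART code) of the control flow the VisitSelect
// implementation above emits: `out` is allocated to the first input
// (the false value), and the move of the true value is skipped when
// GenerateTestAndBranch takes the false_target branch.
static int SelectLoweringSketch(bool condition, int false_value, int true_value) {
  int out = false_value;   // SetOut(Location::SameAsFirstInput())
  if (condition) {         // GenerateTestAndBranch on input 2
    out = true_value;      // MoveLocation(locations->Out(), locations->InAt(1), ...)
  }                        // __ Bind(&false_target)
  return out;
}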
 
 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -1638,13 +1664,13 @@
       locations->SetInAt(1, Location::Any());
       break;
   }
-  if (cond->NeedsMaterialization()) {
+  if (!cond->IsEmittedAtUseSite()) {
     locations->SetOut(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
-  if (!cond->NeedsMaterialization()) {
+  if (cond->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -3959,6 +3985,27 @@
   // Nothing to do, the method is already at its location.
 }
 
+void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kX86_64PointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
+  }
+  __ movq(locations->Out().AsRegister<CpuRegister>(),
+          Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
+}
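// Sketch (illustrative only; the offsets below are hypothetical, not ART's
// embedded-table layout) of the load VisitClassTableGet emits: a single movq
// from the receiver's class at an offset picked by table kind, with IMT
// indices wrapped modulo the IMT size.
#include <cstdint>
static uintptr_t ClassTableGetSketch(const uintptr_t* klass_words,  // class object as words
                                     bool is_vtable,
                                     uint32_t index,
                                     uint32_t imt_size) {
  const uint32_t kVTableStartWord = 16;  // hypothetical start of the embedded vtable
  const uint32_t kImTableStartWord = 8;  // hypothetical start of the embedded IMT
  uint32_t word = is_vtable
      ? kVTableStartWord + index
      : kImTableStartWord + (index % imt_size);  // kImtSize wrap, as above
  return klass_words[word];  // movq out, [class + method_offset]
}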
+
 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
@@ -5490,7 +5537,7 @@
   Location cls = locations->InAt(1);
   Location out_loc =  locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
-  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(0) :
       Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -5508,7 +5555,7 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -5537,7 +5584,7 @@
       NearLabel loop, success;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5567,7 +5614,7 @@
       }
       __ j(kEqual, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5592,7 +5639,7 @@
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5708,7 +5755,7 @@
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(1) :
       Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -5735,7 +5782,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -5758,7 +5805,7 @@
       NearLabel loop, compare_classes;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -5771,7 +5818,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -5798,7 +5846,7 @@
       __ j(kEqual, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -5810,7 +5858,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5828,7 +5877,7 @@
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -5842,7 +5891,8 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -5850,7 +5900,8 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6020,23 +6071,24 @@
 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                                                       Location out,
                                                                       uint32_t offset,
-                                                                      Location temp) {
+                                                                      Location maybe_temp) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   if (kEmitCompilerReadBarrier) {
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
-      // Save the value of `out` into `temp` before overwriting it
+      // Save the value of `out` into `maybe_temp` before overwriting it
       // in the following move operation, as we will need it for the
       // read barrier below.
-      __ movl(temp.AsRegister<CpuRegister>(), out_reg);
+      __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
       // /* HeapReference<Object> */ out = *(out + offset)
       __ movl(out_reg, Address(out_reg, offset));
-      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
     }
   } else {
     // Plain load with no read barrier.
@@ -6050,15 +6102,16 @@
                                                                        Location out,
                                                                        Location obj,
                                                                        uint32_t offset,
-                                                                       Location temp) {
+                                                                       Location maybe_temp) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 82aabb0..9626590 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -227,23 +227,29 @@
   //   out <- *(out + offset)
   //
   // while honoring heap poisoning and/or read barriers (if any).
-  // Register `temp` is used when generating a read barrier.
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location temp);
+                                        Location maybe_temp);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
   //   out <- *(obj + offset)
   //
   // while honoring heap poisoning and/or read barriers (if any).
-  // Register `temp` is used when generating a Baker's read barrier.
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
                                          uint32_t offset,
-                                         Location temp);
+                                         Location maybe_temp);
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 19d63de..322a577 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -671,10 +671,10 @@
     then_block->AddInstruction(new (&allocator) HReturn(constant0));
     else_block->AddInstruction(new (&allocator) HReturn(constant1));
 
-    ASSERT_TRUE(equal->NeedsMaterialization());
+    ASSERT_FALSE(equal->IsEmittedAtUseSite());
     TransformToSsa(graph);
     PrepareForRegisterAllocation(graph).Run();
-    ASSERT_FALSE(equal->NeedsMaterialization());
+    ASSERT_TRUE(equal->IsEmittedAtUseSite());
 
     auto hook_before_codegen = [](HGraph* graph_in) {
       HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 91e4a99..feb8b20 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -133,8 +133,9 @@
 
   const uint32_t dominators[] = {
       kInvalidBlockId,
-      0,
-      kInvalidBlockId
+      3,
+      kInvalidBlockId,
+      0
   };
 
   TestCode(data1, dominators, sizeof(dominators) / sizeof(int));
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 3113677..962e77d 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -859,8 +859,12 @@
           value));
     }
   } else if (input->GetType() == Primitive::kPrimInt
-             && (input->IsPhi() || input->IsAnd() || input->IsOr() || input->IsXor())) {
-    // TODO: We need a data-flow analysis to determine if the Phi or
+             && (input->IsPhi() ||
+                 input->IsAnd() ||
+                 input->IsOr() ||
+                 input->IsXor() ||
+                 input->IsSelect())) {
+    // TODO: We need a data-flow analysis to determine if the Phi, Select, or

     //       binary operation is actually Boolean. Allow for now.
   } else if (input->GetType() != Primitive::kPrimBoolean) {
     AddError(StringPrintf(
@@ -893,6 +897,11 @@
   HandleBooleanInput(instruction, 0);
 }
 
+void SSAChecker::VisitSelect(HSelect* instruction) {
+  VisitInstruction(instruction);
+  HandleBooleanInput(instruction, 2);
+}
+
 void SSAChecker::VisitBooleanNot(HBooleanNot* instruction) {
   VisitInstruction(instruction);
   HandleBooleanInput(instruction, 0);
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 2e16bfe..8724cde 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -126,6 +126,7 @@
   void VisitCondition(HCondition* op) OVERRIDE;
   void VisitIf(HIf* instruction) OVERRIDE;
   void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE;
+  void VisitSelect(HSelect* instruction) OVERRIDE;
   void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
   void VisitConstant(HConstant* instruction) OVERRIDE;
   void VisitBoundType(HBoundType* instruction) OVERRIDE;
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index d4b9b71..d530564 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -164,7 +164,7 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
   ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
@@ -199,7 +199,7 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
   ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 32c3a92..9d796c1 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -21,6 +21,7 @@
 #include <cctype>
 #include <sstream>
 
+#include "bounds_check_elimination.h"
 #include "code_generator.h"
 #include "dead_code_elimination.h"
 #include "disassembler.h"
@@ -505,6 +506,8 @@
     if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
         || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)
         || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName)
+        || IsPass(BoundsCheckElimination::kBoundsCheckEliminationPassName)
+        || IsPass(RegisterAllocator::kRegisterAllocatorPassName)
         || IsPass(SsaBuilder::kSsaBuilderPassName)) {
       HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
       if (info == nullptr) {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 20c4f1f..35109fa 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -296,9 +296,29 @@
   return false;
 }
 
+HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
+                                                   HInstruction* receiver,
+                                                   uint32_t dex_pc) const {
+  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
+  return new (graph_->GetArena()) HInstanceFieldGet(
+      receiver,
+      Primitive::kPrimNot,
+      field->GetOffset(),
+      field->IsVolatile(),
+      field->GetDexFieldIndex(),
+      field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *field->GetDexFile(),
+      handles_->NewHandle(field->GetDexCache()),
+      dex_pc);
+}
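// Conceptual sketch (a hypothetical struct, not the mirror classes) of what
// the factored-out helper builds: an HInstanceFieldGet of the hidden
// java.lang.Object field that holds the object's class pointer.
struct ObjectSketch {
  void* shadow_klass_;  // stands in for shadow$_klass_
};
static void* GetReceiverClassSketch(ObjectSketch* receiver) {
  return receiver->shadow_klass_;  // the receiver's class, read as an instance field
}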
+
 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         const InlineCache& ic) {
+  DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
+      << invoke_instruction->DebugName();
+
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
   uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
   if (class_index == DexFile::kDexNoIndex) {
@@ -328,18 +348,8 @@
   }
 
   // We successfully inlined, now add a guard.
-  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
-  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
-  HInstanceFieldGet* field_get = new (graph_->GetArena()) HInstanceFieldGet(
-      receiver,
-      Primitive::kPrimNot,
-      field->GetOffset(),
-      field->IsVolatile(),
-      field->GetDexFieldIndex(),
-      field->GetDeclaringClass()->GetDexClassDefIndex(),
-      *field->GetDexFile(),
-      handles_->NewHandle(field->GetDexCache()),
-      invoke_instruction->GetDexPc());
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
 
   bool is_referrer =
       (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
@@ -351,16 +361,16 @@
                                                                /* needs_access_check */ false,
                                                                /* is_in_dex_cache */ true);
 
-  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, field_get);
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
   HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
       compare, invoke_instruction->GetDexPc());
   // TODO: Extend reference type propagation to understand the guard.
   if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(field_get, cursor);
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
   } else {
-    bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction());
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
   }
-  bb_cursor->InsertInstructionAfter(load_class, field_get);
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
   bb_cursor->InsertInstructionAfter(compare, load_class);
   bb_cursor->InsertInstructionAfter(deoptimize, compare);
   deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
@@ -374,13 +384,101 @@
   return true;
 }
 
-bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UNUSED,
+bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
-                                        const InlineCache& ic ATTRIBUTE_UNUSED) {
-  // TODO
-  VLOG(compiler) << "Unimplemented polymorphic inlining for "
-                 << PrettyMethod(resolved_method);
-  return false;
+                                        const InlineCache& ic) {
+  DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
+      << invoke_instruction->DebugName();
+  // This optimization only works under JIT for now.
+  DCHECK(Runtime::Current()->UseJit());
+  if (graph_->GetInstructionSet() == kMips || graph_->GetInstructionSet() == kMips64) {
+    // TODO: Support HClassTableGet for mips and mips64.
+    return false;
+  }
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+
+  DCHECK(resolved_method != nullptr);
+  ArtMethod* actual_method = nullptr;
+  // Check whether we are actually calling the same method among
+  // the different types seen.
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (ic.GetTypeAt(i) == nullptr) {
+      break;
+    }
+    ArtMethod* new_method = nullptr;
+    if (invoke_instruction->IsInvokeInterface()) {
+      new_method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
+          resolved_method, pointer_size);
+    } else {
+      DCHECK(invoke_instruction->IsInvokeVirtual());
+      new_method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
+          resolved_method, pointer_size);
+    }
+    if (actual_method == nullptr) {
+      actual_method = new_method;
+    } else if (actual_method != new_method) {
+      // Different methods; bail out.
+      return false;
+    }
+  }
+
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+  if (!TryInline(invoke_instruction, actual_method, /* do_rtp */ false)) {
+    return false;
+  }
+
+  // We successfully inlined, now add a guard.
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
+
+  size_t method_offset = invoke_instruction->IsInvokeVirtual()
+      ? actual_method->GetVtableIndex()
+      : invoke_instruction->AsInvokeInterface()->GetImtIndex();
+
+  Primitive::Type type = Is64BitInstructionSet(graph_->GetInstructionSet())
+      ? Primitive::kPrimLong
+      : Primitive::kPrimInt;
+  HClassTableGet* class_table_get = new (graph_->GetArena()) HClassTableGet(
+      receiver_class,
+      type,
+      invoke_instruction->IsInvokeVirtual() ? HClassTableGet::kVTable : HClassTableGet::kIMTable,
+      method_offset,
+      invoke_instruction->GetDexPc());
+
+  HConstant* constant;
+  if (type == Primitive::kPrimLong) {
+    constant = graph_->GetLongConstant(
+        reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
+  } else {
+    constant = graph_->GetIntConstant(
+        reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
+  }
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(class_table_get, constant);
+  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+      compare, invoke_instruction->GetDexPc());
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(class_table_get, receiver_class);
+  bb_cursor->InsertInstructionAfter(compare, class_table_get);
+  bb_cursor->InsertInstructionAfter(deoptimize, compare);
+  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+
+  // Run type propagation to get the guard typed.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  rtp_fixup.Run();
+
+  MaybeRecordStat(kInlinedPolymorphicCall);
+
+  return true;
 }
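// Sketch (assumed names, not the HGraph API) of the guard shape built by
// TryInlinePolymorphicCall: read the receiver's class, load the vtable/IMT
// slot via HClassTableGet, and deoptimize if it no longer resolves to the
// method whose body was inlined.
static void PolymorphicGuardSketch(void* const* class_table,  // from the receiver's class
                                   size_t slot,               // vtable index or IMT index
                                   const void* inlined_method,
                                   void (*deoptimize)()) {
  if (class_table[slot] != inlined_method) {  // HClassTableGet + HNotEqual
    deoptimize();                             // HDeoptimize, environment copied
  }
  // Otherwise execution falls through into the inlined body.
}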
 
 bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
@@ -419,7 +517,10 @@
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
     VLOG(compiler) << "Method " << PrettyMethod(method)
-                   << " is too big to inline";
+                   << " is too big to inline: "
+                   << code_item->insns_size_in_code_units_
+                   << " > "
+                   << inline_max_code_units;
     return false;
   }
 
@@ -639,9 +740,12 @@
 
   for (; !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    if (block->IsLoopHeader()) {
+
+    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
+      // Don't inline methods with irreducible loops, as they could prevent
+      // some optimizations from running.
       VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                     << " could not be inlined because it contains a loop";
+                     << " could not be inlined because it contains an irreducible loop";
       return false;
     }
 
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 8de510e..3c01751 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -85,6 +85,11 @@
                          bool same_dex_file,
                          bool do_rtp = true);
 
+  HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
+                                           HInstruction* receiver,
+                                           uint32_t dex_pc) const
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
   HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 49fc8c7..7d3a723 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -76,6 +76,8 @@
   void VisitSub(HSub* instruction) OVERRIDE;
   void VisitUShr(HUShr* instruction) OVERRIDE;
   void VisitXor(HXor* instruction) OVERRIDE;
+  void VisitSelect(HSelect* select) OVERRIDE;
+  void VisitIf(HIf* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
   void VisitInvoke(HInvoke* invoke) OVERRIDE;
   void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
@@ -559,14 +561,86 @@
 }
 
 void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) {
-  HInstruction* parent = bool_not->InputAt(0);
-  if (parent->IsBooleanNot()) {
-    HInstruction* value = parent->InputAt(0);
-    // Replace (!(!bool_value)) with bool_value
-    bool_not->ReplaceWith(value);
+  HInstruction* input = bool_not->InputAt(0);
+  HInstruction* replace_with = nullptr;
+
+  if (input->IsIntConstant()) {
+    // Replace !(true/false) with false/true.
+    if (input->AsIntConstant()->IsOne()) {
+      replace_with = GetGraph()->GetIntConstant(0);
+    } else {
+      DCHECK(input->AsIntConstant()->IsZero());
+      replace_with = GetGraph()->GetIntConstant(1);
+    }
+  } else if (input->IsBooleanNot()) {
+    // Replace (!(!bool_value)) with bool_value.
+    replace_with = input->InputAt(0);
+  } else if (input->IsCondition() &&
+             // Don't change FP compares. The definition of compares involving
+             // NaNs forces the compares to be done as written by the user.
+             !Primitive::IsFloatingPointType(input->InputAt(0)->GetType())) {
+    // Replace condition with its opposite.
+    replace_with = GetGraph()->InsertOppositeCondition(input->AsCondition(), bool_not);
+  }
+
+  if (replace_with != nullptr) {
+    bool_not->ReplaceWith(replace_with);
     bool_not->GetBlock()->RemoveInstruction(bool_not);
-    // It is possible that `parent` is dead at this point but we leave
-    // its removal to DCE for simplicity.
+    RecordSimplification();
+  }
+}
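// A small standalone check of why the Condition rewrite above excludes
// floating-point compares: with a NaN operand, !(a < b) is true while the
// "opposite" compare (a >= b) is false, so the rewrite would change behavior.
#include <cassert>
#include <cmath>
int main() {
  double a = std::nan("");
  double b = 1.0;
  assert(!(a < b) && !(a >= b));  // NaN makes both compares false,
  assert(!(a < b) != (a >= b));   // so !(a < b) and (a >= b) disagree.
  return 0;
}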
+
+void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
+  HInstruction* replace_with = nullptr;
+  HInstruction* condition = select->GetCondition();
+  HInstruction* true_value = select->GetTrueValue();
+  HInstruction* false_value = select->GetFalseValue();
+
+  if (condition->IsBooleanNot()) {
+    // Change ((!cond) ? x : y) to (cond ? y : x).
+    condition = condition->InputAt(0);
+    std::swap(true_value, false_value);
+    select->ReplaceInput(false_value, 0);
+    select->ReplaceInput(true_value, 1);
+    select->ReplaceInput(condition, 2);
+    RecordSimplification();
+  }
+
+  if (true_value == false_value) {
+    // Replace (cond ? x : x) with (x).
+    replace_with = true_value;
+  } else if (condition->IsIntConstant()) {
+    if (condition->AsIntConstant()->IsOne()) {
+      // Replace (true ? x : y) with (x).
+      replace_with = true_value;
+    } else {
+      // Replace (false ? x : y) with (y).
+      DCHECK(condition->AsIntConstant()->IsZero());
+      replace_with = false_value;
+    }
+  } else if (true_value->IsIntConstant() && false_value->IsIntConstant()) {
+    if (true_value->AsIntConstant()->IsOne() && false_value->AsIntConstant()->IsZero()) {
+      // Replace (cond ? true : false) with (cond).
+      replace_with = condition;
+    } else if (true_value->AsIntConstant()->IsZero() && false_value->AsIntConstant()->IsOne()) {
+      // Replace (cond ? false : true) with (!cond).
+      replace_with = GetGraph()->InsertOppositeCondition(condition, select);
+    }
+  }
+
+  if (replace_with != nullptr) {
+    select->ReplaceWith(replace_with);
+    select->GetBlock()->RemoveInstruction(select);
+    RecordSimplification();
+  }
+}
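// Hedged summary (plain expressions, not the HGraph API) of the Select
// rewrites applied above:
//   (!c ? x : y)         -> (c ? y : x)   // fold the negation, swap arms
//   (c ? x : x)          -> x             // identical arms
//   (true ? x : y)       -> x             // constant condition
//   (false ? x : y)      -> y
//   (c ? true : false)   -> c             // select of the canonical booleans
//   (c ? false : true)   -> !c            // via InsertOppositeCondition
constexpr int SelectBothArmsSame(bool c, int x) { return c ? x : x; }
static_assert(SelectBothArmsSame(true, 7) == SelectBothArmsSame(false, 7),
              "identical arms make the condition irrelevant");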
+
+void InstructionSimplifierVisitor::VisitIf(HIf* instruction) {
+  HInstruction* condition = instruction->InputAt(0);
+  if (condition->IsBooleanNot()) {
+    // Swap successors if input is negated.
+    instruction->ReplaceInput(condition->InputAt(0), 0);
+    instruction->GetBlock()->SwapSuccessors();
     RecordSimplification();
   }
 }
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index c6da9a3..a6be324 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -131,6 +131,16 @@
       return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
           Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat;
 
+    // Floating-point tests.
+    case kIntrinsicFloatIsInfinite:
+      return Intrinsics::kFloatIsInfinite;
+    case kIntrinsicDoubleIsInfinite:
+      return Intrinsics::kDoubleIsInfinite;
+    case kIntrinsicFloatIsNaN:
+      return Intrinsics::kFloatIsNaN;
+    case kIntrinsicDoubleIsNaN:
+      return Intrinsics::kDoubleIsNaN;
+
     // Bit manipulations.
     case kIntrinsicReverseBits:
       switch (GetType(method.d.data, true)) {
@@ -176,6 +186,46 @@
       }
 
     // Misc data processing.
+    case kIntrinsicBitCount:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerBitCount;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongBitCount;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+    case kIntrinsicCompare:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerCompare;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongCompare;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+    case kIntrinsicHighestOneBit:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerHighestOneBit;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongHighestOneBit;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+    case kIntrinsicLowestOneBit:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerLowestOneBit;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongLowestOneBit;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
     case kIntrinsicNumberOfLeadingZeros:
       switch (GetType(method.d.data, true)) {
         case Primitive::kPrimInt:
@@ -196,6 +246,16 @@
           LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
           UNREACHABLE();
       }
+    case kIntrinsicSignum:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerSignum;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongSignum;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
 
     // Abs.
     case kIntrinsicAbsDouble:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index b1fbf28..e8912b3 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -807,7 +807,8 @@
 }
 
 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
-                                                HInvoke* invoke) {
+                                                HInvoke* invoke,
+                                                Primitive::Type type) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
@@ -817,11 +818,15 @@
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  // If heap poisoning is enabled, we don't want the unpoisoning
+  // operations to potentially clobber the output.
+  Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+      ? Location::kOutputOverlap
+      : Location::kNoOutputOverlap;
+  locations->SetOut(Location::RequiresRegister(), overlaps);
 
   locations->AddTemp(Location::RequiresRegister());  // Pointer.
   locations->AddTemp(Location::RequiresRegister());  // Temp 1.
-  locations->AddTemp(Location::RequiresRegister());  // Temp 2.
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) {
@@ -856,7 +861,12 @@
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
     codegen->GetAssembler()->PoisonHeapReference(expected_lo);
-    codegen->GetAssembler()->PoisonHeapReference(value_lo);
+    if (value_lo == expected_lo) {
+      // Do not poison `value_lo`, as it is the same register as
+      // `expected_lo`, which has just been poisoned.
+    } else {
+      codegen->GetAssembler()->PoisonHeapReference(value_lo);
+    }
   }
 
   // do {
@@ -892,13 +902,18 @@
   __ mov(out, ShifterOperand(0), CC);
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
-    codegen->GetAssembler()->UnpoisonHeapReference(value_lo);
     codegen->GetAssembler()->UnpoisonHeapReference(expected_lo);
+    if (value_lo == expected_lo) {
+      // Do not unpoison `value_lo`, as it is the same register as
+      // `expected_lo`, which has just been unpoisoned.
+    } else {
+      codegen->GetAssembler()->UnpoisonHeapReference(value_lo);
+    }
   }
 }
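// Toy illustration (assuming poisoning is self-inverse, e.g. a negation of
// the reference bits; the encoding here is hypothetical) of the aliasing bug
// fixed above: when `value_lo` and `expected_lo` share a register, poisoning
// it twice cancels out and an unpoisoned reference reaches the CAS loop.
#include <cassert>
#include <cstdint>
static uint32_t Poison(uint32_t ref) { return static_cast<uint32_t>(-ref); }
int main() {
  const uint32_t ref = 0x12345678u;
  assert(Poison(Poison(ref)) == ref);  // double poisoning is a no-op,
                                       // hence the value_lo == expected_lo guard.
  return 0;
}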
 
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
   // The UnsafeCASObject intrinsic is missing a read barrier, and
@@ -906,16 +921,12 @@
   // Turn it off temporarily as a quick fix, until the read barrier is
   // implemented (see TODO in GenCAS below).
   //
-  // Also, the UnsafeCASObject intrinsic does not always work when heap
-  // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
-  // off temporarily as a quick fix (b/26204023).
-  //
-  // TODO(rpl): Fix these two issues and re-enable this intrinsic.
-  if (kEmitCompilerReadBarrier || kPoisonHeapReferences) {
+  // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers.
+  if (kEmitCompilerReadBarrier) {
     return;
   }
 
-  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
   GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
@@ -1577,14 +1588,12 @@
 void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
 UNIMPLEMENTED_INTRINSIC(IntegerReverse)
 UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
 UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
-UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
 UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
@@ -1619,6 +1628,26 @@
 UNIMPLEMENTED_INTRINSIC(MathTan)
 UNIMPLEMENTED_INTRINSIC(MathTanh)
 
+UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
+UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
+
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
+
+// Rotate operations are handled as HRor instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 81cab86..d5ed585 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -752,21 +752,33 @@
   Register trg = RegisterFrom(trg_loc, type);
   bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
-  MemOperand mem_op(base.X(), offset);
-  if (is_volatile) {
-    if (use_acquire_release) {
-      codegen->LoadAcquire(invoke, trg, mem_op);
-    } else {
-      codegen->Load(type, trg, mem_op);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
+    UseScratchRegisterScope temps(masm);
+    Register temp = temps.AcquireW();
+    codegen->GenerateArrayLoadWithBakerReadBarrier(
+        invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+    if (is_volatile && !use_acquire_release) {
       __ Dmb(InnerShareable, BarrierReads);
     }
   } else {
-    codegen->Load(type, trg, mem_op);
-  }
+    // Other cases.
+    MemOperand mem_op(base.X(), offset);
+    if (is_volatile) {
+      if (use_acquire_release) {
+        codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
+      } else {
+        codegen->Load(type, trg, mem_op);
+        __ Dmb(InnerShareable, BarrierReads);
+      }
+    } else {
+      codegen->Load(type, trg, mem_op);
+    }
 
-  if (type == Primitive::kPrimNot) {
-    DCHECK(trg.IsW());
-    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+    if (type == Primitive::kPrimNot) {
+      DCHECK(trg.IsW());
+      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+    }
   }
 }
 
@@ -974,7 +986,9 @@
                codegen_);
 }
 
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
+                                       HInvoke* invoke,
+                                       Primitive::Type type) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
@@ -984,7 +998,12 @@
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  // If heap poisoning is enabled, we don't want the unpoisoning
+  // operations to potentially clobber the output.
+  Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+      ? Location::kOutputOverlap
+      : Location::kNoOutputOverlap;
+  locations->SetOut(Location::RequiresRegister(), overlaps);
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
@@ -1015,7 +1034,12 @@
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
     codegen->GetAssembler()->PoisonHeapReference(expected);
-    codegen->GetAssembler()->PoisonHeapReference(value);
+    if (value.Is(expected)) {
+      // Do not poison `value`, as it is the same register as
+      // `expected`, which has just been poisoned.
+    } else {
+      codegen->GetAssembler()->PoisonHeapReference(value);
+    }
   }
 
   // do {
@@ -1026,10 +1050,15 @@
   vixl::Label loop_head, exit_loop;
   if (use_acquire_release) {
     __ Bind(&loop_head);
-    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-    // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+    // the reference stored in the object before attempting the CAS,
+    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+    // implementation.
+    //
     // Note that this code is not (yet) used when read barriers are
     // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
     __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1060,16 +1089,21 @@
   __ Cset(out, eq);
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
-    codegen->GetAssembler()->UnpoisonHeapReference(value);
     codegen->GetAssembler()->UnpoisonHeapReference(expected);
+    if (value.Is(expected)) {
+      // Do not unpoison `value`, as it is the same register as
+      // `expected`, which has just been unpoisoned.
+    } else {
+      codegen->GetAssembler()->UnpoisonHeapReference(value);
+    }
   }
 }
 
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
   // The UnsafeCASObject intrinsic is missing a read barrier, and
@@ -1077,16 +1111,12 @@
   // Turn it off temporarily as a quick fix, until the read barrier is
   // implemented (see TODO in GenCAS below).
   //
-  // Also, the UnsafeCASObject intrinsic does not always work when heap
-  // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
-  // off temporarily as a quick fix (b/26204023).
-  //
-  // TODO(rpl): Fix these two issues and re-enable this intrinsic.
-  if (kEmitCompilerReadBarrier || kPoisonHeapReferences) {
+  // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers.
+  if (kEmitCompilerReadBarrier) {
     return;
   }
 
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
@@ -1447,10 +1477,8 @@
 void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
-UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
@@ -1474,6 +1502,26 @@
 UNIMPLEMENTED_INTRINSIC(MathTan)
 UNIMPLEMENTED_INTRINSIC(MathTanh)
 
+UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
+UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
+
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
+
+// Rotate operations are handled as HRor instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 2e87546..88217b3 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -23,21 +23,35 @@
 
 #define INTRINSICS_LIST(V) \
   V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(DoubleIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(DoubleIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(FloatIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(FloatIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bc126a2..0d9cf09 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -935,6 +935,9 @@
 void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
 UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
 UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
 UNIMPLEMENTED_INTRINSIC(MathAbsInt)
@@ -1010,6 +1013,21 @@
 UNIMPLEMENTED_INTRINSIC(MathSinh)
 UNIMPLEMENTED_INTRINSIC(MathTan)
 UNIMPLEMENTED_INTRINSIC(MathTanh)
+
+UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
+UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
+
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
+
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8b45ea7..cba84fa 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1141,7 +1141,8 @@
   }
 
   if (type == Primitive::kPrimNot) {
-    codegen->MarkGCCard(base, value);
+    bool value_can_be_null = true;  // TODO: Is it worth finding out this information?
+    codegen->MarkGCCard(base, value, value_can_be_null);
   }
 }
 
@@ -1287,6 +1288,12 @@
   DCHECK_NE(offset, out);
   DCHECK_NE(expected, out);
 
+  if (type == Primitive::kPrimNot) {
+    // Mark card for object assuming new value is stored.
+    bool value_can_be_null = true;  // TODO: Is it worth finding out this information?
+    codegen->MarkGCCard(base, value, value_can_be_null);
+  }
+
   // do {
   //   tmp_value = [tmp_ptr] - expected;
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
@@ -1724,6 +1731,9 @@
 void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
 
@@ -1750,6 +1760,20 @@
 UNIMPLEMENTED_INTRINSIC(MathTan)
 UNIMPLEMENTED_INTRINSIC(MathTanh)
 
+UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
+UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
+
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
+
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 677f2e9..acc40bc 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2303,6 +2303,81 @@
   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
 }
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support. This results in generating
+    // a call for the intrinsic rather than direct code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  if (is_long) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    value = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  // Handle the non-constant cases.
+  if (!is_long) {
+    if (src.IsRegister()) {
+      __ popcntl(out, src.AsRegister<Register>());
+    } else {
+      DCHECK(src.IsStackSlot());
+      __ popcntl(out, Address(ESP, src.GetStackIndex()));
+    }
+  } else {
+    // The 64-bit case needs to worry about two parts.
+    Register temp = locations->GetTemp(0).AsRegister<Register>();
+    if (src.IsRegisterPair()) {
+      __ popcntl(temp, src.AsRegisterPairLow<Register>());
+      __ popcntl(out, src.AsRegisterPairHigh<Register>());
+    } else {
+      DCHECK(src.IsDoubleStackSlot());
+      __ popcntl(temp, Address(ESP, src.GetStackIndex()));
+      __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
+    }
+    __ addl(out, temp);
+  }
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
@@ -2518,11 +2593,27 @@
 
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
+
+UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
+UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
+
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
+
+// Rotate operations are handled as HRor instructions.
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
 #undef UNIMPLEMENTED_INTRINSIC
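
The 64-bit path of GenBitCount above has no 64-bit popcnt available on 32-bit x86, so it counts each half of the register pair separately and adds the results. A sketch of the same computation in portable C++, using the GCC/Clang builtin as a stand-in for the popcntl instruction:

#include <cstdint>

int BitCount64On32(uint64_t value) {
  uint32_t lo = static_cast<uint32_t>(value);        // popcntl temp, low half
  uint32_t hi = static_cast<uint32_t>(value >> 32);  // popcntl out, high half
  return __builtin_popcount(hi) + __builtin_popcount(lo);  // addl out, temp
}
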
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 690cf3d..6ccc5d1 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2368,6 +2368,70 @@
   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
 }
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support. This results in generating
+    // a call for the intrinsic rather than direct code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    value = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  if (src.IsRegister()) {
+    if (is_long) {
+      __ popcntq(out, src.AsRegister<CpuRegister>());
+    } else {
+      __ popcntl(out, src.AsRegister<CpuRegister>());
+    }
+  } else if (is_long) {
+    DCHECK(src.IsDoubleStackSlot());
+    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  } else {
+    DCHECK(src.IsStackSlot());
+    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
@@ -2528,6 +2592,22 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+
+UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
+UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
+
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
+
+// Rotate operations are handled as HRor instructions.
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
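
As on x86, GenBitCount above folds constant inputs at compile time; the narrowing cast matters, since Integer.bitCount must only see the low 32 bits of a wider constant. A sketch of the folding rule, with compiler builtins standing in for the POPCOUNT utility:

#include <cstdint>

int64_t FoldBitCount(int64_t value, bool is_long) {
  return is_long
      ? __builtin_popcountll(static_cast<uint64_t>(value))
      : __builtin_popcount(static_cast<uint32_t>(value));
}
// FoldBitCount(0xffffffff00000001, /* is_long */ false) == 1
// FoldBitCount(0xffffffff00000001, /* is_long */ true)  == 33
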
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 926f939..991f8f7 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -278,9 +278,9 @@
   // Test for the phi.
   interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval();
   range = interval->GetFirstRange();
-  // Instruction is consumed by the if.
+  // Instruction is an input of the non-materialized Equal and hence stays live until the If.
   ASSERT_EQ(14u, range->GetStart());
-  ASSERT_EQ(17u, range->GetEnd());
+  ASSERT_EQ(19u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 2eabadf..c057eca 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -288,9 +288,10 @@
 
   // Make sure the loop has only one pre header. This simplifies SSA building by having
   // to just look at the pre header to know which locals are initialized at entry of the
-  // loop.
+  // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining
+  // this graph.
   size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges();
-  if (number_of_incomings != 1) {
+  if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) {
     HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
     AddBlock(pre_header);
     pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc()));
@@ -721,6 +722,22 @@
   RemoveInstruction(initial);
 }
 
+void HBasicBlock::MoveInstructionBefore(HInstruction* insn, HInstruction* cursor) {
+  DCHECK(!cursor->IsPhi());
+  DCHECK(!insn->IsPhi());
+  DCHECK(!insn->IsControlFlow());
+  DCHECK(insn->CanBeMoved());
+  DCHECK(!insn->HasSideEffects());
+
+  HBasicBlock* from_block = insn->GetBlock();
+  HBasicBlock* to_block = cursor->GetBlock();
+  DCHECK(from_block != to_block);
+
+  from_block->RemoveInstruction(insn, /* ensure_safety */ false);
+  insn->SetBlock(to_block);
+  to_block->instructions_.InsertInstructionBefore(insn, cursor);
+}
+
 static void Add(HInstructionList* instruction_list,
                 HBasicBlock* block,
                 HInstruction* instruction) {
@@ -1837,6 +1854,7 @@
     DCHECK(GetBlocks()[0]->IsEntryBlock());
     DCHECK(GetBlocks()[2]->IsExitBlock());
     DCHECK(!body->IsExitBlock());
+    DCHECK(!body->IsInLoop());
     HInstruction* last = body->GetLastInstruction();
 
     invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions());
@@ -1895,7 +1913,7 @@
     // Update the meta information surrounding blocks:
     // (1) the graph they are now in,
     // (2) the reverse post order of that graph,
-    // (3) the potential loop information they are now in,
+    // (3) their potential loop information, inner and outer,
     // (4) try block membership.
     // Note that we do not need to update catch phi inputs because they
     // correspond to the register file of the outer method which the inlinee
@@ -1924,15 +1942,24 @@
     for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
       HBasicBlock* current = it.Current();
       if (current != exit_block_ && current != entry_block_ && current != first) {
-        DCHECK(!current->IsInLoop());
         DCHECK(current->GetTryCatchInformation() == nullptr);
         DCHECK(current->GetGraph() == this);
         current->SetGraph(outer_graph);
         outer_graph->AddBlock(current);
         outer_graph->reverse_post_order_[++index_of_at] = current;
-        if (loop_info != nullptr) {
+        if (!current->IsInLoop()) {
           current->SetLoopInformation(loop_info);
-          for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
+        } else if (current->IsLoopHeader()) {
+          // Clear the information about which blocks are contained in that loop. Since the
+          // information is stored as a bit vector based on block ids, we have to update
+          // it, as those block ids were specific to the callee graph and we are now adding
+          // these blocks to the caller graph.
+          current->GetLoopInformation()->ClearAllBlocks();
+        }
+        if (current->IsInLoop()) {
+          for (HLoopInformationOutwardIterator loop_it(*current);
+               !loop_it.Done();
+               loop_it.Advance()) {
             loop_it.Current()->Add(current);
           }
         }
@@ -1945,7 +1972,9 @@
     outer_graph->AddBlock(to);
     outer_graph->reverse_post_order_[++index_of_at] = to;
     if (loop_info != nullptr) {
-      to->SetLoopInformation(loop_info);
+      if (!to->IsInLoop()) {
+        to->SetLoopInformation(loop_info);
+      }
       for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
         loop_it.Current()->Add(to);
       }
@@ -2208,10 +2237,7 @@
     SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC()));
   }
   // Adjust method's exception status from intrinsic table.
-  switch (exceptions) {
-    case kNoThrow: SetCanThrow(false); break;
-    case kCanThrow: SetCanThrow(true); break;
-  }
+  SetCanThrow(exceptions == kCanThrow);
 }
 
 bool HNewInstance::IsStringAlloc() const {
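
The ClearAllBlocks call introduced above exists because loop membership is stored as a bit vector indexed by block id, and ids are reassigned when the inlinee's blocks join the caller graph. An illustrative model of that representation (not the ART classes):

#include <cstddef>
#include <vector>

struct LoopBlocks {
  std::vector<bool> bits_;  // bit i set <=> block with id i is in the loop
  void Add(size_t block_id) {
    if (block_id >= bits_.size()) {
      bits_.resize(block_id + 1);
    }
    bits_[block_id] = true;
  }
  void ClearAllBits() { bits_.assign(bits_.size(), false); }
};
// After inlining, ids recorded against the callee graph are stale in the
// caller, so the loop header's set is cleared and repopulated with new ids.
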
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e222ef7..d90c1fb 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -159,7 +159,7 @@
 
   static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
 
-  static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+  static bool IsValidHandle(TypeHandle handle) {
     return handle.GetReference() != nullptr;
   }
 
@@ -689,6 +689,10 @@
   void Add(HBasicBlock* block);
   void Remove(HBasicBlock* block);
 
+  void ClearAllBlocks() {
+    blocks_.ClearAllBits();
+  }
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -1007,6 +1011,7 @@
   // Replace instruction `initial` with `replacement` within this block.
   void ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                        HInstruction* replacement);
+  void MoveInstructionBefore(HInstruction* insn, HInstruction* cursor);
   void AddPhi(HPhi* phi);
   void InsertPhiAfter(HPhi* instruction, HPhi* cursor);
   // RemoveInstruction and RemovePhi delete a given instruction from the respective
@@ -1157,6 +1162,7 @@
   M(BoundsCheck, Instruction)                                           \
   M(BoundType, Instruction)                                             \
   M(CheckCast, Instruction)                                             \
+  M(ClassTableGet, Instruction)                                         \
   M(ClearException, Instruction)                                        \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
@@ -1216,6 +1222,7 @@
   M(UnresolvedInstanceFieldSet, Instruction)                            \
   M(UnresolvedStaticFieldGet, Instruction)                              \
   M(UnresolvedStaticFieldSet, Instruction)                              \
+  M(Select, Instruction)                                                \
   M(StoreLocal, Instruction)                                            \
   M(Sub, BinaryOperation)                                               \
   M(SuspendCheck, Instruction)                                          \
@@ -1815,6 +1822,7 @@
         dex_pc_(dex_pc),
         id_(-1),
         ssa_index_(-1),
+        emitted_at_use_site_(false),
         environment_(nullptr),
         locations_(nullptr),
         live_interval_(nullptr),
@@ -2077,6 +2085,9 @@
   // The caller must ensure that this is safe to do.
   void RemoveEnvironmentUsers();
 
+  bool IsEmittedAtUseSite() const { return emitted_at_use_site_; }
+  void MarkEmittedAtUseSite() { emitted_at_use_site_ = true; }
+
  protected:
   virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
   virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
@@ -2098,6 +2109,10 @@
   // When doing liveness analysis, instructions that have uses get an SSA index.
   int ssa_index_;
 
+  // If set, the machine code for this instruction is assumed to be generated by
+  // its users. Used by liveness analysis to compute use positions accordingly.
+  bool emitted_at_use_site_;
+
   // List of instructions that have this instruction as input.
   HUseList<HInstruction*> uses_;
 
@@ -2538,6 +2553,44 @@
   DISALLOW_COPY_AND_ASSIGN(HCurrentMethod);
 };
 
+// Fetches an ArtMethod from the virtual table or the interface method table
+// of a class.
+class HClassTableGet : public HExpression<1> {
+ public:
+  enum TableKind {
+    kVTable,
+    kIMTable,
+  };
+  HClassTableGet(HInstruction* cls,
+                 Primitive::Type type,
+                 TableKind kind,
+                 size_t index,
+                 uint32_t dex_pc)
+      : HExpression(type, SideEffects::None(), dex_pc),
+        index_(index),
+        table_kind_(kind) {
+    SetRawInputAt(0, cls);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return other->AsClassTableGet()->GetIndex() == index_ &&
+        other->AsClassTableGet()->GetTableKind() == table_kind_;
+  }
+
+  TableKind GetTableKind() const { return table_kind_; }
+  size_t GetIndex() const { return index_; }
+
+  DECLARE_INSTRUCTION(ClassTableGet);
+
+ private:
+  // The index of the ArtMethod in the table.
+  const size_t index_;
+  const TableKind table_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HClassTableGet);
+};
+
 // PackedSwitch (jump table). A block ending with a PackedSwitch instruction will
 // have one successor for each entry in the switch table, and the final successor
 // will be the block containing the next Dex opcode.
@@ -2707,12 +2760,8 @@
  public:
   HCondition(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HBinaryOperation(Primitive::kPrimBoolean, first, second, SideEffects::None(), dex_pc),
-        needs_materialization_(true),
         bias_(ComparisonBias::kNoBias) {}
 
-  bool NeedsMaterialization() const { return needs_materialization_; }
-  void ClearNeedsMaterialization() { needs_materialization_ = false; }
-
   // For code generation purposes, returns whether this instruction is just before
   // `instruction`, and disregard moves in between.
   bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const;
@@ -2744,10 +2793,6 @@
   }
 
  private:
-  // For register allocation purposes, returns whether this instruction needs to be
-  // materialized (that is, not just be in the processor flags).
-  bool needs_materialization_;
-
   // Needed if we merge a HCompare into a HCondition.
   ComparisonBias bias_;
 
@@ -3689,19 +3734,13 @@
     DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
-  HNewInstance* GetThisArgumentOfStringInit() const {
+  HInstruction* GetAndRemoveThisArgumentOfStringInit() {
     DCHECK(IsStringInit());
     size_t index = InputCount() - 1;
-    DCHECK(InputAt(index)->IsNewInstance());
-    return InputAt(index)->AsNewInstance();
-  }
-
-  void RemoveThisArgumentOfStringInit() {
-    DCHECK(IsStringInit());
-    size_t index = InputCount() - 1;
-    DCHECK(InputAt(index)->IsNewInstance());
+    HInstruction* input = InputAt(index);
     RemoveAsUserOfInput(index);
     inputs_.pop_back();
+    return input;
   }
 
   // Is this a call to a static method whose declaring class has an
@@ -5588,6 +5627,41 @@
   DISALLOW_COPY_AND_ASSIGN(HMonitorOperation);
 };
 
+class HSelect : public HExpression<3> {
+ public:
+  HSelect(HInstruction* condition,
+          HInstruction* true_value,
+          HInstruction* false_value,
+          uint32_t dex_pc)
+      : HExpression(HPhi::ToPhiType(true_value->GetType()), SideEffects::None(), dex_pc) {
+    DCHECK_EQ(HPhi::ToPhiType(true_value->GetType()), HPhi::ToPhiType(false_value->GetType()));
+
+    // First input must be `true_value` or `false_value` to allow codegens to
+    // use the SameAsFirstInput allocation policy. We make it `false_value`, so
+    // that architectures which implement HSelect as a conditional move do not
+    // need to invert the condition.
+    SetRawInputAt(0, false_value);
+    SetRawInputAt(1, true_value);
+    SetRawInputAt(2, condition);
+  }
+
+  HInstruction* GetFalseValue() const { return InputAt(0); }
+  HInstruction* GetTrueValue() const { return InputAt(1); }
+  HInstruction* GetCondition() const { return InputAt(2); }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+
+  bool CanBeNull() const OVERRIDE {
+    return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull();
+  }
+
+  DECLARE_INSTRUCTION(Select);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HSelect);
+};
+
 class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
  public:
   MoveOperands(Location source,
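
A sketch of why HSelect above takes `false_value` as input 0: with the SameAsFirstInput policy the output register already holds the false value, so a conditional-move lowering never needs to invert the condition. Illustrative C++, not generated code:

int SelectLowering(bool condition, int true_value, int false_value) {
  int out = false_value;  // out shares a register with input 0
  if (condition) {
    out = true_value;     // e.g. a single cmov on x86, condition used as-is
  }
  return out;
}
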
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index 556217b..b1bf939 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -36,16 +36,12 @@
 class HX86LoadFromConstantTable : public HExpression<2> {
  public:
   HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base,
-                            HConstant* constant,
-                            bool needs_materialization = true)
-      : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc),
-        needs_materialization_(needs_materialization) {
+                            HConstant* constant)
+      : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc) {
     SetRawInputAt(0, method_base);
     SetRawInputAt(1, constant);
   }
 
-  bool NeedsMaterialization() const { return needs_materialization_; }
-
   HX86ComputeBaseMethodAddress* GetBaseMethodAddress() const {
     return InputAt(0)->AsX86ComputeBaseMethodAddress();
   }
@@ -57,8 +53,6 @@
   DECLARE_INSTRUCTION(X86LoadFromConstantTable);
 
  private:
-  const bool needs_materialization_;
-
   DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable);
 };
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index fffd005..bdc664b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -38,7 +38,6 @@
 #include "base/dumpable.h"
 #include "base/macros.h"
 #include "base/timing_logger.h"
-#include "boolean_simplifier.h"
 #include "bounds_check_elimination.h"
 #include "builder.h"
 #include "code_generator.h"
@@ -73,6 +72,7 @@
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
 #include "oat_quick_method_header.h"
+#include "select_generator.h"
 #include "sharpening.h"
 #include "side_effects_analysis.h"
 #include "ssa_builder.h"
@@ -512,7 +512,7 @@
       graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
-  HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
+  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph);
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
   HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
@@ -540,9 +540,9 @@
   MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
 
   HOptimization* optimizations2[] = {
-    // BooleanSimplifier depends on the InstructionSimplifier removing
+    // SelectGenerator depends on the InstructionSimplifier removing
     // redundant suspend checks to recognize empty blocks.
-    boolean_simplify,
+    select_generator,
     fold2,  // TODO: if we don't inline we can also skip fold2.
     side_effects,
     gvn,
@@ -676,7 +676,7 @@
     return nullptr;
   }
   codegen->GetAssembler()->cfi().SetEnabled(
-      compiler_driver->GetCompilerOptions().GetGenerateDebugInfo());
+      compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo());
 
   PassObserver pass_observer(graph,
                              codegen.get(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index f8035aa..881beb4 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -51,6 +51,7 @@
   kNotCompiledVerificationError,
   kNotCompiledVerifyAtRuntime,
   kInlinedMonomorphicCall,
+  kInlinedPolymorphicCall,
   kMonomorphicCall,
   kPolymorphicCall,
   kMegamorphicCall,
@@ -118,6 +119,7 @@
       case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
       case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
       case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break;
+      case kInlinedPolymorphicCall: name = "InlinedPolymorphicCall"; break;
       case kMonomorphicCall: name = "MonomorphicCall"; break;
       case kPolymorphicCall: name = "PolymorphicCall"; break;
       case kMegamorphicCall: name = "kMegamorphicCall"; break;
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 1394dfa..a2180bc 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -115,7 +115,10 @@
   void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
     InitializePCRelativeBasePointer();
     HX86LoadFromConstantTable* load_constant =
-        new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
+        new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value);
+    if (!materialize) {
+      load_constant->MarkEmittedAtUseSite();
+    }
     insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
     insn->ReplaceInput(load_constant, input_index);
   }
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 63ef600..324d84f 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -127,24 +127,37 @@
   }
 }
 
-void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
-  bool needs_materialization = false;
-  if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) {
-    needs_materialization = true;
-  } else {
-    HInstruction* user = condition->GetUses().GetFirst()->GetUser();
-    if (!user->IsIf() && !user->IsDeoptimize()) {
-      needs_materialization = true;
+bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition,
+                                                      HInstruction* user) const {
+  if (condition->GetNext() != user) {
+    return false;
+  }
+
+  if (user->IsIf() || user->IsDeoptimize()) {
+    return true;
+  }
+
+  if (user->IsSelect() && user->AsSelect()->GetCondition() == condition) {
+    if (GetGraph()->GetInstructionSet() == kX86) {
+      // Long values and long condition inputs result in 8 required core registers.
+      // We don't have that many on x86. Materialize the condition in such case.
+      return user->GetType() != Primitive::kPrimLong ||
+             condition->InputAt(1)->GetType() != Primitive::kPrimLong ||
+             condition->InputAt(1)->IsConstant();
     } else {
-      // TODO: if there is no intervening instructions with side-effect between this condition
-      // and the If instruction, we should move the condition just before the If.
-      if (condition->GetNext() != user) {
-        needs_materialization = true;
-      }
+      return true;
     }
   }
-  if (!needs_materialization) {
-    condition->ClearNeedsMaterialization();
+
+  return false;
+}
+
+void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
+  if (condition->HasOnlyOneNonEnvironmentUse()) {
+    HInstruction* user = condition->GetUses().GetFirst()->GetUser();
+    if (CanEmitConditionAt(condition, user)) {
+      condition->MarkEmittedAtUseSite();
+    }
   }
 }
 
@@ -165,7 +178,8 @@
   }
 }
 
-bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) {
+bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input,
+                                                      HInstruction* user) const {
   // Determine if input and user come from the same dex instruction, so that we can move
   // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user)
   // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user).
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 9b24342..c8b8b0d 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -42,7 +42,8 @@
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
   void VisitNewInstance(HNewInstance* instruction) OVERRIDE;
 
-  bool CanMoveClinitCheck(HInstruction* input, HInstruction* user);
+  bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const;
+  bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const;
 
   DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
 };
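
A back-of-the-envelope check of the kX86 case in CanEmitConditionAt above, assuming each long value occupies a pair of 32-bit core registers (a sketch, not the allocator's actual accounting):

//   select output (aliases false_value via SameAsFirstInput) : 2 registers
//   true_value                                               : 2 registers
//   long condition inputs (lhs and rhs)                      : 4 registers
constexpr int kCoreRegistersForLongSelect = 2 + 2 + 4;
static_assert(kCoreRegistersForLongSelect == 8,
              "more core registers than 32-bit x86 can spare, so the "
              "condition is materialized unless an input is non-long or constant");
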
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 527c242..779f319 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -23,21 +23,41 @@
 
 namespace art {
 
-class RTPVisitor : public HGraphDelegateVisitor {
+static inline ReferenceTypeInfo::TypeHandle GetRootHandle(StackHandleScopeCollection* handles,
+                                                          ClassLinker::ClassRoot class_root,
+                                                          ReferenceTypeInfo::TypeHandle* cache) {
+  if (!ReferenceTypeInfo::IsValidHandle(*cache)) {
+    // Mutator lock is required for NewHandle.
+    ClassLinker* linker = Runtime::Current()->GetClassLinker();
+    ScopedObjectAccess soa(Thread::Current());
+    *cache = handles->NewHandle(linker->GetClassRoot(class_root));
+  }
+  return *cache;
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() {
+  return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_);
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetClassClassHandle() {
+  return GetRootHandle(handles_, ClassLinker::kJavaLangClass, &class_class_handle_);
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetStringClassHandle() {
+  return GetRootHandle(handles_, ClassLinker::kJavaLangString, &string_class_handle_);
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetThrowableClassHandle() {
+  return GetRootHandle(handles_, ClassLinker::kJavaLangThrowable, &throwable_class_handle_);
+}
+
+class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
  public:
   RTPVisitor(HGraph* graph,
-             StackHandleScopeCollection* handles,
-             ArenaVector<HInstruction*>* worklist,
-             ReferenceTypeInfo::TypeHandle object_class_handle,
-             ReferenceTypeInfo::TypeHandle class_class_handle,
-             ReferenceTypeInfo::TypeHandle string_class_handle,
-             ReferenceTypeInfo::TypeHandle throwable_class_handle)
+             HandleCache* handle_cache,
+             ArenaVector<HInstruction*>* worklist)
     : HGraphDelegateVisitor(graph),
-      handles_(handles),
-      object_class_handle_(object_class_handle),
-      class_class_handle_(class_class_handle),
-      string_class_handle_(string_class_handle),
-      throwable_class_handle_(throwable_class_handle),
+      handle_cache_(handle_cache),
       worklist_(worklist) {}
 
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
@@ -64,11 +84,7 @@
                                bool is_exact);
 
  private:
-  StackHandleScopeCollection* handles_;
-  ReferenceTypeInfo::TypeHandle object_class_handle_;
-  ReferenceTypeInfo::TypeHandle class_class_handle_;
-  ReferenceTypeInfo::TypeHandle string_class_handle_;
-  ReferenceTypeInfo::TypeHandle throwable_class_handle_;
+  HandleCache* handle_cache_;
   ArenaVector<HInstruction*>* worklist_;
 };
 
@@ -76,24 +92,9 @@
                                                    StackHandleScopeCollection* handles,
                                                    const char* name)
     : HOptimization(graph, name),
-      handles_(handles),
+      handle_cache_(handles),
       worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)) {
   worklist_.reserve(kDefaultWorklistSize);
-  // Mutator lock is required for NewHandle, but annotalysis ignores constructors.
-  ScopedObjectAccess soa(Thread::Current());
-  ClassLinker* linker = Runtime::Current()->GetClassLinker();
-  object_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject));
-  string_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangString));
-  class_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangClass));
-  throwable_class_handle_ =
-      handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangThrowable));
-
-  if (kIsDebugBuild) {
-    DCHECK(ReferenceTypeInfo::IsValidHandle(object_class_handle_));
-    DCHECK(ReferenceTypeInfo::IsValidHandle(class_class_handle_));
-    DCHECK(ReferenceTypeInfo::IsValidHandle(string_class_handle_));
-    DCHECK(ReferenceTypeInfo::IsValidHandle(throwable_class_handle_));
-  }
 }
 
 void ReferenceTypePropagation::ValidateTypes() {
@@ -137,13 +138,7 @@
 }
 
 void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
-  RTPVisitor visitor(graph_,
-                     handles_,
-                     &worklist_,
-                     object_class_handle_,
-                     class_class_handle_,
-                     string_class_handle_,
-                     throwable_class_handle_);
+  RTPVisitor visitor(graph_, &handle_cache_, &worklist_);
   // Handle Phis first as there might be instructions in the same block who depend on them.
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     VisitPhi(it.Current()->AsPhi());
@@ -243,7 +238,7 @@
         ScopedObjectAccess soa(Thread::Current());
         HInstruction* insert_point = notNullBlock->GetFirstInstruction();
         ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create(
-            object_class_handle_, /* is_exact */ true);
+            handle_cache_.GetObjectClassHandle(), /* is_exact */ true);
         if (ShouldCreateBoundType(insert_point, obj, object_rti, nullptr, notNullBlock)) {
           bound_type = new (graph_->GetArena()) HBoundType(obj);
           bound_type->SetUpperBound(object_rti, /* bound_can_be_null */ false);
@@ -395,9 +390,9 @@
   }
 }
 
-void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr,
-                                    mirror::Class* klass,
-                                    bool is_exact) {
+void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* instr,
+                                                              mirror::Class* klass,
+                                                              bool is_exact) {
   if (instr->IsInvokeStaticOrDirect() && instr->AsInvokeStaticOrDirect()->IsStringInit()) {
     // Calls to String.<init> are replaced with a StringFactory.
     if (kIsDebugBuild) {
@@ -422,22 +417,22 @@
           << "Expected String.<init>: " << PrettyMethod(method);
     }
     instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
+        ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
   } else if (klass != nullptr) {
     ScopedObjectAccess soa(Thread::Current());
-    ReferenceTypeInfo::TypeHandle handle = handles_->NewHandle(klass);
+    ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass);
     is_exact = is_exact || klass->CannotBeAssignedFromOtherTypes();
     instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
   } else {
     instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+        ReferenceTypeInfo::Create(handle_cache_->GetObjectClassHandle(), /* is_exact */ false));
   }
 }
 
-void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr,
-                                         uint16_t type_idx,
-                                         const DexFile& dex_file,
-                                         bool is_exact) {
+void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr,
+                                                                   uint16_t type_idx,
+                                                                   const DexFile& dex_file,
+                                                                   bool is_exact) {
   DCHECK_EQ(instr->GetType(), Primitive::kPrimNot);
 
   ScopedObjectAccess soa(Thread::Current());
@@ -447,11 +442,11 @@
   SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
 }
 
-void RTPVisitor::VisitNewInstance(HNewInstance* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitNewInstance(HNewInstance* instr) {
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
-void RTPVisitor::VisitNewArray(HNewArray* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitNewArray(HNewArray* instr) {
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
@@ -467,7 +462,7 @@
   return dex_cache->GetResolvedType(type_idx);
 }
 
-void RTPVisitor::VisitParameterValue(HParameterValue* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* instr) {
   ScopedObjectAccess soa(Thread::Current());
   // We check if the existing type is valid: the inliner may have set it.
   if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
@@ -477,8 +472,8 @@
   }
 }
 
-void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr,
-                                           const FieldInfo& info) {
+void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr,
+                                                                     const FieldInfo& info) {
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
@@ -500,57 +495,61 @@
   SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
 }
 
-void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
   UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo());
 }
 
-void RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) {
   UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo());
 }
 
-void RTPVisitor::VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instr) {
   // TODO: Use descriptor to get the actual type.
   if (instr->GetFieldType() == Primitive::kPrimNot) {
     instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+      ReferenceTypeInfo::Create(handle_cache_->GetObjectClassHandle(), /* is_exact */ false));
   }
 }
 
-void RTPVisitor::VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instr) {
   // TODO: Use descriptor to get the actual type.
   if (instr->GetFieldType() == Primitive::kPrimNot) {
     instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+      ReferenceTypeInfo::Create(handle_cache_->GetObjectClassHandle(), /* is_exact */ false));
   }
 }
 
-void RTPVisitor::VisitLoadClass(HLoadClass* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class =
       GetClassFromDexCache(soa.Self(), instr->GetDexFile(), instr->GetTypeIndex());
   if (resolved_class != nullptr) {
     instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
-        handles_->NewHandle(resolved_class), /* is_exact */ true));
+        handle_cache_->NewHandle(resolved_class), /* is_exact */ true));
   }
-  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_class_handle_, /* is_exact */ true));
+  instr->SetReferenceTypeInfo(
+      ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true));
 }
 
-void RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
   instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());
 }
 
-void RTPVisitor::VisitLoadString(HLoadString* instr) {
-  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
+void ReferenceTypePropagation::RTPVisitor::VisitLoadString(HLoadString* instr) {
+  instr->SetReferenceTypeInfo(
+      ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
 }
 
-void RTPVisitor::VisitLoadException(HLoadException* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitLoadException(HLoadException* instr) {
   DCHECK(instr->GetBlock()->IsCatchBlock());
   TryCatchInformation* catch_info = instr->GetBlock()->GetTryCatchInformation();
 
   if (catch_info->IsCatchAllTypeIndex()) {
-    instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(throwable_class_handle_,
-                                                          /* is_exact */ false));
+    instr->SetReferenceTypeInfo(
+        ReferenceTypeInfo::Create(handle_cache_->GetThrowableClassHandle(), /* is_exact */ false));
   } else {
     UpdateReferenceTypeInfo(instr,
                             catch_info->GetCatchTypeIndex(),
@@ -559,7 +558,7 @@
   }
 }
 
-void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitNullCheck(HNullCheck* instr) {
   ScopedObjectAccess soa(Thread::Current());
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
   if (parent_rti.IsValid()) {
@@ -567,7 +566,7 @@
   }
 }
 
-void RTPVisitor::VisitBoundType(HBoundType* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
   ScopedObjectAccess soa(Thread::Current());
 
   ReferenceTypeInfo class_rti = instr->GetUpperBound();
@@ -601,7 +600,7 @@
   }
 }
 
-void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
+void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
   ScopedObjectAccess soa(Thread::Current());
 
   HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
@@ -677,7 +676,8 @@
     result_type_handle = b_type_handle;
     is_exact = false;
   } else if (!a_is_interface && !b_is_interface) {
-    result_type_handle = handles_->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
+    result_type_handle =
+        handle_cache_.NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
     is_exact = false;
   } else {
     // This can happen if:
@@ -687,17 +687,14 @@
     //        void foo(Interface i, boolean cond) {
     //          Object o = cond ? i : new Object();
     //        }
-    result_type_handle = object_class_handle_;
+    result_type_handle = handle_cache_.GetObjectClassHandle();
     is_exact = false;
   }
 
   return ReferenceTypeInfo::Create(result_type_handle, is_exact);
 }
 
-static void UpdateArrayGet(HArrayGet* instr,
-                           StackHandleScopeCollection* handles,
-                           ReferenceTypeInfo::TypeHandle object_class_handle)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+void ReferenceTypePropagation::UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache) {
   DCHECK_EQ(Primitive::kPrimNot, instr->GetType());
 
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
@@ -707,13 +704,14 @@
 
   Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
   if (handle->IsObjectArrayClass()) {
-    ReferenceTypeInfo::TypeHandle component_handle = handles->NewHandle(handle->GetComponentType());
+    ReferenceTypeInfo::TypeHandle component_handle =
+        handle_cache->NewHandle(handle->GetComponentType());
     instr->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(component_handle, /* is_exact */ false));
   } else {
     // We don't know what the parent actually is, so we fallback to object.
     instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false));
+        ReferenceTypeInfo::Create(handle_cache->GetObjectClassHandle(), /* is_exact */ false));
   }
 }
 
@@ -733,7 +731,7 @@
   } else if (instr->IsArrayGet()) {
     // TODO: consider if it's worth "looking back" and binding the input object
     // to an array type.
-    UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
+    UpdateArrayGet(instr->AsArrayGet(), &handle_cache_);
   } else {
     LOG(FATAL) << "Invalid instruction (should not get here)";
   }
@@ -741,7 +739,7 @@
   return !previous_rti.IsEqual(instr->GetReferenceTypeInfo());
 }
 
-void RTPVisitor::VisitInvoke(HInvoke* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitInvoke(HInvoke* instr) {
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
@@ -755,13 +753,13 @@
   SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
 }
 
-void RTPVisitor::VisitArrayGet(HArrayGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitArrayGet(HArrayGet* instr) {
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
 
   ScopedObjectAccess soa(Thread::Current());
-  UpdateArrayGet(instr, handles_, object_class_handle_);
+  UpdateArrayGet(instr, handle_cache_);
   if (!instr->GetReferenceTypeInfo().IsValid()) {
     worklist_->push_back(instr);
   }
@@ -796,7 +794,7 @@
     // All inputs are NullConstants, set the type to object.
     // This may happen in the presence of inlining.
     instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+        ReferenceTypeInfo::Create(handle_cache_.GetObjectClassHandle(), /* is_exact */ false));
     return;
   }
 
@@ -886,4 +884,5 @@
     }
   }
 }
+
 }  // namespace art
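
The HandleCache introduced above replaces eager creation of the four class-root handles in the constructor (which took the mutator lock even when a pass never used them) with lazy, memoized lookups. A minimal model of the pattern; TypeHandle and ResolveClassRoot are illustrative stand-ins, not the ART types:

struct TypeHandle {
  void* reference = nullptr;
  bool IsValid() const { return reference != nullptr; }
};

TypeHandle ResolveClassRoot(int /* class_root */) {
  static int resolved;           // stand-in for the real class object
  return TypeHandle{&resolved};  // real code: mutator lock + linker->GetClassRoot + NewHandle
}

TypeHandle GetRootHandle(int class_root, TypeHandle* cache) {
  if (!cache->IsValid()) {
    *cache = ResolveClassRoot(class_root);  // slow path runs on first use only
  }
  return *cache;  // every later call is a plain cached read
}
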
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 5c05592..47ba027 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -40,6 +40,31 @@
   static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
 
  private:
+  class HandleCache {
+   public:
+    explicit HandleCache(StackHandleScopeCollection* handles) : handles_(handles) { }
+
+    template <typename T>
+    MutableHandle<T> NewHandle(T* object) SHARED_REQUIRES(Locks::mutator_lock_) {
+      return handles_->NewHandle(object);
+    }
+
+    ReferenceTypeInfo::TypeHandle GetObjectClassHandle();
+    ReferenceTypeInfo::TypeHandle GetClassClassHandle();
+    ReferenceTypeInfo::TypeHandle GetStringClassHandle();
+    ReferenceTypeInfo::TypeHandle GetThrowableClassHandle();
+
+   private:
+    StackHandleScopeCollection* handles_;
+
+    ReferenceTypeInfo::TypeHandle object_class_handle_;
+    ReferenceTypeInfo::TypeHandle class_class_handle_;
+    ReferenceTypeInfo::TypeHandle string_class_handle_;
+    ReferenceTypeInfo::TypeHandle throwable_class_handle_;
+  };
+
+  class RTPVisitor;
+
   void VisitPhi(HPhi* phi);
   void VisitBasicBlock(HBasicBlock* block);
   void UpdateBoundType(HBoundType* bound_type) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -53,19 +78,18 @@
   bool UpdateNullability(HInstruction* instr);
   bool UpdateReferenceTypeInfo(HInstruction* instr);
 
+  static void UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ValidateTypes();
 
-  StackHandleScopeCollection* handles_;
+  HandleCache handle_cache_;
 
   ArenaVector<HInstruction*> worklist_;
 
-  ReferenceTypeInfo::TypeHandle object_class_handle_;
-  ReferenceTypeInfo::TypeHandle class_class_handle_;
-  ReferenceTypeInfo::TypeHandle string_class_handle_;
-  ReferenceTypeInfo::TypeHandle throwable_class_handle_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
 
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index a966b62..5cd30ad 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -179,7 +179,7 @@
     }
 
     if (block->IsCatchBlock() ||
-        (block->GetLoopInformation() != nullptr && block->GetLoopInformation()->IsIrreducible())) {
+        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
       // By blocking all registers at the top of each catch block or irreducible loop, we force
       // intervals belonging to the live-in set of the catch/header block to be spilled.
       // TODO(ngeoffray): Phis in this block could be allocated in register.
@@ -1734,6 +1734,12 @@
   }
 }
 
+static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
+    HInstruction* instruction) {
+  return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
+         (instruction->IsConstant() || instruction->IsCurrentMethod());
+}
+
 void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
                                              HBasicBlock* from,
                                              HBasicBlock* to) const {
@@ -1743,32 +1749,70 @@
   }
 
   // Find the intervals that cover `from` and `to`.
-  LiveInterval* destination = interval->GetSiblingAt(to->GetLifetimeStart());
-  LiveInterval* source = interval->GetSiblingAt(from->GetLifetimeEnd() - 1);
+  size_t destination_position = to->GetLifetimeStart();
+  size_t source_position = from->GetLifetimeEnd() - 1;
+  LiveInterval* destination = interval->GetSiblingAt(destination_position);
+  LiveInterval* source = interval->GetSiblingAt(source_position);
 
   if (destination == source) {
     // Interval was not split.
     return;
   }
-  DCHECK(destination != nullptr && source != nullptr);
+
+  LiveInterval* parent = interval->GetParent();
+  HInstruction* defined_by = parent->GetDefinedBy();
+  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+      (destination == nullptr || !destination->CoversSlow(destination_position))) {
+    // Our live_in fixed point calculation has found that the instruction is live
+    // in the `to` block because it will eventually enter an irreducible loop. Our
+    // live interval computation however does not compute a fixed point, and
+    // therefore will not have a location for that instruction for `to`.
+    // Because the instruction is a constant or the ArtMethod, we don't need to
+    // do anything: it will be materialized in the irreducible loop.
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    return;
+  }
 
   if (!destination->HasRegister()) {
     // Values are eagerly spilled. Spill slot already contains appropriate value.
     return;
   }
 
+  Location location_source;
+  // `GetSiblingAt` returns the interval whose start and end cover `position`,
+  // but does not check whether the interval is inactive at that position.
+  // The only situation where the interval is inactive at that position is in the
+  // presence of irreducible loops for constants and ArtMethod.
+  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+      (source == nullptr || !source->CoversSlow(source_position))) {
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    if (defined_by->IsConstant()) {
+      location_source = defined_by->GetLocations()->Out();
+    } else {
+      DCHECK(defined_by->IsCurrentMethod());
+      location_source = parent->NeedsTwoSpillSlots()
+          ? Location::DoubleStackSlot(parent->GetSpillSlot())
+          : Location::StackSlot(parent->GetSpillSlot());
+    }
+  } else {
+    DCHECK(source != nullptr);
+    DCHECK(source->CoversSlow(source_position));
+    DCHECK(destination->CoversSlow(destination_position));
+    location_source = source->ToLocation();
+  }
+
   // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
   // we need to put the moves at the entry of `to`.
   if (from->GetNormalSuccessors().size() == 1) {
     InsertParallelMoveAtExitOf(from,
-                               interval->GetParent()->GetDefinedBy(),
-                               source->ToLocation(),
+                               defined_by,
+                               location_source,
                                destination->ToLocation());
   } else {
     DCHECK_EQ(to->GetPredecessors().size(), 1u);
     InsertParallelMoveAtEntryOf(to,
-                                interval->GetParent()->GetDefinedBy(),
-                                source->ToLocation(),
+                                defined_by,
+                                location_source,
                                 destination->ToLocation());
   }
 }
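
The helper and DCHECKs above encode the only tolerated "hole" in a live range: around irreducible loops a value may be live-in without its interval covering the position, and that is acceptable only when the value can be rematerialized without a move from a predecessor. Reduced to its predicate (a sketch):

bool IsMaterializableEntryBlockInstruction(bool graph_has_irreducible_loops,
                                           bool is_constant,
                                           bool is_current_method) {
  return graph_has_irreducible_loops && (is_constant || is_current_method);
}
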
@@ -1872,7 +1916,7 @@
   for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsCatchBlock() ||
-        (block->GetLoopInformation() != nullptr && block->GetLoopInformation()->IsIrreducible())) {
+        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
       // Instructions live at the top of catch blocks or irreducible loop header
       // were forced to spill.
       if (kIsDebugBuild) {
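
The new select_generator pass below rewrites simple if/else diamonds into HSelect. A source-level sketch of the shape it recognizes and the rewrite it performs:

// Before: an If, two single-Goto branch blocks, and a merge block with one
// phi whose inputs differ. After: the phi becomes a Select; no control flow.
int DiamondToSelect(bool condition, int a, int b) {
  int x;
  if (condition) {
    x = a;  // true block: at most one movable, side-effect-free instruction
  } else {
    x = b;  // false block: likewise
  }
  // merge block: x = phi(a, b)  ==>  x = Select(condition, a, b)
  return x;
}
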
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
new file mode 100644
index 0000000..105b30a
--- /dev/null
+++ b/compiler/optimizing/select_generator.cc
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "select_generator.h"
+
+namespace art {
+
+static constexpr size_t kMaxInstructionsInBranch = 1u;
+
+// Returns true if `block` has only one predecessor, ends with a Goto and
+// contains at most `kMaxInstructionsInBranch` other movable instructions with
+// no side-effects.
+static bool IsSimpleBlock(HBasicBlock* block) {
+  if (block->GetPredecessors().size() != 1u) {
+    return false;
+  }
+  DCHECK(block->GetPhis().IsEmpty());
+
+  size_t num_instructions = 0u;
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* instruction = it.Current();
+    if (instruction->IsControlFlow()) {
+      return instruction->IsGoto() && num_instructions <= kMaxInstructionsInBranch;
+    } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) {
+      num_instructions++;
+    } else {
+      return false;
+    }
+  }
+
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+// Returns true if 'block1' and 'block2' are empty and merge into the same
+// single successor, which can only be reached from them.
+static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
+  return block1->GetSingleSuccessor() == block2->GetSingleSuccessor();
+}
+
+// Returns the single phi in `block` whose inputs at `index1` and `index2`
+// differ, or nullptr if there is no such phi or more than one.
+static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index2) {
+  DCHECK_NE(index1, index2);
+
+  HPhi* select_phi = nullptr;
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* phi = it.Current()->AsPhi();
+    if (phi->InputAt(index1) != phi->InputAt(index2)) {
+      if (select_phi == nullptr) {
+        // First phi found with different inputs at the two indices.
+        select_phi = phi;
+      } else {
+        // More than one phi has different inputs at the two indices.
+        return nullptr;
+      }
+    }
+  }
+  return select_phi;
+}
+
+void HSelectGenerator::Run() {
+  // Iterate in post order in the unlikely case that removing one occurrence of
+  // the selection pattern empties a branch block of another occurrence.
+  // Otherwise the order does not matter.
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (!block->EndsWithIf()) continue;
+
+    // Find elements of the diamond pattern.
+    HIf* if_instruction = block->GetLastInstruction()->AsIf();
+    HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+    HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+    DCHECK_NE(true_block, false_block);
+    if (!IsSimpleBlock(true_block) ||
+        !IsSimpleBlock(false_block) ||
+        !BlocksMergeTogether(true_block, false_block)) {
+      continue;
+    }
+    HBasicBlock* merge_block = true_block->GetSingleSuccessor();
+
+    // If the branches are not empty, move instructions in front of the If.
+    // TODO(dbrazdil): This puts an instruction between If and its condition.
+    //                 Implement moving of conditions to first users if possible.
+    if (!true_block->IsSingleGoto()) {
+      true_block->MoveInstructionBefore(true_block->GetFirstInstruction(), if_instruction);
+    }
+    if (!false_block->IsSingleGoto()) {
+      false_block->MoveInstructionBefore(false_block->GetFirstInstruction(), if_instruction);
+    }
+    DCHECK(true_block->IsSingleGoto());
+    DCHECK(false_block->IsSingleGoto());
+
+    // Find the resulting true/false values.
+    size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block);
+    size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block);
+    DCHECK_NE(predecessor_index_true, predecessor_index_false);
+
+    HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false);
+    if (phi == nullptr) {
+      continue;
+    }
+    HInstruction* true_value = phi->InputAt(predecessor_index_true);
+    HInstruction* false_value = phi->InputAt(predecessor_index_false);
+
+    // Create the Select instruction and insert it in front of the If.
+    HSelect* select = new (graph_->GetArena()) HSelect(if_instruction->InputAt(0),
+                                                       true_value,
+                                                       false_value,
+                                                       if_instruction->GetDexPc());
+    if (phi->GetType() == Primitive::kPrimNot) {
+      select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo());
+    }
+    block->InsertInstructionBefore(select, if_instruction);
+
+    // Remove the true branch which removes the corresponding Phi input.
+    // If left only with the false branch, the Phi is automatically removed.
+    phi->ReplaceInput(select, predecessor_index_false);
+    bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u);
+    true_block->DisconnectAndDelete();
+    DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
+
+    // Merge remaining blocks which are now connected with Goto.
+    DCHECK_EQ(block->GetSingleSuccessor(), false_block);
+    block->MergeWith(false_block);
+    if (only_two_predecessors) {
+      DCHECK_EQ(block->GetSingleSuccessor(), merge_block);
+      block->MergeWith(merge_block);
+    }
+
+    // No need to update dominance information, as we are simplifying
+    // a simple diamond shape, where the join block is merged with the
+    // entry block. Any following blocks would have had the join block
+    // as a dominator, and `MergeWith` handles changing that to the
+    // entry block.
+  }
+}
+
+}  // namespace art
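
For context, a hedged source-level illustration of the shape this pass targets (function names are illustrative, not from the tree): the if/else below builds the If/Phi diamond described above, and select generation hoists the single movable branch instruction in front of the If and collapses the Phi into an HSelect, which backends can lower to a conditional move.

    // Hypothetical illustration of the recognized pattern:
    int Pick(bool c, int a, int b) {
      int r;
      if (c) {
        r = a + 1;  // at most kMaxInstructionsInBranch (= 1) movable instruction
      } else {
        r = b;
      }
      return r;     // merge block: Phi [a + 1, b]
    }

    // After select generation, conceptually:
    int PickSelect(bool c, int a, int b) {
      int t = a + 1;     // branch instruction moved in front of the If
      return c ? t : b;  // HSelect(condition, true_value, false_value)
    }
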
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
new file mode 100644
index 0000000..f9d6d4d
--- /dev/null
+++ b/compiler/optimizing/select_generator.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This optimization recognizes the common diamond selection pattern and
+ * replaces it with an instance of the HSelect instruction.
+ *
+ * Recognized pattern:
+ *
+ *          If [ Condition ]
+ *            /          \
+ *      false branch  true branch
+ *            \          /
+ *     Phi [FalseValue, TrueValue]
+ *
+ * The pattern will be simplified if `true_branch` and `false_branch` each
+ * contain at most one instruction without any side effects.
+ *
+ * Blocks are merged into one and Select replaces the If and the Phi:
+ *              true branch
+ *              false branch
+ *              Select [FalseValue, TrueValue, Condition]
+ *
+ * Note: In order to recognize side-effect-free blocks, this optimization must
+ * be run after the instruction simplifier has removed redundant suspend checks.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
+#define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
+
+#include "optimization.h"
+
+namespace art {
+
+class HSelectGenerator : public HOptimization {
+ public:
+  explicit HSelectGenerator(HGraph* graph)
+    : HOptimization(graph, kSelectGeneratorPassName) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kSelectGeneratorPassName = "select_generator";
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HSelectGenerator);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
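
A minimal usage sketch, assuming an already-built HGraph* graph and nothing beyond the class declared above (the standard HOptimization protocol):

    HSelectGenerator select_gen(graph);
    select_gen.Run();  // the pass reports itself as "select_generator"
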
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index c0011cd..165d09d 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -927,16 +927,21 @@
   if (invoke->IsStringInit()) {
     // This is a StringFactory call which acts as a String constructor. Its
     // result replaces the empty String pre-allocated by NewInstance.
-    HNewInstance* new_instance = invoke->GetThisArgumentOfStringInit();
-    invoke->RemoveThisArgumentOfStringInit();
+    HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit();
 
     // Replacing the NewInstance might render it redundant. Keep a list of these
     // to be visited once it is clear whether it has remaining uses.
-    uninitialized_strings_.push_back(new_instance);
+    if (arg_this->IsNewInstance()) {
+      uninitialized_strings_.push_back(arg_this->AsNewInstance());
+    } else {
+      DCHECK(arg_this->IsPhi());
+      // NewInstance is not the direct input of the StringFactory call. It might
+      // be redundant but optimizing this case is not worth the effort.
+    }
 
-    // Walk over all vregs and replace any occurrence of `new_instance` with `invoke`.
+    // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
     for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-      if ((*current_locals_)[vreg] == new_instance) {
+      if ((*current_locals_)[vreg] == arg_this) {
         (*current_locals_)[vreg] = invoke;
       }
     }
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index a5609fc..7ed3c84 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -242,19 +242,53 @@
           }
           if (instruction != nullptr) {
             instruction->GetLiveInterval()->AddUse(
-                current, environment, i, should_be_live);
+                current, environment, i, /* actual_user */ nullptr, should_be_live);
           }
         }
       }
 
-      // All inputs of an instruction must be live.
-      for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
-        HInstruction* input = current->InputAt(i);
-        // Some instructions 'inline' their inputs, that is they do not need
-        // to be materialized.
-        if (input->HasSsaIndex() && current->GetLocations()->InAt(i).IsValid()) {
-          live_in->SetBit(input->GetSsaIndex());
-          input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i);
+      // Process inputs of instructions.
+      if (current->IsEmittedAtUseSite()) {
+        if (kIsDebugBuild) {
+          DCHECK(!current->GetLocations()->Out().IsValid());
+          for (HUseIterator<HInstruction*> use_it(current->GetUses());
+               !use_it.Done();
+               use_it.Advance()) {
+            HInstruction* user = use_it.Current()->GetUser();
+            size_t index = use_it.Current()->GetIndex();
+            DCHECK(!user->GetLocations()->InAt(index).IsValid());
+          }
+          DCHECK(!current->HasEnvironmentUses());
+        }
+      } else {
+        for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
+          HInstruction* input = current->InputAt(i);
+          bool has_in_location = current->GetLocations()->InAt(i).IsValid();
+          bool has_out_location = input->GetLocations()->Out().IsValid();
+
+          if (has_in_location) {
+            DCHECK(has_out_location);
+            DCHECK(input->HasSsaIndex());
+            // `Input` generates a result used by `current`. Add use and update
+            // the live-in set.
+            input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i);
+            live_in->SetBit(input->GetSsaIndex());
+          } else if (has_out_location) {
+            // `Input` generates a result but it is not used by `current`.
+          } else {
+            // `Input` is inlined into `current`. Walk over its inputs and record
+            // uses at `current`.
+            DCHECK(input->IsEmittedAtUseSite());
+            for (size_t i2 = 0, e2 = input->InputCount(); i2 < e2; ++i2) {
+              HInstruction* inlined_input = input->InputAt(i2);
+              DCHECK(inlined_input->HasSsaIndex()) << "Recursive inlining not allowed.";
+              if (input->GetLocations()->InAt(i2).IsValid()) {
+                live_in->SetBit(inlined_input->GetSsaIndex());
+                inlined_input->GetLiveInterval()->AddUse(
+                    /* owner */ input, /* environment */ nullptr, i2, /* actual_user */ current);
+              }
+            }
+          }
         }
       }
     }
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 572a7b6..a78aedc 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -113,10 +113,6 @@
         input_index_(input_index),
         position_(position),
         next_(next) {
-    DCHECK((user == nullptr)
-        || user->IsPhi()
-        || (GetPosition() == user->GetLifetimePosition() + 1)
-        || (GetPosition() == user->GetLifetimePosition()));
     DCHECK(environment == nullptr || user == nullptr);
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
@@ -243,21 +239,30 @@
     AddRange(position, position + 1);
   }
 
+  // Record use of an input. The use will be recorded as an environment use if
+  // `environment` is not null and as a register use otherwise. If `actual_user`
+  // is specified, the use will be recorded at `actual_user`'s lifetime position.
   void AddUse(HInstruction* instruction,
               HEnvironment* environment,
               size_t input_index,
+              HInstruction* actual_user = nullptr,
               bool keep_alive = false) {
-    // Set the use within the instruction.
     bool is_environment = (environment != nullptr);
-    size_t position = instruction->GetLifetimePosition() + 1;
     LocationSummary* locations = instruction->GetLocations();
+    if (actual_user == nullptr) {
+      actual_user = instruction;
+    }
+
+    // Set the use within the instruction.
+    size_t position = actual_user->GetLifetimePosition() + 1;
     if (!is_environment) {
       if (locations->IsFixedInput(input_index) || locations->OutputUsesSameAs(input_index)) {
         // For fixed inputs and output same as input, the register allocator
         // requires to have inputs die at the instruction, so that input moves use the
         // location of the input just before that instruction (and not potential moves due
         // to splitting).
-        position = instruction->GetLifetimePosition();
+        DCHECK_EQ(instruction, actual_user);
+        position = actual_user->GetLifetimePosition();
       } else if (!locations->InAt(input_index).IsValid()) {
         return;
       }
@@ -267,11 +272,8 @@
       AddBackEdgeUses(*instruction->GetBlock());
     }
 
-    DCHECK(position == instruction->GetLifetimePosition()
-           || position == instruction->GetLifetimePosition() + 1);
-
     if ((first_use_ != nullptr)
-        && (first_use_->GetUser() == instruction)
+        && (first_use_->GetUser() == actual_user)
         && (first_use_->GetPosition() < position)) {
       // The user uses the instruction multiple times, and one use dies before the other.
       // We update the use list so that the latter is first.
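
As a hedged mini-model of the position logic above (names are hypothetical, not ART API): a use is normally recorded one position after its user so the input stays live across the instruction, and when `actual_user` is supplied the use of an inlined input is pinned to the real consumer rather than the instruction that owns the operand.

    struct Insn { size_t lifetime_position; };

    // Simplified echo of AddUse: the use lands at position + 1 of whichever
    // instruction actually consumes the value.
    size_t UsePosition(const Insn& owner, const Insn* actual_user) {
      const Insn& at = (actual_user != nullptr) ? *actual_user : owner;
      return at.lifetime_position + 1;
    }
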
diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc
index 81f2a56..85335ef 100644
--- a/compiler/profile_assistant.cc
+++ b/compiler/profile_assistant.cc
@@ -16,54 +16,154 @@
 
 #include "profile_assistant.h"
 
+#include "base/unix_file/fd_file.h"
+#include "os.h"
+
 namespace art {
 
 // Minimum number of new methods that profiles must contain to enable recompilation.
 static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
 
-bool ProfileAssistant::ProcessProfiles(
-      const std::vector<std::string>& profile_files,
-      const std::vector<std::string>& reference_profile_files,
-      /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+bool ProfileAssistant::ProcessProfilesInternal(
+        const std::vector<ScopedFlock>& profile_files,
+        const std::vector<ScopedFlock>& reference_profile_files,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
   DCHECK(!profile_files.empty());
-  DCHECK(reference_profile_files.empty() ||
+  DCHECK(!reference_profile_files.empty() ||
       (profile_files.size() == reference_profile_files.size()));
 
   std::vector<ProfileCompilationInfo> new_info(profile_files.size());
   bool should_compile = false;
   // Read the main profile files.
-  for (size_t i = 0; i < profile_files.size(); i++) {
-    if (!new_info[i].Load(profile_files[i])) {
-      LOG(WARNING) << "Could not load profile file: " << profile_files[i];
+  for (size_t i = 0; i < new_info.size(); i++) {
+    if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) {
+      LOG(WARNING) << "Could not load profile file at index " << i;
       return false;
     }
     // Do we have enough new profiled methods that will make the compilation worthwhile?
     should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
   }
+
   if (!should_compile) {
-    *profile_compilation_info = nullptr;
     return true;
   }
 
   std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo());
+  // Merge information.
   for (size_t i = 0; i < new_info.size(); i++) {
-    // Merge all data into a single object.
-    result->Load(new_info[i]);
-    // If we have any reference profile information merge their information with
-    // the current profiles and save them back to disk.
     if (!reference_profile_files.empty()) {
-      if (!new_info[i].Load(reference_profile_files[i])) {
-        LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i];
+      if (!new_info[i].Load(reference_profile_files[i].GetFile()->Fd())) {
+        LOG(WARNING) << "Could not load reference profile file at index " << i;
         return false;
       }
-      if (!new_info[i].Save(reference_profile_files[i])) {
-        LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i];
+    }
+    // Merge all data into a single object.
+    if (!result->Load(new_info[i])) {
+      LOG(WARNING) << "Could not merge profile data at index " << i;
+      return false;
+    }
+  }
+  // We were successful in merging all profile information. Update the files.
+  for (size_t i = 0; i < new_info.size(); i++) {
+    if (!reference_profile_files.empty()) {
+      if (!reference_profile_files[i].GetFile()->ClearContent()) {
+        PLOG(WARNING) << "Could not clear reference profile file at index " << i;
+        return false;
+      }
+      if (!new_info[i].Save(reference_profile_files[i].GetFile()->Fd())) {
+        LOG(WARNING) << "Could not save reference profile file at index " << i;
+        return false;
+      }
+      if (!profile_files[i].GetFile()->ClearContent()) {
+        PLOG(WARNING) << "Could not clear profile file at index " << i;
         return false;
       }
     }
   }
+
   *profile_compilation_info = result.release();
   return true;
 }
 
+class ScopedCollectionFlock {
+ public:
+  explicit ScopedCollectionFlock(size_t size) : flocks_(size) {}
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<std::string>& filenames, /* out */ std::string* error) {
+    for (size_t i = 0; i < filenames.size(); i++) {
+      if (!flocks_[i].Init(filenames[i].c_str(), O_RDWR, /* block */ true, error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<uint32_t>& fds, /* out */ std::string* error) {
+    for (size_t i = 0; i < fds.size(); i++) {
+      // We do not own the descriptor, so disable auto-close and don't check usage.
+      File file(fds[i], false);
+      file.DisableAutoClose();
+      if (!flocks_[i].Init(&file, error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  const std::vector<ScopedFlock>& Get() const { return flocks_; }
+
+ private:
+  std::vector<ScopedFlock> flocks_;
+};
+
+bool ProfileAssistant::ProcessProfiles(
+        const std::vector<uint32_t>& profile_files_fd,
+        const std::vector<uint32_t>& reference_profile_files_fd,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+  *profile_compilation_info = nullptr;
+
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files_fd.size());
+  if (!profile_files_flocks.Init(profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return false;
+  }
+  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files_fd.size());
+  if (!reference_profile_files_flocks.Init(reference_profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return false;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_files_flocks.Get(),
+                                 profile_compilation_info);
+}
+
+bool ProfileAssistant::ProcessProfiles(
+        const std::vector<std::string>& profile_files,
+        const std::vector<std::string>& reference_profile_files,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+  *profile_compilation_info = nullptr;
+
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files.size());
+  if (!profile_files_flocks.Init(profile_files, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return false;
+  }
+  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files.size());
+  if (!reference_profile_files_flocks.Init(reference_profile_files, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return false;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_files_flocks.Get(),
+                                 profile_compilation_info);
+}
+
 }  // namespace art
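
A hedged caller's-eye sketch of the filename-based entry point above (paths are made up; the ownership and failure semantics follow the code: a true return with a null out-param means there were not enough new methods, and on failure the input files are left untouched, as the tests below also assert):

    std::string profile = "/tmp/app.prof";       // hypothetical paths
    std::string reference = "/tmp/app.prof.ref";
    ProfileCompilationInfo* info = nullptr;
    if (!ProfileAssistant::ProcessProfiles({ profile }, { reference }, &info)) {
      LOG(WARNING) << "Processing failed; input files were left untouched.";
    } else if (info == nullptr) {
      // Fewer than kMinNewMethodsForCompilation new methods: skip compilation.
    } else {
      std::unique_ptr<ProfileCompilationInfo> merged(info);  // caller owns it
      // ... hand `merged` to the compiler ...
    }
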
diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h
index 088c8bd..ad5e216 100644
--- a/compiler/profile_assistant.h
+++ b/compiler/profile_assistant.h
@@ -20,6 +20,7 @@
 #include <string>
 #include <vector>
 
+#include "base/scoped_flock.h"
 #include "jit/offline_profiling_info.cc"
 
 namespace art {
@@ -52,7 +53,17 @@
       const std::vector<std::string>& reference_profile_files,
       /*out*/ ProfileCompilationInfo** profile_compilation_info);
 
+  static bool ProcessProfiles(
+      const std::vector<uint32_t>& profile_files_fd,
+      const std::vector<uint32_t>& reference_profile_files_fd,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
  private:
+  static bool ProcessProfilesInternal(
+      const std::vector<ScopedFlock>& profile_files,
+      const std::vector<ScopedFlock>& reference_profile_files,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
   DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
 };
 
diff --git a/compiler/profile_assistant_test.cc b/compiler/profile_assistant_test.cc
new file mode 100644
index 0000000..58b7513
--- /dev/null
+++ b/compiler/profile_assistant_test.cc
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "common_runtime_test.h"
+#include "compiler/profile_assistant.h"
+#include "jit/offline_profiling_info.h"
+
+namespace art {
+
+class ProfileAssistantTest : public CommonRuntimeTest {
+ protected:
+  void SetupProfile(const std::string& id,
+                    uint32_t checksum,
+                    uint16_t number_of_methods,
+                    const ScratchFile& profile,
+                    ProfileCompilationInfo* info,
+                    uint16_t start_method_index = 0) {
+    std::string dex_location1 = "location1" + id;
+    uint32_t dex_location_checksum1 = checksum;
+    std::string dex_location2 = "location2" + id;
+    uint32_t dex_location_checksum2 = 10 * checksum;
+    for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
+      ASSERT_TRUE(info->AddData(dex_location1, dex_location_checksum1, i));
+      ASSERT_TRUE(info->AddData(dex_location2, dex_location_checksum2, i));
+    }
+    ASSERT_TRUE(info->Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  }
+
+  uint32_t GetFd(const ScratchFile& file) const {
+    return static_cast<uint32_t>(file.GetFd());
+  }
+};
+
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should advise compilation.
+  ProfileCompilationInfo* result;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result != nullptr);
+
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Equals(*result));
+
+  // The information from profiles must be transferred to the reference profiles.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Initial profiles must be cleared.
+  ASSERT_EQ(0, profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  // The new profile info will contain the methods with indices 0-99.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+
+  // The reference profile info will contain the methods with indices 50-149.
+  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+  ProfileCompilationInfo reference_info1;
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile1,
+      &reference_info1, kNumberOfMethodsToEnableCompilation / 2);
+  ProfileCompilationInfo reference_info2;
+  SetupProfile("p2", 2, kNumberOfMethodsAlreadyCompiled, reference_profile2,
+      &reference_info2, kNumberOfMethodsToEnableCompilation / 2);
+
+  // We should advise compilation.
+  ProfileCompilationInfo* result;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result != nullptr);
+
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Load(reference_info1));
+  ASSERT_TRUE(expected.Load(reference_info2));
+  ASSERT_TRUE(expected.Equals(*result));
+
+  // The information from profiles must be transferred to the reference profiles.
+  ProfileCompilationInfo file_info1;
+  ProfileCompilationInfo merge1;
+  ASSERT_TRUE(merge1.Load(info1));
+  ASSERT_TRUE(merge1.Load(reference_info1));
+  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+  ASSERT_TRUE(file_info1.Equals(merge1));
+
+  ProfileCompilationInfo file_info2;
+  ProfileCompilationInfo merge2;
+  ASSERT_TRUE(merge2.Load(info2));
+  ASSERT_TRUE(merge2.Load(reference_info2));
+  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+  ASSERT_TRUE(file_info2.Equals(merge2));
+
+  // Initial profiles must be cleared.
+  ASSERT_EQ(0, profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, DoNotAdviseCompilation) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToSkipCompilation = 1;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2);
+
+  // We should not advise compilation.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must remain empty.
+  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfProfiles) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different checksums to the same dex file. This will make merging of information fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should fail processing.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must still remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must still remain empty.
+  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfReferenceProfiles) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different checksums to the same dex file. This will make merging of information fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info);
+
+  // We should fail processing.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must still remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile)));
+  ASSERT_TRUE(file_info2.Equals(reference_info));
+}
+
+}  // namespace art
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index b6a228c..2958dc6 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -21,6 +21,7 @@
 #include <set>
 #include <map>
 #include <vector>
+#include <zlib.h>
 
 #include "base/bit_utils.h"
 #include "base/logging.h"
@@ -88,11 +89,12 @@
     DexFile::Header* header = reinterpret_cast<DexFile::Header*>(&header_data.data);
     std::copy_n(DexFile::kDexMagic, 4u, header->magic_);
     std::copy_n(DexFile::kDexMagicVersion, 4u, header->magic_ + 4u);
-    header->header_size_ = sizeof(header);
+    header->header_size_ = sizeof(DexFile::Header);
     header->endian_tag_ = DexFile::kDexEndianConstant;
     header->link_size_ = 0u;  // Unused.
     header->link_off_ = 0u;  // Unused.
-    header->map_off_ = 0u;  // Unused.
+    header->map_off_ = 0u;  // Unused. TODO: This is wrong. Dex files created by this builder
+                            //               cannot be verified. b/26808512
 
     uint32_t data_section_size = 0u;
 
@@ -161,7 +163,6 @@
     uint32_t total_size = data_section_offset + data_section_size;
 
     dex_file_data_.resize(total_size);
-    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
     for (const auto& entry : strings_) {
       CHECK_LT(entry.first.size(), 128u);
@@ -210,11 +211,25 @@
       Write32(raw_offset + 4u, GetStringIdx(entry.first.name));
     }
 
-    // Leave checksum and signature as zeros.
+    // Leave signature as zeros.
+
+    header->file_size_ = dex_file_data_.size();
+
+    // Write the complete header early, as part of it needs to be checksummed.
+    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
+
+    // Checksum starts after the checksum field.
+    size_t skip = sizeof(header->magic_) + sizeof(header->checksum_);
+    header->checksum_ = adler32(adler32(0L, Z_NULL, 0),
+                                dex_file_data_.data() + skip,
+                                dex_file_data_.size() - skip);
+
+    // Write the complete header again, just simpler that way.
+    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
     std::string error_msg;
     std::unique_ptr<const DexFile> dex_file(DexFile::Open(
-        &dex_file_data_[0], dex_file_data_.size(), dex_location, 0u, nullptr, &error_msg));
+        &dex_file_data_[0], dex_file_data_.size(), dex_location, 0u, nullptr, false, &error_msg));
     CHECK(dex_file != nullptr) << error_msg;
     return dex_file;
   }
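
The checksum logic above follows the dex format: Adler-32 over everything past the checksum field itself. A standalone sketch of the same computation (zlib only; the skipped region is the 8-byte magic plus the 4-byte checksum):

    #include <zlib.h>
    #include <cstddef>
    #include <cstdint>

    uint32_t ComputeDexChecksum(const uint8_t* data, size_t size) {
      constexpr size_t kSkip = 8u + 4u;  // magic_ (8 bytes) + checksum_ (4 bytes)
      return adler32(adler32(0L, Z_NULL, 0), data + kSkip, size - kSkip);
    }
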
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index d6caa3c..7138a46 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -186,6 +186,22 @@
   EmitOperand(dst, src);
 }
 
+void X86Assembler::popcntl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst, src);
+}
+
+void X86Assembler::popcntl(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst, src);
+}
+
 void X86Assembler::movzxb(Register dst, ByteRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
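
For reference, the new popcntl overloads emit the POPCNT extension encoding F3 0F B8 /r. With dst = ECX (reg field 1) and src = EAX (r/m field 0), the ModRM byte from EmitRegisterOperand is 0xC0 | (1 << 3) | 0 = 0xC8, giving:

    // Hypothetical expected-byte check for popcntl(ECX, EAX):
    const uint8_t kExpected[] = { 0xF3, 0x0F, 0xB8, 0xC8 };  // popcntl %eax, %ecx
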
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 655af9c..759a41e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -330,11 +330,15 @@
   void movntl(const Address& dst, Register src);
 
   void bswapl(Register dst);
+
   void bsfl(Register dst, Register src);
   void bsfl(Register dst, const Address& src);
   void bsrl(Register dst, Register src);
   void bsrl(Register dst, const Address& src);
 
+  void popcntl(Register dst, Register src);
+  void popcntl(Register dst, const Address& src);
+
   void rorl(Register reg, const Immediate& imm);
   void rorl(Register operand, Register shifter);
   void roll(Register reg, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a9b991c..0fd0982 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -260,6 +260,19 @@
   DriverStr(expected, "bsrl_address");
 }
 
+TEST_F(AssemblerX86Test, Popcntl) {
+  DriverStr(RepeatRR(&x86::X86Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86Test, PopcntlAddress) {
+  GetAssembler()->popcntl(x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%EDI,%EBX,4), %EDI\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
 // Rorl only allows CL as the shift count.
 std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
   std::ostringstream str;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index db07267..10f5a00 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2247,6 +2247,42 @@
   EmitOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::repne_scasw() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 01d28e3..6f0847e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -647,6 +647,11 @@
   void bsrq(CpuRegister dst, CpuRegister src);
   void bsrq(CpuRegister dst, const Address& src);
 
+  void popcntl(CpuRegister dst, CpuRegister src);
+  void popcntl(CpuRegister dst, const Address& src);
+  void popcntq(CpuRegister dst, CpuRegister src);
+  void popcntq(CpuRegister dst, const Address& src);
+
   void rorl(CpuRegister reg, const Immediate& imm);
   void rorl(CpuRegister operand, CpuRegister shifter);
   void roll(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 00bb5ca..8a87fca 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1333,6 +1333,44 @@
   DriverStr(expected, "bsrq_address");
 }
 
+TEST_F(AssemblerX86_64Test, Popcntl) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntlAddress) {
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%RDI,%RBX,4), %R10d\n"
+    "popcntl 0xc(%R10,%RBX,4), %edi\n"
+    "popcntl 0xc(%RDI,%R9,4), %edi\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
+TEST_F(AssemblerX86_64Test, Popcntq) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::popcntq, "popcntq %{reg2}, %{reg1}"), "popcntq");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntqAddress) {
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntq 0xc(%RDI,%RBX,4), %R10\n"
+    "popcntq 0xc(%R10,%RBX,4), %RDI\n"
+    "popcntq 0xc(%RDI,%R9,4), %RDI\n";
+
+  DriverStr(expected, "popcntq_address");
+}
+
 /////////////////
 // Near labels //
 /////////////////
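
The popcntq variants exercised above add a REX.W prefix after the mandatory F3 prefix, matching the emitter order in assembler_x86_64.cc (F3, REX, 0F B8, ModRM). With both registers below REX extension range, popcntq(RAX, RCX) should come out as:

    // Hypothetical expected bytes for popcntq(RAX, RCX):
    // F3 (mandatory prefix), 48 (REX.W), 0F B8 (opcode), C1 (ModRM: reg=RAX, rm=RCX)
    const uint8_t kExpected64[] = { 0xF3, 0x48, 0x0F, 0xB8, 0xC1 };
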
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index dbb7341..f56fc38 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -318,6 +318,11 @@
   UsageError("");
   UsageError("  --no-generate-debug-info: Do not generate debug information for native debugging.");
   UsageError("");
+  UsageError("  --generate-mini-debug-info: Generate minimal amount of LZMA-compressed");
+  UsageError("      debug information necessary to print backtraces. (disabled by default)");
+  UsageError("");
+  UsageError("  --no-generate-mini-debug-info: Do do generated backtrace info.");
+  UsageError("");
   UsageError("  --debuggable: Produce code debuggable with Java debugger.");
   UsageError("");
   UsageError("  --native-debuggable: Produce code debuggable with native debugger (like LLDB).");
@@ -340,6 +345,12 @@
   UsageError("      --profile-file will be merged into --reference-profile-file. Valid only when");
   UsageError("      specified together with --profile-file.");
   UsageError("");
+  UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
+  UsageError("      Cannot be used together with --profile-file.");
+  UsageError("");
+  UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
+  UsageError("      accepts a file descriptor. Cannot be used together with");
+  UsageError("       --reference-profile-file.");
   UsageError("  --print-pass-names: print a list of pass names");
   UsageError("");
   UsageError("  --disable-passes=<pass-names>:  disable one or more passes separated by comma.");
@@ -369,6 +380,10 @@
   UsageError("  --multi-image: specify that separate oat and image files be generated for each "
              "input dex file.");
   UsageError("");
+  UsageError("  --force-determinism: force the compiler to emit a deterministic output.");
+  UsageError("      This option is incompatible with read barriers (e.g., if dex2oat has been");
+  UsageError("      built with the environment variable `ART_USE_READ_BARRIER` set to `true`).");
+  UsageError("");
   std::cerr << "See log for usage error information\n";
   exit(EXIT_FAILURE);
 }
@@ -497,12 +512,24 @@
   return dex_files_size >= kMinDexFileCumulativeSizeForSwap;
 }
 
+static void CloseAllFds(const std::vector<uint32_t>& fds, const char* descriptor) {
+  for (size_t i = 0; i < fds.size(); i++) {
+    if (close(fds[i]) < 0) {
+      PLOG(WARNING) << "Failed to close descriptor for " << descriptor << " at index " << i;
+    }
+  }
+}
+
 class Dex2Oat FINAL {
  public:
   explicit Dex2Oat(TimingLogger* timings) :
       compiler_kind_(Compiler::kOptimizing),
       instruction_set_(kRuntimeISA),
       // Take the default set of instruction features from the build.
+      image_file_location_oat_checksum_(0),
+      image_file_location_oat_data_begin_(0),
+      image_patch_delta_(0),
+      key_value_store_(nullptr),
       verification_results_(nullptr),
       method_inliner_map_(),
       runtime_(nullptr),
@@ -522,22 +549,25 @@
       boot_image_(false),
       multi_image_(false),
       is_host_(false),
+      class_loader_(nullptr),
+      elf_writers_(),
+      oat_writers_(),
+      rodata_(),
       image_writer_(nullptr),
       driver_(nullptr),
+      opened_dex_files_maps_(),
+      opened_dex_files_(),
+      no_inline_from_dex_files_(),
       dump_stats_(false),
       dump_passes_(false),
       dump_timing_(false),
       dump_slow_timing_(kIsDebugBuild),
       swap_fd_(-1),
-      app_image_fd_(kInvalidImageFd),
-      timings_(timings) {}
+      app_image_fd_(kInvalidFd),
+      timings_(timings),
+      force_determinism_(false) {}
 
   ~Dex2Oat() {
-    // Free opened dex files before deleting the runtime_, because ~DexFile
-    // uses MemMap, which is shut down by ~Runtime.
-    class_path_files_.clear();
-    opened_dex_files_.clear();
-
     // Log completion time before deleting the runtime_, because this accesses
     // the runtime.
     LogCompletionTime();
@@ -550,6 +580,9 @@
       for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files_) {
         dex_file.release();
       }
+      for (std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+        map.release();
+      }
       for (std::unique_ptr<File>& oat_file : oat_files_) {
         oat_file.release();
       }
@@ -575,6 +608,14 @@
     ParseUintOption(option, "--oat-fd", &oat_fd_, Usage);
   }
 
+  void ParseFdForCollection(const StringPiece& option,
+                            const char* arg_name,
+                            std::vector<uint32_t>* fds) {
+    uint32_t fd;
+    ParseUintOption(option, arg_name, &fd, Usage);
+    fds->push_back(fd);
+  }
+
   void ParseJ(const StringPiece& option) {
     ParseUintOption(option, "-j", &thread_count_, Usage, /* is_long_option */ false);
   }
@@ -778,11 +819,25 @@
       }
     }
 
+    if (!profile_files_.empty() && !profile_files_fd_.empty()) {
+      Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
+    }
     if (!profile_files_.empty()) {
       if (!reference_profile_files_.empty() &&
           (reference_profile_files_.size() != profile_files_.size())) {
         Usage("If specified, --reference-profile-file should match the number of --profile-file.");
       }
+    } else if (!reference_profile_files_.empty()) {
+      Usage("--reference-profile-file should only be supplied with --profile-file");
+    }
+    if (!profile_files_fd_.empty()) {
+      if (!reference_profile_files_fd_.empty() &&
+          (reference_profile_files_fd_.size() != profile_files_fd_.size())) {
+        Usage("If specified, --reference-profile-file-fd should match the number",
+              " of --profile-file-fd.");
+      }
+    } else if (!reference_profile_files_fd_.empty()) {
+      Usage("--reference-profile-file-fd should only be supplied with --profile-file-fd");
     }
 
     if (!parser_options->oat_symbols.empty()) {
@@ -872,6 +927,15 @@
 
     // Fill some values into the key-value store for the oat header.
     key_value_store_.reset(new SafeMap<std::string, std::string>());
+
+    // Automatically force determinism for the boot image in a host
+    // build, except when read barriers are enabled, as forcing determinism
+    // switches the GC to a non-concurrent one by passing the option
+    // `-Xgc:nonconcurrent` (see below).
+    if (!kIsTargetBuild && IsBootImage() && !kEmitCompilerReadBarrier) {
+      force_determinism_ = true;
+    }
+    compiler_options_->force_determinism_ = force_determinism_;
   }
 
   void ExpandOatAndImageFilenames() {
@@ -1076,6 +1140,10 @@
       } else if (option.starts_with("--reference-profile-file=")) {
         reference_profile_files_.push_back(
             option.substr(strlen("--reference-profile-file=")).ToString());
+      } else if (option.starts_with("--profile-file-fd=")) {
+        ParseFdForCollection(option, "--profile-file-fd", &profile_files_fd_);
+      } else if (option.starts_with("--reference-profile-file-fd=")) {
+        ParseFdForCollection(option, "--reference_profile-file-fd", &reference_profile_files_fd_);
       } else if (option == "--no-profile-file") {
         // No profile
       } else if (option == "--host") {
@@ -1111,6 +1179,11 @@
         multi_image_ = true;
       } else if (option.starts_with("--no-inline-from=")) {
         no_inline_from_string_ = option.substr(strlen("--no-inline-from=")).data();
+      } else if (option == "--force-determinism") {
+        if (kEmitCompilerReadBarrier) {
+          Usage("Cannot use --force-determinism with read barriers");
+        }
+        force_determinism_ = true;
       } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
         Usage("Unknown argument %s", option.data());
       }
@@ -1125,6 +1198,9 @@
   // Check whether the oat output files are writable, and open them for later. Also open a swap
   // file, if a name is given.
   bool OpenFile() {
+    // Prune non-existent dex files now so that we don't create empty oat files for multi-image.
+    PruneNonExistentDexFiles();
+
     // Expand oat and image filenames for multi image.
     if (IsBootImage() && multi_image_) {
       ExpandOatAndImageFilenames();
@@ -1196,9 +1272,6 @@
     }
     // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
 
-    // Organize inputs, handling multi-dex and multiple oat file outputs.
-    CreateDexOatMappings();
-
     return true;
   }
 
@@ -1241,89 +1314,136 @@
       return false;
     }
 
+    CreateOatWriters();
+    if (!AddDexFileSources()) {
+      return false;
+    }
+
+    if (IsBootImage() && image_filenames_.size() > 1) {
+      // If we're compiling the boot image, store the boot classpath into the Key-Value store.
+      // We need this for the multi-image case.
+      key_value_store_->Put(OatHeader::kBootClassPath, GetMultiImageBootClassPath());
+    }
+
+    if (!IsBootImage()) {
+      // When compiling an app, create the runtime early to retrieve
+      // the image location key needed for the oat header.
+      if (!CreateRuntime(std::move(runtime_options))) {
+        return false;
+      }
+
+      {
+        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
+        std::vector<gc::space::ImageSpace*> image_spaces =
+            Runtime::Current()->GetHeap()->GetBootImageSpaces();
+        image_file_location_oat_checksum_ = image_spaces[0]->GetImageHeader().GetOatChecksum();
+        image_file_location_oat_data_begin_ =
+            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
+        image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta();
+        // Store the boot image filename(s).
+        std::vector<std::string> image_filenames;
+        for (const gc::space::ImageSpace* image_space : image_spaces) {
+          image_filenames.push_back(image_space->GetImageFilename());
+        }
+        std::string image_file_location = Join(image_filenames, ':');
+        if (!image_file_location.empty()) {
+          key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
+        }
+      }
+
+      // Open dex files for class path.
+      const std::vector<std::string> class_path_locations =
+          GetClassPathLocations(runtime_->GetClassPathString());
+      OpenClassPathFiles(class_path_locations, &class_path_files_);
+
+      // Store the classpath we have right now.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+      key_value_store_->Put(OatHeader::kClassPathKey,
+                            OatFile::EncodeDexFileDependencies(class_path_files));
+    }
+
+    // Now that we have finalized key_value_store_, start writing the oat file.
     {
-      TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
+      TimingLogger::ScopedTiming t_dex("Writing and opening dex files", timings_);
+      rodata_.reserve(oat_writers_.size());
+      for (size_t i = 0, size = oat_writers_.size(); i != size; ++i) {
+        rodata_.push_back(elf_writers_[i]->StartRoData());
+        // Unzip or copy dex files straight to the oat file.
+        std::unique_ptr<MemMap> opened_dex_files_map;
+        std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+        if (!oat_writers_[i]->WriteAndOpenDexFiles(rodata_.back(),
+                                                   oat_files_[i].get(),
+                                                   instruction_set_,
+                                                   instruction_set_features_.get(),
+                                                   key_value_store_.get(),
+                                                   /* verify */ true,
+                                                   &opened_dex_files_map,
+                                                   &opened_dex_files)) {
+          return false;
+        }
+        dex_files_per_oat_file_.push_back(MakeNonOwningPointerVector(opened_dex_files));
+        if (opened_dex_files_map != nullptr) {
+          opened_dex_files_maps_.push_back(std::move(opened_dex_files_map));
+          for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+            dex_file_oat_filename_map_.emplace(dex_file.get(), oat_filenames_[i]);
+            opened_dex_files_.push_back(std::move(dex_file));
+          }
+        } else {
+          DCHECK(opened_dex_files.empty());
+        }
+      }
+    }
+
+    dex_files_ = MakeNonOwningPointerVector(opened_dex_files_);
+    if (IsBootImage()) {
+      // For boot image, pass opened dex files to the Runtime::Create().
+      // Note: Runtime acquires ownership of these dex files.
+      runtime_options.Set(RuntimeArgumentMap::BootClassPathDexList, &opened_dex_files_);
       if (!CreateRuntime(std::move(runtime_options))) {
         return false;
       }
     }
 
-    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
-    // Runtime::Start, give it away now so that we don't starve GC.
-    Thread* self = Thread::Current();
-    self->TransitionFromRunnableToSuspended(kNative);
     // If we're doing the image, override the compiler filter to force full compilation. Must be
     // done ahead of WellKnownClasses::Init that causes verification.  Note: doesn't force
     // compilation of class initializers.
     // Whilst we're in native take the opportunity to initialize well known classes.
+    Thread* self = Thread::Current();
     WellKnownClasses::Init(self->GetJniEnv());
 
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    if (boot_image_filename_.empty()) {
-      dex_files_ = class_linker->GetBootClassPath();
-      // Prune invalid dex locations.
-      for (size_t i = 0; i < dex_locations_.size(); i++) {
-        const char* dex_location = dex_locations_[i];
-        bool contains = false;
-        for (const DexFile* dex_file : dex_files_) {
-          if (strcmp(dex_location, dex_file->GetLocation().c_str()) == 0) {
-            contains = true;
-            break;
-          }
-        }
-        if (!contains) {
-          dex_locations_.erase(dex_locations_.begin() + i);
-          i--;
-        }
-      }
-    } else {
-      TimingLogger::ScopedTiming t_dex("Opening dex files", timings_);
-      if (dex_filenames_.empty()) {
-        ATRACE_BEGIN("Opening zip archive from file descriptor");
-        std::string error_msg;
-        std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(zip_fd_,
-                                                                       zip_location_.c_str(),
-                                                                       &error_msg));
-        if (zip_archive.get() == nullptr) {
-          LOG(ERROR) << "Failed to open zip from file descriptor for '" << zip_location_ << "': "
-              << error_msg;
-          return false;
-        }
-        if (!DexFile::OpenFromZip(*zip_archive.get(), zip_location_, &error_msg, &opened_dex_files_)) {
-          LOG(ERROR) << "Failed to open dex from file descriptor for zip file '" << zip_location_
-              << "': " << error_msg;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-        ATRACE_END();
-      } else {
-        size_t failure_count = OpenDexFiles(dex_filenames_, dex_locations_, &opened_dex_files_);
-        if (failure_count > 0) {
-          LOG(ERROR) << "Failed to open some dex files: " << failure_count;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-      }
-
+    if (!IsBootImage()) {
       constexpr bool kSaveDexInput = false;
       if (kSaveDexInput) {
         SaveDexInput();
       }
+
+      // Handle and ClassLoader creation needs to come after Runtime::Create.
+      ScopedObjectAccess soa(self);
+
+      // Classpath: first the class-path given.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+
+      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
+      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
+
+      class_loader_ = class_linker->CreatePathClassLoader(self, class_path_files);
     }
-    // Ensure opened dex files are writable for dex-to-dex transformations. Also ensure that
-    // the dex caches stay live since we don't want class unloading to occur during compilation.
-    for (const auto& dex_file : dex_files_) {
-      if (!dex_file->EnableWrite()) {
-        PLOG(ERROR) << "Failed to make .dex file writeable '" << dex_file->GetLocation() << "'\n";
+
+    // Ensure opened dex files are writable for dex-to-dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Protect(PROT_READ | PROT_WRITE)) {
+        PLOG(ERROR) << "Failed to make .dex files writable.";
+        return false;
       }
+    }
+
+    // Ensure that the dex caches stay live since we don't want class unloading
+    // to occur during compilation.
+    for (const auto& dex_file : dex_files_) {
       ScopedObjectAccess soa(self);
       dex_caches_.push_back(soa.AddLocalReference<jobject>(
           class_linker->RegisterDexFile(*dex_file, Runtime::Current()->GetLinearAlloc())));
-      dex_file->CreateTypeLookupTable();
     }
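
For reference, MakeNonOwningPointerVector() is used throughout this change (for dex_files_
and the per-oat-file lists above) to borrow raw pointers from a vector of unique_ptr without
transferring ownership. A minimal sketch of such a helper; the exact ART signature may differ:

    #include <memory>
    #include <vector>

    template <typename T>
    std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) {
      std::vector<T*> result;
      result.reserve(src.size());
      for (const std::unique_ptr<T>& ptr : src) {
        result.push_back(ptr.get());  // Borrow the pointer; ownership stays with src.
      }
      return result;
    }
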
 
     /*
@@ -1348,30 +1468,9 @@
     return true;
   }
 
-  void CreateDexOatMappings() {
-    if (oat_files_.size() > 1) {
-      size_t index = 0;
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
-        std::vector<const DexFile*> dex_files;
-        if (index < dex_files_.size()) {
-          dex_files.push_back(dex_files_[index]);
-          dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
-          index++;
-          while (index < dex_files_.size() &&
-              (dex_files_[index]->GetBaseLocation() == dex_files_[index - 1]->GetBaseLocation())) {
-            dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
-            dex_files.push_back(dex_files_[index]);
-            index++;
-          }
-        }
-        dex_files_per_oat_file_.push_back(std::move(dex_files));
-      }
-    } else {
-      dex_files_per_oat_file_.push_back(dex_files_);
-      for (const DexFile* dex_file : dex_files_) {
-        dex_file_oat_filename_map_.emplace(dex_file, oat_filenames_[0]);
-      }
-    }
+  // Returns true if we need to keep the oat file open for the image writer.
+  bool ShouldKeepOatFileOpen() const {
+    return IsImage() && oat_fd_ != kInvalidFd;
   }
 
   // Create and invoke the compiler driver. This will compile all the dex files.
@@ -1379,36 +1478,18 @@
     TimingLogger::ScopedTiming t("dex2oat Compile", timings_);
     compiler_phases_timings_.reset(new CumulativeLogger("compilation times"));
 
-    // Handle and ClassLoader creation needs to come after Runtime::Create
-    jobject class_loader = nullptr;
-    Thread* self = Thread::Current();
+    // Find the dex files we should not inline from.
 
-    if (!boot_image_filename_.empty()) {
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      OpenClassPathFiles(runtime_->GetClassPathString(), dex_files_, &class_path_files_);
-      ScopedObjectAccess soa(self);
-
-      // Classpath: first the class-path given.
-      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
-
-      // Store the classpath we have right now.
-      key_value_store_->Put(OatHeader::kClassPathKey,
-                            OatFile::EncodeDexFileDependencies(class_path_files));
-
-      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
-      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
-
-      class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
-    }
-
-    // Find the dex file we should not inline from.
+    std::vector<std::string> no_inline_filters;
+    Split(no_inline_from_string_, ',', &no_inline_filters);
 
     // For now, on the host always have core-oj removed.
-    if (!kIsTargetBuild && no_inline_from_string_.empty()) {
-      no_inline_from_string_ = "core-oj";
+    const std::string core_oj = "core-oj";
+    if (!kIsTargetBuild && !ContainsElement(no_inline_filters, core_oj)) {
+      no_inline_filters.push_back(core_oj);
     }
 
-    if (!no_inline_from_string_.empty()) {
+    if (!no_inline_filters.empty()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
       std::vector<const std::vector<const DexFile*>*> dex_file_vectors = {
@@ -1417,78 +1498,31 @@
           &dex_files_
       };
       for (const std::vector<const DexFile*>* dex_file_vector : dex_file_vectors) {
-        if (dex_file_vector == nullptr) {
-          continue;
-        }
-
-        bool found = false;
-
         for (const DexFile* dex_file : *dex_file_vector) {
-          // Try the complete location first.
-          found = no_inline_from_string_ == dex_file->GetLocation();
-          // The try just the name.
-          if (!found) {
-            size_t last_slash = dex_file->GetLocation().rfind('/');
-            if (last_slash != std::string::npos) {
-              found = StartsWith(dex_file->GetLocation().substr(last_slash + 1),
-                                 no_inline_from_string_.c_str());
+          for (const std::string& filter : no_inline_filters) {
+            // Use dex_file->GetLocation() rather than dex_file->GetBaseLocation(). This
+            // allows tests to specify <test-dexfile>:classes2.dex if needed. Note that if
+            // the base location passes the StartsWith() test, so do all extra locations.
+            std::string dex_location = dex_file->GetLocation();
+            if (filter.find('/') == std::string::npos) {
+              // The filter does not contain the path. Remove the path from dex_location as well.
+              size_t last_slash = dex_file->GetLocation().rfind('/');
+              if (last_slash != std::string::npos) {
+                dex_location = dex_location.substr(last_slash + 1);
+              }
+            }
+
+            if (StartsWith(dex_location, filter.c_str())) {
+              VLOG(compiler) << "Disabling inlining from " << dex_file->GetLocation();
+              no_inline_from_dex_files_.push_back(dex_file);
+              break;
             }
           }
-
-          if (found) {
-            VLOG(compiler) << "Disabling inlining from " << dex_file->GetLocation();
-            compiler_options_->no_inline_from_ = dex_file;
-            break;
-          }
-        }
-
-        if (found) {
-          break;
         }
       }
-    }
-
-    if (IsBootImage() && image_filenames_.size() > 1) {
-      // If we're compiling the boot image, store the boot classpath into the Key-Value store. If
-      // the image filename was adapted (e.g., for our tests), we need to change this here, too, but
-      // need to strip all path components (they will be re-established when loading).
-      // We need this for the multi-image case.
-      std::ostringstream bootcp_oss;
-      bool first_bootcp = true;
-      for (size_t i = 0; i < dex_locations_.size(); ++i) {
-        if (!first_bootcp) {
-          bootcp_oss << ":";
-        }
-
-        std::string dex_loc = dex_locations_[i];
-        std::string image_filename = image_filenames_[i];
-
-        // Use the dex_loc path, but the image_filename name (without path elements).
-        size_t dex_last_slash = dex_loc.rfind('/');
-
-        // npos is max(size_t). That makes this a bit ugly.
-        size_t image_last_slash = image_filename.rfind('/');
-        size_t image_last_at = image_filename.rfind('@');
-        size_t image_last_sep = (image_last_slash == std::string::npos)
-                                    ? image_last_at
-                                    : (image_last_at == std::string::npos)
-                                          ? image_last_slash
-                                          : std::max(image_last_slash, image_last_at);
-        // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
-
-        if (dex_last_slash == std::string::npos) {
-          dex_loc = image_filename.substr(image_last_sep + 1);
-        } else {
-          dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
-              image_filename.substr(image_last_sep + 1);
-        }
-
-        // Image filenames already end with .art, no need to replace.
-
-        bootcp_oss << dex_loc;
-        first_bootcp = false;
+      if (!no_inline_from_dex_files_.empty()) {
+        compiler_options_->no_inline_from_ = &no_inline_from_dex_files_;
       }
-      key_value_store_->Put(OatHeader::kBootClassPath, bootcp_oss.str());
     }
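
The filter matching above reduces to a small predicate: a filter without a '/' is compared
against the basename of the dex location, and the comparison is prefix-based, so a filter
such as "core-oj" matches "core-oj.jar" as well as multidex locations like
"core-oj.jar:classes2.dex". A hedged restatement of the loop body, not the exact ART code:

    #include <string>

    bool LocationMatchesFilter(std::string location, const std::string& filter) {
      if (filter.find('/') == std::string::npos) {
        // Path-less filter: compare against the basename only.
        size_t last_slash = location.rfind('/');
        if (last_slash != std::string::npos) {
          location = location.substr(last_slash + 1);
        }
      }
      // Prefix match, equivalent to StartsWith(location, filter).
      return location.compare(0, filter.size(), filter) == 0;
    }
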
 
     driver_.reset(new CompilerDriver(compiler_options_.get(),
@@ -1509,7 +1543,7 @@
                                      &dex_file_oat_filename_map_,
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
-    driver_->CompileAll(class_loader, dex_files_, timings_);
+    driver_->CompileAll(class_loader_, dex_files_, timings_);
   }
 
   // Notes on the interleaving of creating the images and oat files to
@@ -1577,71 +1611,42 @@
   // ImageWriter, if necessary.
   // Note: Flushing (and closing) the file is the caller's responsibility, except for the failure
   //       case (when the file will be explicitly erased).
-  bool CreateOatFiles() {
-    CHECK(key_value_store_.get() != nullptr);
-
+  bool WriteOatFiles() {
     TimingLogger::ScopedTiming t("dex2oat Oat", timings_);
 
-    std::vector<std::unique_ptr<OatWriter>> oat_writers;
-    {
-      TimingLogger::ScopedTiming t2("dex2oat OatWriter", timings_);
-      std::string image_file_location;
-      uint32_t image_file_location_oat_checksum = 0;
-      uintptr_t image_file_location_oat_data_begin = 0;
-      int32_t image_patch_delta = 0;
-
-      if (app_image_ && image_base_ == 0) {
-        std::vector<gc::space::ImageSpace*> image_spaces =
-            Runtime::Current()->GetHeap()->GetBootImageSpaces();
-        for (gc::space::ImageSpace* image_space : image_spaces) {
-          image_base_ = std::max(image_base_, RoundUp(
-              reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatFileEnd()),
-              kPageSize));
-        }
-        VLOG(compiler) << "App image base=" << reinterpret_cast<void*>(image_base_);
-      }
-
-      if (IsImage()) {
-        PrepareImageWriter(image_base_);
-      }
-
-      if (!IsBootImage()) {
-        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
-        std::vector<gc::space::ImageSpace*> image_spaces =
-            Runtime::Current()->GetHeap()->GetBootImageSpaces();
-        image_file_location_oat_checksum = image_spaces[0]->GetImageHeader().GetOatChecksum();
-        image_file_location_oat_data_begin =
-            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
-        image_patch_delta = image_spaces[0]->GetImageHeader().GetPatchDelta();
-        std::vector<std::string> image_filenames;
-        for (const gc::space::ImageSpace* image_space : image_spaces) {
-          image_filenames.push_back(image_space->GetImageFilename());
-        }
-        image_file_location = Join(image_filenames, ':');
-      }
-
-      if (!image_file_location.empty()) {
-        key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
-      }
-
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
-        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
-        std::unique_ptr<OatWriter> oat_writer(new OatWriter(dex_files,
-                                                            image_file_location_oat_checksum,
-                                                            image_file_location_oat_data_begin,
-                                                            image_patch_delta,
-                                                            driver_.get(),
-                                                            image_writer_.get(),
-                                                            IsBootImage(),
-                                                            timings_,
-                                                            key_value_store_.get()));
-        oat_writers.push_back(std::move(oat_writer));
+    // Sync the data to the file, in case we did dex2dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Sync()) {
+        PLOG(ERROR) << "Failed to Sync() dex2dex output. Map: " << map->GetName();
+        return false;
       }
     }
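
MemMap::Sync() presumably flushes the quickened (dex2dex-transformed) bytes back to the
backing file; conceptually it is a thin wrapper along these lines (a sketch, assuming an
msync()-based implementation):

    #include <sys/mman.h>
    #include <cstddef>

    // Flush a memory-mapped region back to its backing file, blocking until written.
    bool SyncMapToFile(void* begin, size_t size) {
      return msync(begin, size, MS_SYNC) == 0;
    }
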
 
     if (IsImage()) {
-      // The OatWriter constructor has already updated offsets in methods and we need to
-      // prepare method offsets in the image address space for direct method patching.
+      if (app_image_ && image_base_ == 0) {
+        gc::Heap* const heap = Runtime::Current()->GetHeap();
+        for (gc::space::ImageSpace* image_space : heap->GetBootImageSpaces()) {
+          image_base_ = std::max(image_base_, RoundUp(
+              reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatFileEnd()),
+              kPageSize));
+        }
+        // The non-moving space is right after the oat file. Put the preferred app image location
+        // right after the non-moving space so that we ideally get a contiguous immune region for
+        // the GC.
+        const size_t non_moving_space_capacity = heap->GetNonMovingSpace()->Capacity();
+        image_base_ += non_moving_space_capacity;
+        VLOG(compiler) << "App image base=" << reinterpret_cast<void*>(image_base_);
+      }
+
+      image_writer_.reset(new ImageWriter(*driver_,
+                                          image_base_,
+                                          compiler_options_->GetCompilePic(),
+                                          IsAppImage(),
+                                          image_storage_mode_,
+                                          oat_filenames_,
+                                          dex_file_oat_filename_map_));
+
+      // We need to prepare method offsets in the image address space for direct method patching.
       TimingLogger::ScopedTiming t2("dex2oat Prepare image address space", timings_);
       if (!image_writer_->PrepareImageAddressSpace()) {
         LOG(ERROR) << "Failed to prepare image address space.";
@@ -1651,20 +1656,22 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
+      for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         std::unique_ptr<File>& oat_file = oat_files_[i];
-        std::unique_ptr<OatWriter>& oat_writer = oat_writers[i];
-        std::unique_ptr<ElfWriter> elf_writer =
-            CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file.get());
+        std::unique_ptr<ElfWriter>& elf_writer = elf_writers_[i];
+        std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
 
-        elf_writer->Start();
+        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
+        oat_writer->PrepareLayout(driver_.get(), image_writer_.get(), dex_files);
 
-        OutputStream* rodata = elf_writer->StartRoData();
+        OutputStream*& rodata = rodata_[i];
+        DCHECK(rodata != nullptr);
         if (!oat_writer->WriteRodata(rodata)) {
           LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file->GetPath();
           return false;
         }
         elf_writer->EndRoData(rodata);
+        rodata = nullptr;
 
         OutputStream* text = elf_writer->StartText();
         if (!oat_writer->WriteCode(text)) {
@@ -1673,6 +1680,14 @@
         }
         elf_writer->EndText(text);
 
+        if (!oat_writer->WriteHeader(elf_writer->GetStream(),
+                                     image_file_location_oat_checksum_,
+                                     image_file_location_oat_data_begin_,
+                                     image_patch_delta_)) {
+          LOG(ERROR) << "Failed to write oat header to the ELF file " << oat_file->GetPath();
+          return false;
+        }
+
         elf_writer->SetBssSize(oat_writer->GetBssSize());
         elf_writer->WriteDynamicSection();
         elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
@@ -1698,6 +1713,9 @@
         }
 
         VLOG(compiler) << "Oat file written successfully: " << oat_filenames_[i];
+
+        oat_writer.reset();
+        elf_writer.reset();
       }
     }
 
@@ -1815,17 +1833,27 @@
   }
 
   bool UseProfileGuidedCompilation() const {
-    return !profile_files_.empty();
+    return !profile_files_.empty() || !profile_files_fd_.empty();
   }
 
   bool ProcessProfiles() {
     DCHECK(UseProfileGuidedCompilation());
     ProfileCompilationInfo* info = nullptr;
-    if (ProfileAssistant::ProcessProfiles(profile_files_, reference_profile_files_, &info)) {
-      profile_compilation_info_.reset(info);
-      return true;
+    bool result = false;
+    if (profile_files_.empty()) {
+      DCHECK(!profile_files_fd_.empty());
+      result = ProfileAssistant::ProcessProfiles(
+          profile_files_fd_, reference_profile_files_fd_, &info);
+      CloseAllFds(profile_files_fd_, "profile_files_fd_");
+      CloseAllFds(reference_profile_files_fd_, "reference_profile_files_fd_");
+    } else {
+      result = ProfileAssistant::ProcessProfiles(
+          profile_files_, reference_profile_files_, &info);
     }
-    return false;
+
+    profile_compilation_info_.reset(info);
+
+    return result;
   }
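
CloseAllFds() is not shown in this hunk; presumably it closes each descriptor in the list
and logs rather than fails on error. A hypothetical sketch, assuming that behavior:

    #include <unistd.h>
    #include <vector>

    // Hypothetical: close every fd in `fds`, warning on failure. The `descriptor`
    // argument only labels the log message.
    void CloseAllFds(const std::vector<uint32_t>& fds, const char* descriptor) {
      for (uint32_t fd : fds) {
        if (close(static_cast<int>(fd)) < 0) {
          PLOG(WARNING) << "Failed to close a descriptor for " << descriptor;
        }
      }
    }
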
 
   bool ShouldCompileBasedOnProfiles() const {
@@ -1845,65 +1873,78 @@
     return result;
   }
 
-  static size_t OpenDexFiles(std::vector<const char*>& dex_filenames,
-                             std::vector<const char*>& dex_locations,
-                             std::vector<std::unique_ptr<const DexFile>>* dex_files) {
-    DCHECK(dex_files != nullptr) << "OpenDexFiles out-param is nullptr";
-    size_t failure_count = 0;
-    for (size_t i = 0; i < dex_filenames.size(); i++) {
-      const char* dex_filename = dex_filenames[i];
-      const char* dex_location = dex_locations[i];
-      ATRACE_BEGIN(StringPrintf("Opening dex file '%s'", dex_filenames[i]).c_str());
-      std::string error_msg;
-      if (!OS::FileExists(dex_filename)) {
-        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
-        dex_filenames.erase(dex_filenames.begin() + i);
-        dex_locations.erase(dex_locations.begin() + i);
-        i--;
-        continue;
+  std::string GetMultiImageBootClassPath() {
+    DCHECK(IsBootImage());
+    DCHECK_GT(oat_filenames_.size(), 1u);
+    // If the image filename was adapted (e.g., for our tests), we need to change this here,
+    // too, but need to strip all path components (they will be re-established when loading).
+    std::ostringstream bootcp_oss;
+    bool first_bootcp = true;
+    for (size_t i = 0; i < dex_locations_.size(); ++i) {
+      if (!first_bootcp) {
+        bootcp_oss << ":";
       }
-      if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
-        LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
-        ++failure_count;
+
+      std::string dex_loc = dex_locations_[i];
+      std::string image_filename = image_filenames_[i];
+
+      // Use the dex_loc path, but the image_filename name (without path elements).
+      size_t dex_last_slash = dex_loc.rfind('/');
+
+      // npos is max(size_t). That makes this a bit ugly.
+      size_t image_last_slash = image_filename.rfind('/');
+      size_t image_last_at = image_filename.rfind('@');
+      size_t image_last_sep = (image_last_slash == std::string::npos)
+                                  ? image_last_at
+                                  : (image_last_at == std::string::npos)
+                                        ? image_last_slash
+                                        : std::max(image_last_slash, image_last_at);
+      // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
+
+      if (dex_last_slash == std::string::npos) {
+        dex_loc = image_filename.substr(image_last_sep + 1);
+      } else {
+        dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
+            image_filename.substr(image_last_sep + 1);
       }
-      ATRACE_END();
+
+      // Image filenames already end with .art, no need to replace.
+
+      bootcp_oss << dex_loc;
+      first_bootcp = false;
     }
-    return failure_count;
+    return bootcp_oss.str();
   }
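
A worked example of the name surgery above, with hypothetical inputs: for
dex_loc = "/system/framework/core.jar" and image_filename = "out/test-core.art",
dex_last_slash keeps the "/system/framework/" prefix, image_last_sep strips "out/",
and the resulting boot classpath entry is "/system/framework/test-core.art". The
"+1 on npos" trick covers the no-separator case: std::string::npos + 1 wraps to 0,
so substr(0) yields the whole filename when there is nothing to strip.
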
 
-  // Returns true if dex_files has a dex with the named location. We compare canonical locations,
-  // so that relative and absolute paths will match. Not caching for the dex_files isn't very
-  // efficient, but under normal circumstances the list is neither large nor is this part too
-  // sensitive.
-  static bool DexFilesContains(const std::vector<const DexFile*>& dex_files,
-                               const std::string& location) {
-    std::string canonical_location(DexFile::GetDexCanonicalLocation(location.c_str()));
-    for (size_t i = 0; i < dex_files.size(); ++i) {
-      if (DexFile::GetDexCanonicalLocation(dex_files[i]->GetLocation().c_str()) ==
-          canonical_location) {
-        return true;
-      }
+  std::vector<std::string> GetClassPathLocations(const std::string& class_path) {
+    // This function is used only for apps, and for an app we have exactly one oat file.
+    DCHECK(!IsBootImage());
+    DCHECK_EQ(oat_writers_.size(), 1u);
+    std::vector<std::string> dex_files_canonical_locations;
+    for (const char* location : oat_writers_[0]->GetSourceLocations()) {
+      dex_files_canonical_locations.push_back(DexFile::GetDexCanonicalLocation(location));
     }
-    return false;
-  }
 
-  // Appends to opened_dex_files any elements of class_path that dex_files
-  // doesn't already contain. This will open those dex files as necessary.
-  static void OpenClassPathFiles(const std::string& class_path,
-                                 std::vector<const DexFile*> dex_files,
-                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
-    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
     std::vector<std::string> parsed;
     Split(class_path, ':', &parsed);
-    // Take Locks::mutator_lock_ so that lock ordering on the ClassLinker::dex_lock_ is maintained.
-    ScopedObjectAccess soa(Thread::Current());
-    for (size_t i = 0; i < parsed.size(); ++i) {
-      if (DexFilesContains(dex_files, parsed[i])) {
-        continue;
-      }
+    auto kept_it = std::remove_if(parsed.begin(),
+                                  parsed.end(),
+                                  [dex_files_canonical_locations](const std::string& location) {
+      return ContainsElement(dex_files_canonical_locations,
+                             DexFile::GetDexCanonicalLocation(location.c_str()));
+    });
+    parsed.erase(kept_it, parsed.end());
+    return parsed;
+  }
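
The filtering above is the standard erase-remove idiom. A self-contained sketch of the
same pattern, using plain string equality in place of canonical dex locations:

    #include <algorithm>
    #include <string>
    #include <vector>

    std::vector<std::string> RemoveKnownLocations(std::vector<std::string> parsed,
                                                  const std::vector<std::string>& known) {
      auto kept_end = std::remove_if(parsed.begin(), parsed.end(),
                                     [&known](const std::string& location) {
        return std::find(known.begin(), known.end(), location) != known.end();
      });
      parsed.erase(kept_end, parsed.end());  // Drop the compacted-out tail.
      return parsed;
    }
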
+
+  // Opens requested class path files and appends them to opened_dex_files.
+  static void OpenClassPathFiles(const std::vector<std::string>& class_path_locations,
+                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
+    for (const std::string& location : class_path_locations) {
       std::string error_msg;
-      if (!DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg, opened_dex_files)) {
-        LOG(WARNING) << "Failed to open dex file '" << parsed[i] << "': " << error_msg;
+      if (!DexFile::Open(location.c_str(), location.c_str(), &error_msg, opened_dex_files)) {
+        LOG(WARNING) << "Failed to open dex file '" << location << "': " << error_msg;
       }
     }
   }
@@ -1978,6 +2019,63 @@
     return true;
   }
 
+  void PruneNonExistentDexFiles() {
+    DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+    size_t kept = 0u;
+    for (size_t i = 0, size = dex_filenames_.size(); i != size; ++i) {
+      if (!OS::FileExists(dex_filenames_[i])) {
+        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filenames_[i] << "'";
+      } else {
+        dex_filenames_[kept] = dex_filenames_[i];
+        dex_locations_[kept] = dex_locations_[i];
+        ++kept;
+      }
+    }
+    dex_filenames_.resize(kept);
+    dex_locations_.resize(kept);
+  }
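
PruneNonExistentDexFiles() uses a manual keep-index compaction rather than
std::remove_if because the two parallel vectors (filenames and locations) must stay in
lockstep. The same pattern as a generic helper, for illustration only:

    #include <vector>

    // Keep entries for which `keep` holds, compacting two parallel vectors in lockstep.
    template <typename T, typename U, typename Pred>
    void CompactParallel(std::vector<T>* a, std::vector<U>* b, Pred keep) {
      size_t kept = 0;
      for (size_t i = 0; i != a->size(); ++i) {
        if (keep((*a)[i])) {
          (*a)[kept] = (*a)[i];
          (*b)[kept] = (*b)[i];
          ++kept;
        }
      }
      a->resize(kept);
      b->resize(kept);
    }
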
+
+  bool AddDexFileSources() {
+    TimingLogger::ScopedTiming t2("AddDexFileSources", timings_);
+    if (zip_fd_ != -1) {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      if (!oat_writers_[0]->AddZippedDexFilesSource(ScopedFd(zip_fd_), zip_location_.c_str())) {
+        return false;
+      }
+    } else if (oat_writers_.size() > 1u) {
+      // Multi-image.
+      DCHECK_EQ(oat_writers_.size(), dex_filenames_.size());
+      DCHECK_EQ(oat_writers_.size(), dex_locations_.size());
+      for (size_t i = 0, size = oat_writers_.size(); i != size; ++i) {
+        if (!oat_writers_[i]->AddDexFileSource(dex_filenames_[i], dex_locations_[i])) {
+          return false;
+        }
+      }
+    } else {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+      DCHECK_NE(dex_filenames_.size(), 0u);
+      for (size_t i = 0; i != dex_filenames_.size(); ++i) {
+        if (!oat_writers_[0]->AddDexFileSource(dex_filenames_[i], dex_locations_[i])) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  void CreateOatWriters() {
+    TimingLogger::ScopedTiming t2("CreateOatWriters", timings_);
+    elf_writers_.reserve(oat_files_.size());
+    oat_writers_.reserve(oat_files_.size());
+    for (const std::unique_ptr<File>& oat_file : oat_files_) {
+      elf_writers_.emplace_back(
+          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file.get()));
+      elf_writers_.back()->Start();
+      oat_writers_.emplace_back(new OatWriter(IsBootImage(), timings_));
+    }
+  }
+
   void SaveDexInput() {
     for (size_t i = 0; i < dex_files_.size(); ++i) {
       const DexFile* dex_file = dex_files_[i];
@@ -2028,6 +2126,25 @@
    // Disable libsigchain. We don't need it during compilation, and it prevents us
     // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
     raw_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
+    // Disable HSpace compaction to save virtual address space for the heap.
+    // We only need to disable HSpace compaction for OOM because the background
+    // collector is the same as the foreground collector by default for dex2oat.
+    raw_options.push_back(std::make_pair("-XX:DisableHSpaceCompactForOOM", nullptr));
+
+    // If we're asked to be deterministic, ensure non-concurrent GC for determinism. Also
+    // force the free-list implementation for large objects.
+    if (compiler_options_->IsForceDeterminism()) {
+      raw_options.push_back(std::make_pair("-Xgc:nonconcurrent", nullptr));
+      raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=freelist", nullptr));
+
+      // We also need to turn off the non-moving space. For that, we need to disable HSpace
+      // compaction (done above) and ensure that neither foreground nor background collectors
+      // are concurrent.
+      raw_options.push_back(std::make_pair("-XX:BackgroundGC=nonconcurrent", nullptr));
+
+      // To make identity hashcode deterministic, set a known seed.
+      mirror::Object::SetHashCodeSeed(987654321U);
+    }
 
     if (!Runtime::ParseOptions(raw_options, false, runtime_options)) {
       LOG(ERROR) << "Failed to parse runtime options";
@@ -2037,8 +2154,8 @@
   }
 
   // Create a runtime necessary for compilation.
-  bool CreateRuntime(RuntimeArgumentMap&& runtime_options)
-      SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
+  bool CreateRuntime(RuntimeArgumentMap&& runtime_options) {
+    TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
     if (!Runtime::Create(std::move(runtime_options))) {
       LOG(ERROR) << "Failed to create runtime";
       return false;
@@ -2059,18 +2176,12 @@
 
     runtime_->GetClassLinker()->RunRootClinits();
 
-    return true;
-  }
+    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
+    // Runtime::Start, give it away now so that we don't starve GC.
+    Thread* self = Thread::Current();
+    self->TransitionFromRunnableToSuspended(kNative);
 
-  void PrepareImageWriter(uintptr_t image_base) {
-    DCHECK(IsImage());
-    image_writer_.reset(new ImageWriter(*driver_,
-                                        image_base,
-                                        compiler_options_->GetCompilePic(),
-                                        IsAppImage(),
-                                        image_storage_mode_,
-                                        oat_filenames_,
-                                        dex_file_oat_filename_map_));
+    return true;
   }
 
   // Let the ImageWriter write the image files. If we do not compile PIC, also fix up the oat files.
@@ -2078,9 +2189,14 @@
       REQUIRES(!Locks::mutator_lock_) {
     CHECK(image_writer_ != nullptr);
     if (!IsBootImage()) {
+      CHECK(image_filenames_.empty());
       image_filenames_.push_back(app_image_file_name_.c_str());
     }
-    if (!image_writer_->Write(app_image_fd_, image_filenames_, oat_filenames_)) {
+    if (!image_writer_->Write(app_image_fd_,
+                              image_filenames_,
+                              oat_fd_,
+                              oat_filenames_,
+                              oat_location_)) {
       LOG(ERROR) << "Failure during image file creation";
       return false;
     }
@@ -2249,6 +2365,9 @@
   InstructionSet instruction_set_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
 
+  uint32_t image_file_location_oat_checksum_;
+  uintptr_t image_file_location_oat_data_begin_;
+  int32_t image_patch_delta_;
   std::unique_ptr<SafeMap<std::string, std::string> > key_value_store_;
 
   std::unique_ptr<VerificationResults> verification_results_;
@@ -2256,11 +2375,11 @@
   DexFileToMethodInlinerMap method_inliner_map_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 
+  std::unique_ptr<Runtime> runtime_;
+
   // Ownership for the class path files.
   std::vector<std::unique_ptr<const DexFile>> class_path_files_;
 
-  std::unique_ptr<Runtime> runtime_;
-
   size_t thread_count_;
   uint64_t start_ns_;
   std::unique_ptr<WatchDog> watchdog_;
@@ -2295,11 +2414,19 @@
   std::vector<const DexFile*> dex_files_;
   std::string no_inline_from_string_;
   std::vector<jobject> dex_caches_;
-  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+  jobject class_loader_;
 
+  std::vector<std::unique_ptr<ElfWriter>> elf_writers_;
+  std::vector<std::unique_ptr<OatWriter>> oat_writers_;
+  std::vector<OutputStream*> rodata_;
   std::unique_ptr<ImageWriter> image_writer_;
   std::unique_ptr<CompilerDriver> driver_;
 
+  std::vector<std::unique_ptr<MemMap>> opened_dex_files_maps_;
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+
+  std::vector<const DexFile*> no_inline_from_dex_files_;
+
   std::vector<std::string> verbose_methods_;
   bool dump_stats_;
   bool dump_passes_;
@@ -2311,6 +2438,8 @@
   int app_image_fd_;
   std::vector<std::string> profile_files_;
   std::vector<std::string> reference_profile_files_;
+  std::vector<uint32_t> profile_files_fd_;
+  std::vector<uint32_t> reference_profile_files_fd_;
   std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
   TimingLogger* timings_;
   std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
@@ -2320,6 +2449,9 @@
   // Backing storage.
   std::vector<std::string> char_backing_storage_;
 
+  // See CompilerOptions.force_determinism_.
+  bool force_determinism_;
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat);
 };
 
@@ -2346,14 +2478,19 @@
 static int CompileImage(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  if (!dex2oat.CreateOatFiles()) {
+  if (!dex2oat.WriteOatFiles()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
 
-  // Close the image oat files. We always expect the output file by name, and it will be
-  // re-opened from the unstripped name. Note: it's easier to *flush* and close...
-  if (!dex2oat.FlushCloseOatFiles()) {
+  // Flush boot.oat. We always expect the output file by name, and it will be re-opened from the
+  // unstripped name. Do not close the file if we are compiling the image with an oat fd since the
+  // image writer will require this fd to generate the image.
+  if (dex2oat.ShouldKeepOatFileOpen()) {
+    if (!dex2oat.FlushOatFiles()) {
+      return EXIT_FAILURE;
+    }
+  } else if (!dex2oat.FlushCloseOatFiles()) {
     return EXIT_FAILURE;
   }
 
@@ -2385,7 +2522,7 @@
 static int CompileApp(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  if (!dex2oat.CreateOatFiles()) {
+  if (!dex2oat.WriteOatFiles()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
@@ -2442,6 +2579,11 @@
     }
   }
 
+  // Check early that the result of compilation can be written.
+  if (!dex2oat.OpenFile()) {
+    return EXIT_FAILURE;
+  }
+
   // Print the complete line when any of the following is true:
   //   1) Debug build
   //   2) Compiling an image
@@ -2455,11 +2597,6 @@
   }
 
   if (!dex2oat.Setup()) {
-    return EXIT_FAILURE;
-  }
-
-  // Check early that the result of compilation can be written
-  if (!dex2oat.OpenFile()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index d4bef0f..1f74c93 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -938,6 +938,11 @@
         has_modrm = true;
         load = true;
         break;
+      case 0xB8:
+        opcode1 = "popcnt";
+        has_modrm = true;
+        load = true;
+        break;
       case 0xBE:
         opcode1 = "movsxb";
         has_modrm = true;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 7b9ce5b..af08fc4 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -40,7 +40,7 @@
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "image.h"
+#include "image-inl.h"
 #include "indenter.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/file_output_stream.h"
@@ -142,9 +142,7 @@
     text->End();
 
     if (oat_file_->BssSize() != 0) {
-      bss->Start();
-      bss->SetSize(oat_file_->BssSize());
-      bss->End();
+      bss->WriteNoBitsSection(oat_file_->BssSize());
     }
 
     builder_->WriteDynamicSection(elf_file->GetPath());
@@ -155,6 +153,7 @@
 
     strtab->Start();
     strtab->Write("");  // strtab should start with empty string.
+    AddTrampolineSymbols();
     Walk(&art::OatSymbolizer::AddSymbol);
     strtab->End();
 
@@ -167,6 +166,35 @@
     return builder_->Good();
   }
 
+  void AddTrampolineSymbol(const char* name, uint32_t code_offset) {
+    if (code_offset != 0) {
+      uint32_t name_offset = builder_->GetStrTab()->Write(name);
+      uint64_t symbol_value = code_offset - oat_file_->GetOatHeader().GetExecutableOffset();
+      // Specifying 0 as the symbol size means that the symbol lasts until the next symbol or until
+      // the end of the section in case of the last symbol.
+      builder_->GetSymTab()->Add(name_offset, builder_->GetText(), symbol_value,
+          /* is_relative */ true, /* size */ 0, STB_GLOBAL, STT_FUNC);
+    }
+  }
+
+  void AddTrampolineSymbols() {
+    const OatHeader& oat_header = oat_file_->GetOatHeader();
+    AddTrampolineSymbol("interpreterToInterpreterBridge",
+                        oat_header.GetInterpreterToInterpreterBridgeOffset());
+    AddTrampolineSymbol("interpreterToCompiledCodeBridge",
+                        oat_header.GetInterpreterToCompiledCodeBridgeOffset());
+    AddTrampolineSymbol("jniDlsymLookup",
+                        oat_header.GetJniDlsymLookupOffset());
+    AddTrampolineSymbol("quickGenericJniTrampoline",
+                        oat_header.GetQuickGenericJniTrampolineOffset());
+    AddTrampolineSymbol("quickImtConflictTrampoline",
+                        oat_header.GetQuickImtConflictTrampolineOffset());
+    AddTrampolineSymbol("quickResolutionTrampoline",
+                        oat_header.GetQuickResolutionTrampolineOffset());
+    AddTrampolineSymbol("quickToInterpreterBridge",
+                        oat_header.GetQuickToInterpreterBridgeOffset());
+  }
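
The symbol value arithmetic above: oat code offsets are measured from the start of the
oat file, while the is_relative symbols are measured from the start of the executable
(.text) section, hence the subtraction. In other words, for a trampoline at oat offset C
and an executable section beginning at oat offset E, the emitted symbol value is C - E.
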
+
   void Walk(Callback callback) {
     std::vector<const OatFile::OatDexFile*> oat_dex_files = oat_file_->GetOatDexFiles();
     for (size_t i = 0; i < oat_dex_files.size(); i++) {
@@ -1656,7 +1684,7 @@
       stats_.file_bytes = file->GetLength();
     }
     size_t header_bytes = sizeof(ImageHeader);
-    const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap);
+    const auto& object_section = image_header_.GetImageSection(ImageHeader::kSectionObjects);
     const auto& field_section = image_header_.GetImageSection(ImageHeader::kSectionArtFields);
     const auto& method_section = image_header_.GetMethodsSection();
     const auto& dex_cache_arrays_section = image_header_.GetImageSection(
@@ -1665,17 +1693,46 @@
         ImageHeader::kSectionInternedStrings);
     const auto& class_table_section = image_header_.GetImageSection(
         ImageHeader::kSectionClassTable);
+    const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap);
+
     stats_.header_bytes = header_bytes;
-    stats_.alignment_bytes += RoundUp(header_bytes, kObjectAlignment) - header_bytes;
-    // Add padding between the field and method section.
-    // (Field section is 4-byte aligned, method section is 8-byte aligned on 64-bit targets.)
-    stats_.alignment_bytes += method_section.Offset() -
-        (field_section.Offset() + field_section.Size());
-    // Add padding between the dex cache arrays section and the intern table. (Dex cache
-    // arrays section is 4-byte aligned on 32-bit targets, intern table is 8-byte aligned.)
-    stats_.alignment_bytes += intern_section.Offset() -
-        (dex_cache_arrays_section.Offset() + dex_cache_arrays_section.Size());
-    stats_.alignment_bytes += bitmap_section.Offset() - image_header_.GetImageSize();
+
+    // Objects are kObjectAlignment-aligned.
+    // CHECK_EQ(RoundUp(header_bytes, kObjectAlignment), object_section.Offset());
+    if (object_section.Offset() > header_bytes) {
+      stats_.alignment_bytes += object_section.Offset() - header_bytes;
+    }
+
+    // Field section is 4-byte aligned.
+    constexpr size_t kFieldSectionAlignment = 4U;
+    uint32_t end_objects = object_section.Offset() + object_section.Size();
+    CHECK_EQ(RoundUp(end_objects, kFieldSectionAlignment), field_section.Offset());
+    stats_.alignment_bytes += field_section.Offset() - end_objects;
+
+    // Method section is 4/8 byte aligned depending on target. Just check for 4-byte alignment.
+    uint32_t end_fields = field_section.Offset() + field_section.Size();
+    CHECK_ALIGNED(method_section.Offset(), 4);
+    stats_.alignment_bytes += method_section.Offset() - end_fields;
+
+    // Dex cache arrays section is aligned depending on the target. Just check for 4-byte alignment.
+    uint32_t end_methods = method_section.Offset() + method_section.Size();
+    CHECK_ALIGNED(dex_cache_arrays_section.Offset(), 4);
+    stats_.alignment_bytes += dex_cache_arrays_section.Offset() - end_methods;
+
+    // Intern table is 8-byte aligned.
+    uint32_t end_caches = dex_cache_arrays_section.Offset() + dex_cache_arrays_section.Size();
+    CHECK_EQ(RoundUp(end_caches, 8U), intern_section.Offset());
+    stats_.alignment_bytes += intern_section.Offset() - end_caches;
+
+    // Add space between intern table and class table.
+    uint32_t end_intern = intern_section.Offset() + intern_section.Size();
+    stats_.alignment_bytes += class_table_section.Offset() - end_intern;
+
+    // Add space between class table and bitmap. Expect the bitmap to be page-aligned.
+    uint32_t end_ctable = class_table_section.Offset() + class_table_section.Size();
+    CHECK_ALIGNED(bitmap_section.Offset(), kPageSize);
+    stats_.alignment_bytes += bitmap_section.Offset() - end_ctable;
+
     stats_.bitmap_bytes += bitmap_section.Size();
     stats_.art_field_bytes += field_section.Size();
     stats_.art_method_bytes += method_section.Size();
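
Each section pair above follows the same accounting pattern: the gap between the end of
one section and the start of the next is attributed to alignment padding, with a CHECK
wherever the expected alignment is fixed. Schematically:

    // For consecutive sections `a` then `b`, where `b` has a known alignment:
    uint32_t end_a = a.Offset() + a.Size();
    CHECK_EQ(RoundUp(end_a, kAlignmentOfB), b.Offset());
    stats_.alignment_bytes += b.Offset() - end_a;  // Count the gap as padding.
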
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index d836532..1668dc5 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -35,7 +35,7 @@
 #include "elf_file.h"
 #include "elf_file_impl.h"
 #include "gc/space/image_space.h"
-#include "image.h"
+#include "image-inl.h"
 #include "mirror/abstract_method.h"
 #include "mirror/object-inl.h"
 #include "mirror/method.h"
@@ -547,6 +547,9 @@
 
 void PatchOat::PatchClassTable(const ImageHeader* image_header) {
   const auto& section = image_header->GetImageSection(ImageHeader::kSectionClassTable);
+  if (section.Size() == 0) {
+    return;
+  }
   // Note that we require that ReadFromMemory does not make an internal copy of the elements.
   // This also relies on visit roots not doing any verification which could fail after we update
   // the roots to be the image addresses.
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 04645d1..7bf6d21 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -274,7 +274,6 @@
   arch/arm64/fault_handler_arm64.cc
 
 LIBART_SRC_FILES_x86 := \
-  interpreter/mterp/mterp_stub.cc \
   arch/x86/context_x86.cc \
   arch/x86/entrypoints_init_x86.cc \
   arch/x86/jni_entrypoints_x86.S \
@@ -286,6 +285,20 @@
 LIBART_TARGET_SRC_FILES_x86 := \
   $(LIBART_SRC_FILES_x86)
 
+# Darwin uses non-standard x86 assembly syntax. Don't build the x86 mterp for the Darwin host.
+ifeq ($(HOST_OS),darwin)
+  LIBART_SRC_FILES_x86 += \
+    interpreter/mterp/mterp_stub.cc
+else
+  LIBART_SRC_FILES_x86 += \
+    interpreter/mterp/mterp.cc \
+    interpreter/mterp/out/mterp_x86.S
+endif
+# But do support the x86 mterp for target builds regardless of the host platform.
+LIBART_TARGET_SRC_FILES_x86 += \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_x86.S
+
 # Note that the fault_handler_x86.cc is not a mistake.  This file is
 # shared between the x86 and x86_64 architectures.
 LIBART_SRC_FILES_x86_64 := \
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 42f5df4..b97a8db 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -50,6 +50,10 @@
     "silvermont",
 };
 
+static constexpr const char* x86_variants_with_popcnt[] = {
+    "silvermont",
+};
+
 const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
     bool x86_64) {
@@ -69,6 +73,11 @@
                                                arraysize(x86_variants_prefer_locked_add_sync),
                                                variant);
 
+  bool has_POPCNT = FindVariantInArray(x86_variants_with_popcnt,
+                                       arraysize(x86_variants_with_popcnt),
+                                       variant);
+
+  // Verify that the variant is known.
   bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
                                           variant);
   if (!known_variant && variant != "default") {
@@ -77,10 +86,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -93,12 +102,15 @@
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
   bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
+  bool has_POPCNT = (bitmap & kPopCntBitfield) != 0;
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                                has_AVX, has_AVX2, prefers_locked_add);
+                                            has_AVX, has_AVX2, prefers_locked_add,
+                                            has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                             has_AVX, has_AVX2, prefers_locked_add);
+                                         has_AVX, has_AVX2, prefers_locked_add,
+                                         has_POPCNT);
   }
 }
 
@@ -138,12 +150,18 @@
   // No #define for memory synchronization preference.
   const bool prefers_locked_add = false;
 
+#ifndef __POPCNT__
+  const bool has_POPCNT = false;
+#else
+  const bool has_POPCNT = true;
+#endif
+
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -158,6 +176,7 @@
   bool has_AVX2 = false;
   // No cpuinfo for memory synchronization preference.
   const bool prefers_locked_add = false;
+  bool has_POPCNT = false;
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
@@ -183,6 +202,9 @@
           if (line.find("avx2") != std::string::npos) {
             has_AVX2 = true;
           }
+          if (line.find("popcnt") != std::string::npos) {
+            has_POPCNT = true;
+          }
         } else if (line.find("processor") != std::string::npos &&
             line.find(": 1") != std::string::npos) {
           smp = true;
@@ -195,10 +217,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
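
Outside ART, the same capability probe can be done by scanning the flags line of
/proc/cpuinfo. A minimal standalone sketch, using substring matching like the code above
(so it shares the same tolerance for near-miss flag names):

    #include <fstream>
    #include <string>

    // Returns true if /proc/cpuinfo advertises the given feature flag (e.g. "popcnt").
    bool CpuInfoHasFlag(const std::string& flag) {
      std::ifstream in("/proc/cpuinfo");
      std::string line;
      while (std::getline(in, line)) {
        if (line.find("flags") != std::string::npos &&
            line.find(flag) != std::string::npos) {
          return true;
        }
      }
      return false;
    }
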
 
@@ -223,7 +245,8 @@
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
       (has_AVX2_ == other_as_x86->has_AVX2_) &&
-      (prefers_locked_add_ == other_as_x86->prefers_locked_add_);
+      (prefers_locked_add_ == other_as_x86->prefers_locked_add_) &&
+      (has_POPCNT_ == other_as_x86->has_POPCNT_);
 }
 
 uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -233,7 +256,8 @@
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
       (has_AVX2_ ? kAvx2Bitfield : 0) |
-      (prefers_locked_add_ ? kPrefersLockedAdd : 0);
+      (prefers_locked_add_ ? kPrefersLockedAdd : 0) |
+      (has_POPCNT_ ? kPopCntBitfield : 0);
 }
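
The updated test expectations further below follow directly from these bitfields: smp
contributes bit 1, and a silvermont variant sets ssse3 (2), sse4.1 (4), sse4.2 (8),
lock_add (64), and now popcnt (128). The bitmap therefore moves from
1 + 2 + 4 + 8 + 64 = 79 to 79 + 128 = 207, which is where the 79U to 207U changes in the
feature tests come from.
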
 
 std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -273,6 +297,11 @@
   } else {
     result += ",-lock_add";
   }
+  if (has_POPCNT_) {
+    result += ",popcnt";
+  } else {
+    result += ",-popcnt";
+  }
   return result;
 }
 
@@ -285,6 +314,7 @@
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
   bool prefers_locked_add = prefers_locked_add_;
+  bool has_POPCNT = has_POPCNT_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
     if (feature == "ssse3") {
@@ -311,6 +341,10 @@
       prefers_locked_add = true;
     } else if (feature == "-lock_add") {
       prefers_locked_add = false;
+    } else if (feature == "popcnt") {
+      has_POPCNT = true;
+    } else if (feature == "-popcnt") {
+      has_POPCNT = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
@@ -318,10 +352,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 2b845f8..1819654 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -62,6 +62,8 @@
 
   bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
 
+  bool HasPopCnt() const { return has_POPCNT_; }
+
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
   virtual const InstructionSetFeatures*
@@ -75,10 +77,17 @@
                                  bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2, bool prefers_locked_add)
-      : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
-        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2),
-        prefers_locked_add_(prefers_locked_add) {
+                            bool has_AVX, bool has_AVX2,
+                            bool prefers_locked_add,
+                            bool has_POPCNT)
+      : InstructionSetFeatures(smp),
+        has_SSSE3_(has_SSSE3),
+        has_SSE4_1_(has_SSE4_1),
+        has_SSE4_2_(has_SSE4_2),
+        has_AVX_(has_AVX),
+        has_AVX2_(has_AVX2),
+        prefers_locked_add_(prefers_locked_add),
+        has_POPCNT_(has_POPCNT) {
   }
 
  private:
@@ -91,6 +100,7 @@
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
     kPrefersLockedAdd = 64,
+    kPopCntBitfield = 128,
   };
 
   const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
@@ -99,6 +109,7 @@
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
   const bool prefers_locked_add_;  // x86 use locked add for memory synchronization.
+  const bool has_POPCNT_;  // x86 population count.
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
 };
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index e8d01e6..a062c12 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
@@ -40,7 +40,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 67U);
 
@@ -50,7 +50,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -60,7 +60,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
 
@@ -77,9 +77,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
                x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 79U);
+  EXPECT_EQ(x86_features->AsBitmap(), 207U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -87,7 +87,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -97,9 +97,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 207U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index b8000d0..aba7234 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,9 +74,10 @@
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2, bool prefers_locked_add)
+                               bool has_AVX, bool has_AVX2, bool prefers_locked_add,
+                               bool has_POPCNT)
       : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                  has_AVX2, prefers_locked_add) {
+                                  has_AVX2, prefers_locked_add, has_POPCNT) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 4562c64..78aeacf 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 4166e22..3463b0d 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -37,7 +37,7 @@
   GcRootSource gc_root_source(this);
   mirror::Class* result = declaring_class_.Read(&gc_root_source);
   DCHECK(result != nullptr);
-  DCHECK(result->IsLoaded() || result->IsErroneous());
+  DCHECK(result->IsLoaded() || result->IsErroneous()) << result->GetStatus();
   return result;
 }
 
@@ -334,6 +334,15 @@
   visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
 }
 
+template <typename Visitor>
+inline void ArtField::UpdateObjects(const Visitor& visitor) {
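+  // Pass the declaring class root through the visitor and only write it back if it moved.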
+  mirror::Class* old_class = DeclaringClassRoot().Read<kWithoutReadBarrier>();
+  mirror::Class* new_class = visitor(old_class);
+  if (old_class != new_class) {
+    SetDeclaringClass(new_class);
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ART_FIELD_INL_H_
diff --git a/runtime/art_field.h b/runtime/art_field.h
index a943a34..ee1ba1f 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -190,6 +190,11 @@
     return declaring_class_;
   }
 
+  // Update the declaring class with the passed-in visitor. Does not use a read barrier.
+  template <typename Visitor>
+  ALWAYS_INLINE void UpdateObjects(const Visitor& visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   mirror::Class* ProxyFindSystemClass(const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index a5f5c49..74eb722 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -467,6 +467,43 @@
   }
 }
 
+template <typename Visitor>
+inline void ArtMethod::UpdateObjectsForImageRelocation(const Visitor& visitor) {
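+  // Pass each owned pointer (declaring class, resolved methods, resolved types) through the
+  // visitor and store it back only if it moved.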
+  mirror::Class* old_class = GetDeclaringClassNoBarrier();
+  mirror::Class* new_class = visitor(old_class);
+  if (old_class != new_class) {
+    SetDeclaringClass(new_class);
+  }
+  ArtMethod** old_methods = GetDexCacheResolvedMethods(sizeof(void*));
+  ArtMethod** new_methods = visitor(old_methods);
+  if (old_methods != new_methods) {
+    SetDexCacheResolvedMethods(new_methods, sizeof(void*));
+  }
+  GcRoot<mirror::Class>* old_types = GetDexCacheResolvedTypes(sizeof(void*));
+  GcRoot<mirror::Class>* new_types = visitor(old_types);
+  if (old_types != new_types) {
+    SetDexCacheResolvedTypes(new_types, sizeof(void*));
+  }
+}
+
+template <typename Visitor>
+inline void ArtMethod::UpdateEntrypoints(const Visitor& visitor) {
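+  // Only native methods have a JNI entry point to relocate; for everything else it must be
+  // null, as the DCHECK below verifies.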
+  if (IsNative()) {
+    const void* old_native_code = GetEntryPointFromJni();
+    const void* new_native_code = visitor(old_native_code);
+    if (old_native_code != new_native_code) {
+      SetEntryPointFromJni(new_native_code);
+    }
+  } else {
+    DCHECK(GetEntryPointFromJni() == nullptr);
+  }
+  const void* old_code = GetEntryPointFromQuickCompiledCode();
+  const void* new_code = visitor(old_code);
+  if (old_code != new_code) {
+    SetEntryPointFromQuickCompiledCode(new_code);
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ART_METHOD_INL_H_
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 0be2fa2..440e796 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -477,6 +477,17 @@
   // Returns whether the method has any compiled code, JIT or AOT.
   bool HasAnyCompiledCode() SHARED_REQUIRES(Locks::mutator_lock_);
 
+
+  // Update heap objects and non-entrypoint pointers with the passed-in visitor for image
+  // relocation. Does not use a read barrier.
+  template <typename Visitor>
+  ALWAYS_INLINE void UpdateObjectsForImageRelocation(const Visitor& visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Update entry points by passing them through the visitor.
+  template <typename Visitor>
+  ALWAYS_INLINE void UpdateEntrypoints(const Visitor& visitor);
+
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 115c260..de46b0c 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -53,6 +53,7 @@
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
   bool verifier;
+  bool image;
 };
 
 // Global log verbosity setting, initialized by InitLogging.
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index 71e0590..814cbd0 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -26,16 +26,25 @@
 namespace art {
 
 bool ScopedFlock::Init(const char* filename, std::string* error_msg) {
+  return Init(filename, O_CREAT | O_RDWR, true, error_msg);
+}
+
+bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* error_msg) {
   while (true) {
     if (file_.get() != nullptr) {
       UNUSED(file_->FlushCloseOrErase());  // Ignore result.
     }
-    file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
+    file_.reset(OS::OpenFileWithFlags(filename, flags));
     if (file_.get() == nullptr) {
       *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
       return false;
     }
-    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));
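+    // With LOCK_NB, flock(2) fails immediately with EWOULDBLOCK instead of waiting for the lock
+    // to be released.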
+    int operation = block ? LOCK_EX : (LOCK_EX | LOCK_NB);
+    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), operation));
+    if (flock_result != 0 && errno == EWOULDBLOCK) {
+      // The file is locked by someone else and we are required not to block.
+      return false;
+    }
     if (flock_result != 0) {
       *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno));
       return false;
@@ -51,11 +60,23 @@
     if (stat_result != 0) {
       PLOG(WARNING) << "Failed to stat, will retry: " << filename;
       // ENOENT can happen if someone racing with us unlinks the file we created so just retry.
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // Note that in theory we could race with someone here for a long time and end up retrying
+        // over and over again. This potential behavior does not fit well with the non-blocking
+        // semantics. Thus, if we are not required to block, return failure when racing.
+        return false;
+      }
     }
     if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) {
       LOG(WARNING) << "File changed while locking, will retry: " << filename;
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // See comment above.
+        return false;
+      }
     }
     return true;
   }
@@ -78,7 +99,7 @@
   return true;
 }
 
-File* ScopedFlock::GetFile() {
+File* ScopedFlock::GetFile() const {
   CHECK(file_.get() != nullptr);
   return file_.get();
 }
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
index 08612e3..cc22056 100644
--- a/runtime/base/scoped_flock.h
+++ b/runtime/base/scoped_flock.h
@@ -32,10 +32,15 @@
   // Attempts to acquire an exclusive file lock (see flock(2)) on the file
   // at filename, and blocks until it can do so.
   //
-  // Returns true if the lock could be acquired, or false if an error
-  // occurred. It is an error if the file does not exist, or if its inode
-  // changed (usually due to a new file being created at the same path)
-  // between attempts to lock it.
+  // Returns true if the lock could be acquired, or false if an error occurred.
+  // It is an error if its inode changed (usually due to a new file being
+  // created at the same path) between attempts to lock it. In blocking mode,
+  // locking will be retried if the file changed. In non-blocking mode, false
+  // is returned and no attempt is made to re-acquire the lock.
+  //
+  // The file is opened with the provided flags.
+  bool Init(const char* filename, int flags, bool block, std::string* error_msg);
+  // Calls Init(filename, O_CREAT | O_RDWR, true, error_msg).
   bool Init(const char* filename, std::string* error_msg);
   // Attempt to acquire an exclusive file lock (see flock(2)) on 'file'.
   // Returns true if the lock could be acquired or false if an error
@@ -43,7 +48,7 @@
   bool Init(File* file, std::string* error_msg);
 
   // Returns the (locked) file associated with this instance.
-  File* GetFile();
+  File* GetFile() const;
 
   // Returns whether a file is held.
   bool HasFile();
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 78bc3d5..e17bebb 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -316,4 +316,21 @@
   guard_state_ = GuardState::kNoCheck;
 }
 
+bool FdFile::ClearContent() {
+  if (SetLength(0) < 0) {
+    PLOG(art::ERROR) << "Failed to reset the length";
+    return false;
+  }
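+  // Truncation does not move the file offset, so rewind it explicitly.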
+  return ResetOffset();
+}
+
+bool FdFile::ResetOffset() {
+  off_t rc = TEMP_FAILURE_RETRY(lseek(fd_, 0, SEEK_SET));
+  if (rc == static_cast<off_t>(-1)) {
+    PLOG(art::ERROR) << "Failed to reset the offset";
+    return false;
+  }
+  return true;
+}
+
 }  // namespace unix_file
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 231a1ab..1e2d8af 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -79,6 +79,11 @@
 
   // Copy data from another file.
   bool Copy(FdFile* input_file, int64_t offset, int64_t size);
+  // Clears the file content and resets the file offset to 0.
+  // Returns true upon success, false otherwise.
+  bool ClearContent();
+  // Resets the file offset to the beginning of the file.
+  bool ResetOffset();
 
   // This enum is public so that we can define the << operator over it.
   enum class GuardState {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index ed833c4..262505b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -53,6 +53,7 @@
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
+#include "image-inl.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
@@ -336,6 +337,10 @@
 
   // Use the pointer size from the runtime since we are probably creating the image.
   image_pointer_size_ = InstructionSetPointerSize(runtime->GetInstructionSet());
+  if (!ValidPointerSize(image_pointer_size_)) {
+    *error_msg = StringPrintf("Invalid image pointer size: %zu", image_pointer_size_);
+    return false;
+  }
 
   // java_lang_Class comes first, it's needed for AllocClass
   // The GC can't handle an object with a null class since we can't get the size of this object.
@@ -489,7 +494,7 @@
       return false;
     }
     AppendToBootClassPath(self, *dex_file);
-    opened_dex_files_.push_back(std::move(dex_file));
+    boot_dex_files_.push_back(std::move(dex_file));
   }
 
   // now we can use FindSystemClass
@@ -878,6 +883,7 @@
   ArtMethod* m;
   bool error;
 };
+
 static void CheckTrampolines(mirror::Object* obj, void* arg) NO_THREAD_SAFETY_ANALYSIS {
   if (obj->IsClass()) {
     mirror::Class* klass = obj->AsClass();
@@ -896,8 +902,8 @@
   }
 }
 
-bool ClassLinker::InitFromImage(std::string* error_msg) {
-  VLOG(startup) << "ClassLinker::InitFromImage entering";
+bool ClassLinker::InitFromBootImage(std::string* error_msg) {
+  VLOG(startup) << __FUNCTION__ << " entering";
   CHECK(!init_done_);
 
   Runtime* const runtime = Runtime::Current();
@@ -906,6 +912,21 @@
   std::vector<gc::space::ImageSpace*> spaces = heap->GetBootImageSpaces();
   CHECK(!spaces.empty());
   image_pointer_size_ = spaces[0]->GetImageHeader().GetPointerSize();
+  if (!ValidPointerSize(image_pointer_size_)) {
+    *error_msg = StringPrintf("Invalid image pointer size: %zu", image_pointer_size_);
+    return false;
+  }
+  if (!runtime->IsAotCompiler()) {
+    // Only the Aot compiler supports having an image with a different pointer size than the
+    // runtime. This happens on the host for compiling 32 bit tests since we use a 64 bit libart
+    // compiler. We may also use 32 bit dex2oat on a system with 64 bit apps.
+    if (image_pointer_size_ != sizeof(void*)) {
+      *error_msg = StringPrintf("Runtime must use current image pointer size: %zu vs %zu",
+                                image_pointer_size_,
+                                sizeof(void*));
+      return false;
+    }
+  }
   dex_cache_boot_image_class_lookup_required_ = true;
   std::vector<const OatFile*> oat_files =
       runtime->GetOatFileManager().RegisterImageOatFiles(spaces);
@@ -957,19 +978,10 @@
     }
   }
 
-  StackHandleScopeCollection handles(self);
-  std::vector<Handle<mirror::ObjectArray<mirror::DexCache>>> dex_caches_vector;
-  for (gc::space::ImageSpace* space : spaces) {
-    Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches(handles.NewHandle(
-        space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches)->
-        AsObjectArray<mirror::DexCache>()));
-    dex_caches_vector.push_back(dex_caches);
-  }
-
-  Handle<mirror::ObjectArray<mirror::Class>> class_roots(handles.NewHandle(
-      spaces[0]->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
-      AsObjectArray<mirror::Class>()));
-  class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class>>(class_roots.Get());
+  class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class>>(
+      down_cast<mirror::ObjectArray<mirror::Class>*>(
+          spaces[0]->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)));
+  mirror::Class::SetClassClass(class_roots_.Read()->Get(kJavaLangClass));
 
   // Special case of setting up the String class early so that we can test arbitrary objects
   // as being Strings or not
@@ -982,116 +994,6 @@
   runtime->SetSentinel(heap->AllocNonMovableObject<true>(
       self, java_lang_Object, java_lang_Object->GetObjectSize(), VoidFunctor()));
 
-  uint32_t dex_file_count = 0;
-  for (const OatFile* oat_file : oat_files) {
-    dex_file_count += oat_file->GetOatHeader().GetDexFileCount();
-  }
-  uint32_t dex_caches_count = 0;
-  for (auto dex_caches : dex_caches_vector) {
-    dex_caches_count += dex_caches->GetLength();
-  }
-  if (dex_file_count != dex_caches_count) {
-    *error_msg = "Dex cache count and dex file count mismatch while trying to initialize from "
-                 "image";
-    return false;
-  }
-  for (auto dex_caches : dex_caches_vector) {
-    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-      StackHandleScope<1> hs2(self);
-      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i)));
-      const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
-      const OatFile::OatDexFile* oat_dex_file = nullptr;
-      for (const OatFile* oat_file : oat_files) {
-        const OatFile::OatDexFile* oat_dex =
-            oat_file->GetOatDexFile(dex_file_location.c_str(), nullptr, false);
-        if (oat_dex != nullptr) {
-          DCHECK(oat_dex_file == nullptr);
-          oat_dex_file = oat_dex;
-        }
-      }
-
-      if (oat_dex_file == nullptr) {
-        *error_msg = StringPrintf("Failed finding oat dex file for %s",
-                                  dex_file_location.c_str());
-        return false;
-      }
-      std::string inner_error_msg;
-      std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&inner_error_msg);
-      if (dex_file == nullptr) {
-        *error_msg = StringPrintf("Failed to open dex file %s error '%s'",
-                                  dex_file_location.c_str(),
-                                  inner_error_msg.c_str());
-        return false;
-      }
-
-      if (kSanityCheckObjects) {
-        SanityCheckArtMethodPointerArray(dex_cache->GetResolvedMethods(),
-                                         dex_cache->NumResolvedMethods(),
-                                         image_pointer_size_,
-                                         spaces);
-      }
-
-      if (dex_file->GetLocationChecksum() != oat_dex_file->GetDexFileLocationChecksum()) {
-        *error_msg = StringPrintf("Checksums do not match for %s: %x vs %x",
-                                  dex_file_location.c_str(),
-                                  dex_file->GetLocationChecksum(),
-                                  oat_dex_file->GetDexFileLocationChecksum());
-        return false;
-      }
-
-      AppendToBootClassPath(*dex_file.get(), dex_cache);
-      opened_dex_files_.push_back(std::move(dex_file));
-    }
-  }
-
-  if (!ValidPointerSize(image_pointer_size_)) {
-    *error_msg = StringPrintf("Invalid image pointer size: %zu", image_pointer_size_);
-    return false;
-  }
-
-  // Set classes on AbstractMethod early so that IsMethod tests can be performed during the live
-  // bitmap walk.
-  if (!runtime->IsAotCompiler()) {
-    // Only the Aot compiler supports having an image with a different pointer size than the
-    // runtime. This happens on the host for compile 32 bit tests since we use a 64 bit libart
-    // compiler. We may also use 32 bit dex2oat on a system with 64 bit apps.
-    if (image_pointer_size_ != sizeof(void*)) {
-      *error_msg = StringPrintf("Runtime must use current image pointer size: %zu vs %zu",
-                                image_pointer_size_ ,
-                                sizeof(void*));
-      return false;
-    }
-  }
-
-  if (kSanityCheckObjects) {
-    for (auto dex_caches : dex_caches_vector) {
-      for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-        auto* dex_cache = dex_caches->Get(i);
-        for (size_t j = 0; j < dex_cache->NumResolvedFields(); ++j) {
-          auto* field = dex_cache->GetResolvedField(j, image_pointer_size_);
-          if (field != nullptr) {
-            CHECK(field->GetDeclaringClass()->GetClass() != nullptr);
-          }
-        }
-      }
-    }
-    heap->VisitObjects(SanityCheckObjectsCallback, nullptr);
-  }
-
-  // Set entry point to interpreter if in InterpretOnly mode.
-  if (!runtime->IsAotCompiler() && runtime->GetInstrumentation()->InterpretOnly()) {
-    for (gc::space::ImageSpace* space : spaces) {
-      const ImageHeader& header = space->GetImageHeader();
-      const ImageSection& methods = header.GetMethodsSection();
-      SetInterpreterEntrypointArtMethodVisitor visitor(image_pointer_size_);
-      methods.VisitPackedArtMethods(&visitor, space->Begin(), image_pointer_size_);
-    }
-  }
-
-  // reinit class_roots_
-  mirror::Class::SetClassClass(class_roots->Get(kJavaLangClass));
-  class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class>>(class_roots.Get());
-
   // reinit array_iftable_ from any array class instance, they should be ==
   array_iftable_ = GcRoot<mirror::IfTable>(GetClassRoot(kObjectArrayClass)->GetIfTable());
   DCHECK_EQ(array_iftable_.Read(), GetClassRoot(kBooleanArrayClass)->GetIfTable());
@@ -1114,30 +1016,581 @@
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
-  size_t class_tables_added = 0;
-  for (gc::space::ImageSpace* space : spaces) {
-    const ImageHeader& header = space->GetImageHeader();
-    const ImageSection& section = header.GetImageSection(ImageHeader::kSectionClassTable);
-    if (section.Size() > 0u) {
-      WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-      ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
-      class_table->ReadFromMemory(space->Begin() + section.Offset());
-      ++class_tables_added;
+  for (gc::space::ImageSpace* image_space : spaces) {
+    // Boot class loader, use a null handle.
+    std::vector<std::unique_ptr<const DexFile>> dex_files;
+    if (!AddImageSpace(image_space,
+                       ScopedNullHandle<mirror::ClassLoader>(),
+                       /*dex_elements*/nullptr,
+                       /*dex_location*/nullptr,
+                       /*out*/&dex_files,
+                       error_msg)) {
+      return false;
     }
+    // Append opened dex files at the end.
+    boot_dex_files_.insert(boot_dex_files_.end(),
+                           std::make_move_iterator(dex_files.begin()),
+                           std::make_move_iterator(dex_files.end()));
   }
-  if (class_tables_added != 0) {
-    // Either all of the image spaces have an empty class section or none do. In the case where
-    // an image space has no classes, it will still have a non-empty class section that contains
-    // metadata.
-    CHECK_EQ(spaces.size(), class_tables_added)
-        << "Expected non-empty class section for each image space.";
-    dex_cache_boot_image_class_lookup_required_ = false;
-  }
-
   FinishInit(self);
 
-  VLOG(startup) << "ClassLinker::InitFromImage exiting";
+  VLOG(startup) << __FUNCTION__ << " exiting";
+  return true;
+}
 
+static bool IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
+                              mirror::ClassLoader* class_loader)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return class_loader == nullptr ||
+      class_loader->GetClass() ==
+          soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader);
+}
+
+static mirror::String* GetDexPathListElementName(ScopedObjectAccessUnchecked& soa,
+                                                 mirror::Object* element)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* const dex_file_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+  ArtField* const dex_file_name_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+  DCHECK(dex_file_field != nullptr);
+  DCHECK(dex_file_name_field != nullptr);
+  DCHECK(element != nullptr);
+  CHECK_EQ(dex_file_field->GetDeclaringClass(), element->GetClass()) << PrettyTypeOf(element);
+  mirror::Object* dex_file = dex_file_field->GetObject(element);
+  if (dex_file == nullptr) {
+    return nullptr;
+  }
+  mirror::Object* const name_object = dex_file_name_field->GetObject(dex_file);
+  if (name_object != nullptr) {
+    return name_object->AsString();
+  }
+  return nullptr;
+}
+
+static bool FlattenPathClassLoader(mirror::ClassLoader* class_loader,
+                                   std::list<mirror::String*>* out_dex_file_names,
+                                   std::string* error_msg)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
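+  // Walk from the given loader up to the boot class loader, prepending each loader's dex file
+  // names so that the flattened list ends up in parent-first resolution order.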
+  DCHECK(out_dex_file_names != nullptr);
+  DCHECK(error_msg != nullptr);
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  ArtField* const dex_path_list_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList);
+  ArtField* const dex_elements_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
+  CHECK(dex_path_list_field != nullptr);
+  CHECK(dex_elements_field != nullptr);
+  while (!IsBootClassLoader(soa, class_loader)) {
+    if (class_loader->GetClass() !=
+        soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_system_PathClassLoader)) {
+      *error_msg = StringPrintf("Unknown class loader type %s", PrettyTypeOf(class_loader).c_str());
+      // Unsupported class loader.
+      return false;
+    }
+    mirror::Object* dex_path_list = dex_path_list_field->GetObject(class_loader);
+    if (dex_path_list != nullptr) {
+      // DexPathList has an array dexElements of Elements[] which each contain a dex file.
+      mirror::Object* dex_elements_obj = dex_elements_field->GetObject(dex_path_list);
+      // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
+      // at the mCookie which is a DexFile vector.
+      if (dex_elements_obj != nullptr) {
+        mirror::ObjectArray<mirror::Object>* dex_elements =
+            dex_elements_obj->AsObjectArray<mirror::Object>();
+        // Reverse order since we insert the parent at the front.
+        for (int32_t i = dex_elements->GetLength() - 1; i >= 0; --i) {
+          mirror::Object* const element = dex_elements->GetWithoutChecks(i);
+          if (element == nullptr) {
+            *error_msg = StringPrintf("Null dex element at index %d", i);
+            return false;
+          }
+          mirror::String* const name = GetDexPathListElementName(soa, element);
+          if (name == nullptr) {
+            *error_msg = StringPrintf("Null name for dex element at index %d", i);
+            return false;
+          }
+          out_dex_file_names->push_front(name);
+        }
+      }
+    }
+    class_loader = class_loader->GetParent();
+  }
+  return true;
+}
+
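+// Redirect each method's dex cache arrays to their relocated copies by following the forwarding
+// pointer that was stored at the old (image) location of each array.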
+class FixupArtMethodArrayVisitor : public ArtMethodVisitor {
+ public:
+  explicit FixupArtMethodArrayVisitor(const ImageHeader& header) : header_(header) {}
+
+  virtual void Visit(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
+    GcRoot<mirror::Class>* resolved_types = method->GetDexCacheResolvedTypes(sizeof(void*));
+    const bool is_miranda = method->IsMiranda();
+    if (resolved_types != nullptr) {
+      bool in_image_space = false;
+      if (kIsDebugBuild || is_miranda) {
+        in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
+            reinterpret_cast<const uint8_t*>(resolved_types) - header_.GetImageBegin());
+      }
+      // Must be in image space for non-miranda method.
+      DCHECK(is_miranda || in_image_space)
+          << resolved_types << " is not in image starting at "
+          << reinterpret_cast<void*>(header_.GetImageBegin());
+      if (!is_miranda || in_image_space) {
+        // Go through the array so that we don't need to do a slow map lookup.
+        method->SetDexCacheResolvedTypes(*reinterpret_cast<GcRoot<mirror::Class>**>(resolved_types),
+                                         sizeof(void*));
+      }
+    }
+    ArtMethod** resolved_methods = method->GetDexCacheResolvedMethods(sizeof(void*));
+    if (resolved_methods != nullptr) {
+      bool in_image_space = false;
+      if (kIsDebugBuild || is_miranda) {
+        in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
+              reinterpret_cast<const uint8_t*>(resolved_methods) - header_.GetImageBegin());
+      }
+      // Must be in image space for non-miranda method.
+      DCHECK(is_miranda || in_image_space)
+          << resolved_methods << " is not in image starting at "
+          << reinterpret_cast<void*>(header_.GetImageBegin());
+      if (!is_miranda || in_image_space) {
+        // Go through the array so that we don't need to do a slow map lookup.
+        method->SetDexCacheResolvedMethods(*reinterpret_cast<ArtMethod***>(resolved_methods),
+                                           sizeof(void*));
+      }
+    }
+  }
+
+ private:
+  const ImageHeader& header_;
+};
+
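+// Debug-only check that the declaring class of every method outside the boot image is present
+// in the given class table.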
+class VerifyClassInTableArtMethodVisitor : public ArtMethodVisitor {
+ public:
+  explicit VerifyClassInTableArtMethodVisitor(ClassTable* table) : table_(table) {}
+
+  virtual void Visit(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_, Locks::classlinker_classes_lock_) {
+    mirror::Class* klass = method->GetDeclaringClass();
+    if (klass != nullptr && !Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+      CHECK_EQ(table_->LookupByDescriptor(klass), klass) << PrettyClass(klass);
+    }
+  }
+
+ private:
+  ClassTable* const table_;
+};
+
+void ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
+    gc::space::ImageSpace* space,
+    Handle<mirror::ClassLoader> class_loader,
+    Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
+    bool added_class_table) {
+  Thread* const self = Thread::Current();
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  const ImageHeader& header = space->GetImageHeader();
+  // Add image classes into the class table for the class loader, and fixup the dex caches and
+  // class loader fields.
+  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+  ClassTable* table = InsertClassTableForClassLoader(class_loader.Get());
+  // TODO: Store class table in the image to avoid manually adding the classes.
+  for (int32_t i = 0, num_dex_caches = dex_caches->GetLength(); i < num_dex_caches; i++) {
+    mirror::DexCache* const dex_cache = dex_caches->Get(i);
+    const DexFile* const dex_file = dex_cache->GetDexFile();
+    // If the oat file expects the dex cache arrays to be in the BSS, then allocate there and
+    // copy over the arrays.
+    DCHECK(dex_file != nullptr);
+    const size_t num_strings = dex_file->NumStringIds();
+    const size_t num_types = dex_file->NumTypeIds();
+    const size_t num_methods = dex_file->NumMethodIds();
+    const size_t num_fields = dex_file->NumFieldIds();
+    CHECK_EQ(num_strings, dex_cache->NumStrings());
+    CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
+    CHECK_EQ(num_methods, dex_cache->NumResolvedMethods());
+    CHECK_EQ(num_fields, dex_cache->NumResolvedFields());
+    if (dex_file->GetOatDexFile() != nullptr &&
+        dex_file->GetOatDexFile()->GetDexCacheArrays() != nullptr) {
+      DexCacheArraysLayout layout(image_pointer_size_, dex_file);
+      uint8_t* const raw_arrays = dex_file->GetOatDexFile()->GetDexCacheArrays();
+      // The space is not yet visible to the GC, we can avoid the read barriers and use
+      // std::copy_n.
+      if (num_strings != 0u) {
+        GcRoot<mirror::String>* const strings =
+            reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
+        for (size_t j = 0; kIsDebugBuild && j < num_strings; ++j) {
+          DCHECK(strings[j].IsNull());
+        }
+        std::copy_n(dex_cache->GetStrings(), num_strings, strings);
+        dex_cache->SetStrings(strings);
+      }
+
+      if (num_types != 0u) {
+        GcRoot<mirror::Class>* const image_resolved_types = dex_cache->GetResolvedTypes();
+        GcRoot<mirror::Class>* const types =
+            reinterpret_cast<GcRoot<mirror::Class>*>(raw_arrays + layout.TypesOffset());
+        for (size_t j = 0; kIsDebugBuild && j < num_types; ++j) {
+          DCHECK(types[j].IsNull());
+        }
+        std::copy_n(image_resolved_types, num_types, types);
+        // Store a pointer to the new location for fast ArtMethod patching without requiring map.
+        // This leaves random garbage at the start of the dex cache array, but nobody should ever
+        // read from it again.
+        *reinterpret_cast<GcRoot<mirror::Class>**>(image_resolved_types) = types;
+        dex_cache->SetResolvedTypes(types);
+      }
+      if (num_methods != 0u) {
+        ArtMethod** const methods = reinterpret_cast<ArtMethod**>(
+            raw_arrays + layout.MethodsOffset());
+        ArtMethod** const image_resolved_methods = dex_cache->GetResolvedMethods();
+        for (size_t j = 0; kIsDebugBuild && j < num_methods; ++j) {
+          DCHECK(methods[j] == nullptr);
+        }
+        std::copy_n(image_resolved_methods, num_methods, methods);
+        // Store a pointer to the new location for fast ArtMethod patching without requiring map.
+        *reinterpret_cast<ArtMethod***>(image_resolved_methods) = methods;
+        dex_cache->SetResolvedMethods(methods);
+      }
+      if (num_fields != 0u) {
+        ArtField** const fields = reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
+        for (size_t j = 0; kIsDebugBuild && j < num_fields; ++j) {
+          DCHECK(fields[j] == nullptr);
+        }
+        std::copy_n(dex_cache->GetResolvedFields(), num_fields, fields);
+        dex_cache->SetResolvedFields(fields);
+      }
+    }
+    {
+      WriterMutexLock mu2(self, dex_lock_);
+      // Make sure to do this after we update the arrays since we store the resolved types array
+      // in DexCacheData in RegisterDexFileLocked. We need the array pointer to be the one in the
+      // BSS.
+      mirror::DexCache* existing_dex_cache = FindDexCacheLocked(self,
+                                                                *dex_file,
+                                                                /*allow_failure*/true);
+      CHECK(existing_dex_cache == nullptr);
+      StackHandleScope<1> hs3(self);
+      RegisterDexFileLocked(*dex_file, hs3.NewHandle(dex_cache));
+    }
+    GcRoot<mirror::Class>* const types = dex_cache->GetResolvedTypes();
+    if (!added_class_table) {
+      for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
+        // The image space is not yet added to the heap, avoid read barriers.
+        mirror::Class* klass = types[j].Read<kWithoutReadBarrier>();
+        if (klass != nullptr) {
+          DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
+          // Update the class loader from the one in the image class loader to the one that loaded
+          // the app image.
+          klass->SetClassLoader(class_loader.Get());
+          // If there are multiple dex caches, there may be the same class multiple times
+          // in different dex caches. Check for this since inserting will add duplicates
+          // otherwise.
+          if (num_dex_caches > 1) {
+            mirror::Class* existing = table->LookupByDescriptor(klass);
+            if (existing != nullptr) {
+              DCHECK_EQ(existing, klass) << PrettyClass(klass);
+            } else {
+              table->Insert(klass);
+            }
+          } else {
+            table->Insert(klass);
+          }
+          // Guard with VLOG_IS_ON to avoid the logging overhead when image logging is disabled.
+          if (VLOG_IS_ON(image)) {
+            VLOG(image) << PrettyClass(klass) << " " << klass->GetStatus();
+            if (!klass->IsArrayClass()) {
+              VLOG(image) << "From " << klass->GetDexCache()->GetDexFile()->GetBaseLocation();
+            }
+            VLOG(image) << "Direct methods";
+            for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
+              VLOG(image) << PrettyMethod(&m);
+            }
+            VLOG(image) << "Virtual methods";
+            for (ArtMethod& m : klass->GetVirtualMethods(sizeof(void*))) {
+              VLOG(image) << PrettyMethod(&m);
+            }
+          }
+        }
+      }
+    }
+    if (kIsDebugBuild) {
+      for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
+        // The image space is not yet added to the heap, avoid read barriers.
+        mirror::Class* klass = types[j].Read<kWithoutReadBarrier>();
+        if (klass != nullptr) {
+          DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
+          if (kIsDebugBuild) {
+            DCHECK_EQ(table->LookupByDescriptor(klass), klass);
+            mirror::Class* super_class = klass->GetSuperClass();
+            if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
+              CHECK_EQ(table->LookupByDescriptor(super_class), super_class);
+            }
+          }
+          DCHECK_EQ(klass->GetClassLoader(), class_loader.Get());
+          if (kIsDebugBuild) {
+            for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
+              const void* code = m.GetEntryPointFromQuickCompiledCode();
+              const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
+              if (!IsQuickResolutionStub(code) &&
+                  !IsQuickGenericJniStub(code) &&
+                  !IsQuickToInterpreterBridge(code) &&
+                  !m.IsNative()) {
+                DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
+              }
+            }
+            VLOG(image) << "Virtual methods";
+            for (ArtMethod& m : klass->GetVirtualMethods(sizeof(void*))) {
+              const void* code = m.GetEntryPointFromQuickCompiledCode();
+              const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
+              if (!IsQuickResolutionStub(code) &&
+                  !IsQuickGenericJniStub(code) &&
+                  !IsQuickToInterpreterBridge(code) &&
+                  !m.IsNative()) {
+                DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  {
+    FixupArtMethodArrayVisitor visitor(header);
+    header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
+        &visitor, space->Begin(), sizeof(void*));
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
+  }
+  if (kIsDebugBuild) {
+    ClassTable* const class_table = class_loader.Get()->GetClassTable();
+    VerifyClassInTableArtMethodVisitor visitor2(class_table);
+    header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
+        &visitor2, space->Begin(), sizeof(void*));
+  }
+}
+
+bool ClassLinker::AddImageSpace(
+    gc::space::ImageSpace* space,
+    Handle<mirror::ClassLoader> class_loader,
+    jobjectArray dex_elements,
+    const char* dex_location,
+    std::vector<std::unique_ptr<const DexFile>>* out_dex_files,
+    std::string* error_msg) {
+  DCHECK(out_dex_files != nullptr);
+  DCHECK(error_msg != nullptr);
+  const uint64_t start_time = NanoTime();
+  const bool app_image = class_loader.Get() != nullptr;
+  const ImageHeader& header = space->GetImageHeader();
+  mirror::Object* dex_caches_object = header.GetImageRoot(ImageHeader::kDexCaches);
+  DCHECK(dex_caches_object != nullptr);
+  Runtime* const runtime = Runtime::Current();
+  gc::Heap* const heap = runtime->GetHeap();
+  Thread* const self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches(
+      hs.NewHandle(dex_caches_object->AsObjectArray<mirror::DexCache>()));
+  Handle<mirror::ObjectArray<mirror::Class>> class_roots(hs.NewHandle(
+      header.GetImageRoot(ImageHeader::kClassRoots)->AsObjectArray<mirror::Class>()));
+  const OatFile* oat_file = space->GetOatFile();
+  std::unordered_set<mirror::ClassLoader*> image_class_loaders;
+  // Check that the image is what we are expecting.
+  if (image_pointer_size_ != space->GetImageHeader().GetPointerSize()) {
+    *error_msg = StringPrintf("Application image pointer size does not match runtime: %zu vs %zu",
+                              static_cast<size_t>(space->GetImageHeader().GetPointerSize()),
+                              image_pointer_size_);
+    return false;
+  }
+  DCHECK(class_roots.Get() != nullptr);
+  if (class_roots->GetLength() != static_cast<int32_t>(kClassRootsMax)) {
+    *error_msg = StringPrintf("Expected %d class roots but got %d",
+                              class_roots->GetLength(),
+                              static_cast<int32_t>(kClassRootsMax));
+    return false;
+  }
+  // Check against existing class roots to make sure they match the ones in the boot image.
+  for (size_t i = 0; i < kClassRootsMax; i++) {
+    if (class_roots->Get(i) != GetClassRoot(static_cast<ClassRoot>(i))) {
+      *error_msg = "App image class roots must have pointer equality with runtime ones.";
+      return false;
+    }
+  }
+  if (oat_file->GetOatHeader().GetDexFileCount() !=
+      static_cast<uint32_t>(dex_caches->GetLength())) {
+    *error_msg = "Dex cache count and dex file count mismatch while trying to initialize from "
+                 "image";
+    return false;
+  }
+
+  StackHandleScope<1> hs2(self);
+  MutableHandle<mirror::DexCache> h_dex_cache(hs2.NewHandle<mirror::DexCache>(nullptr));
+  for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+    h_dex_cache.Assign(dex_caches->Get(i));
+    std::string dex_file_location(h_dex_cache->GetLocation()->ToModifiedUtf8());
+    // TODO: Only store qualified paths.
+    // If non qualified, qualify it.
+    if (dex_file_location.find('/') == std::string::npos) {
+      std::string dex_location_path = dex_location;
+      const size_t pos = dex_location_path.find_last_of('/');
+      CHECK_NE(pos, std::string::npos);
+      dex_location_path = dex_location_path.substr(0, pos + 1);  // Keep trailing '/'
+      dex_file_location = dex_location_path + dex_file_location;
+    }
+    const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file_location.c_str(),
+                                                                      nullptr);
+    if (oat_dex_file == nullptr) {
+      *error_msg = StringPrintf("Failed finding oat dex file for %s %s",
+                                oat_file->GetLocation().c_str(),
+                                dex_file_location.c_str());
+      return false;
+    }
+    std::string inner_error_msg;
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&inner_error_msg);
+    if (dex_file == nullptr) {
+      *error_msg = StringPrintf("Failed to open dex file %s from within oat file %s error '%s'",
+                                dex_file_location.c_str(),
+                                oat_file->GetLocation().c_str(),
+                                inner_error_msg.c_str());
+      return false;
+    }
+
+    if (dex_file->GetLocationChecksum() != oat_dex_file->GetDexFileLocationChecksum()) {
+      *error_msg = StringPrintf("Checksums do not match for %s: %x vs %x",
+                                dex_file_location.c_str(),
+                                dex_file->GetLocationChecksum(),
+                                oat_dex_file->GetDexFileLocationChecksum());
+      return false;
+    }
+
+    if (app_image) {
+      // The current dex file field is bogus, overwrite it so that we can get the dex file in the
+      // loop below.
+      h_dex_cache->SetDexFile(dex_file.get());
+      // Check that each class loader resolved the same way.
+      // TODO: Store image class loaders as image roots.
+      GcRoot<mirror::Class>* const types = h_dex_cache->GetResolvedTypes();
+      for (int32_t j = 0, num_types = h_dex_cache->NumResolvedTypes(); j < num_types; j++) {
+        mirror::Class* klass = types[j].Read();
+        if (klass != nullptr) {
+          DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
+          mirror::ClassLoader* image_class_loader = klass->GetClassLoader();
+          image_class_loaders.insert(image_class_loader);
+        }
+      }
+    } else {
+      if (kSanityCheckObjects) {
+        SanityCheckArtMethodPointerArray(h_dex_cache->GetResolvedMethods(),
+                                         h_dex_cache->NumResolvedMethods(),
+                                         image_pointer_size_,
+                                         heap->GetBootImageSpaces());
+      }
+      // Register dex files, keep track of existing ones that are conflicts.
+      AppendToBootClassPath(*dex_file.get(), h_dex_cache);
+    }
+    out_dex_files->push_back(std::move(dex_file));
+  }
+
+  if (app_image) {
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    // Check that the class loader resolves the same way as the ones in the image.
+    // Image class loader [A][B][C][image dex files]
+    // Class loader = [???][dex_elements][image dex files]
+    // Need to ensure that [???][dex_elements] == [A][B][C].
+    // For each class loader (a PathClassLoader), resolution checks the parent first and then
+    // loops through the loader's array of dex files. To ensure both hierarchies resolve the same
+    // way, flatten each one into its dex file names in resolution order and compare the names.
+    for (mirror::ClassLoader* image_class_loader : image_class_loaders) {
+      std::list<mirror::String*> image_dex_file_names;
+      std::string temp_error_msg;
+      if (!FlattenPathClassLoader(image_class_loader, &image_dex_file_names, &temp_error_msg)) {
+        *error_msg = StringPrintf("Failed to flatten image class loader hierarchy '%s'",
+                                  temp_error_msg.c_str());
+        return false;
+      }
+      std::list<mirror::String*> loader_dex_file_names;
+      if (!FlattenPathClassLoader(class_loader.Get(), &loader_dex_file_names, &temp_error_msg)) {
+        *error_msg = StringPrintf("Failed to flatten class loader hierarchy '%s'",
+                                  temp_error_msg.c_str());
+        return false;
+      }
+      // Add the temporary dex path list elements at the end.
+      auto* elements = soa.Decode<mirror::ObjectArray<mirror::Object>*>(dex_elements);
+      for (size_t i = 0, num_elems = elements->GetLength(); i < num_elems; ++i) {
+        mirror::Object* element = elements->GetWithoutChecks(i);
+        if (element != nullptr) {
+          // If we are somewhere in the middle of the array, there may be nulls at the end.
+          loader_dex_file_names.push_back(GetDexPathListElementName(soa, element));
+        }
+      }
+      // Ignore the number of image dex files since we are adding those to the class loader anyway.
+      CHECK_GE(static_cast<size_t>(image_dex_file_names.size()),
+               static_cast<size_t>(dex_caches->GetLength()));
+      size_t image_count = image_dex_file_names.size() - dex_caches->GetLength();
+      // Check that the dex file names match.
+      bool equal = image_count == loader_dex_file_names.size();
+      if (equal) {
+        auto it1 = image_dex_file_names.begin();
+        auto it2 = loader_dex_file_names.begin();
+        for (size_t i = 0; equal && i < image_count; ++i, ++it1, ++it2) {
+          equal = equal && (*it1)->Equals(*it2);
+        }
+      }
+      if (!equal) {
+        VLOG(image) << "Image dex files " << image_dex_file_names.size();
+        for (mirror::String* name : image_dex_file_names) {
+          VLOG(image) << name->ToModifiedUtf8();
+        }
+        VLOG(image) << "Loader dex files " << loader_dex_file_names.size();
+        for (mirror::String* name : loader_dex_file_names) {
+          VLOG(image) << name->ToModifiedUtf8();
+        }
+        *error_msg = "Rejecting application image due to class loader mismatch";
+        return false;
+      }
+    }
+  }
+
+  if (kSanityCheckObjects) {
+    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+      auto* dex_cache = dex_caches->Get(i);
+      for (size_t j = 0; j < dex_cache->NumResolvedFields(); ++j) {
+        auto* field = dex_cache->GetResolvedField(j, image_pointer_size_);
+        if (field != nullptr) {
+          CHECK(field->GetDeclaringClass()->GetClass() != nullptr);
+        }
+      }
+    }
+    if (!app_image) {
+      heap->VisitObjects(SanityCheckObjectsCallback, nullptr);
+    }
+  }
+
+  // Set entry point to interpreter if in InterpretOnly mode.
+  if (!runtime->IsAotCompiler() && runtime->GetInstrumentation()->InterpretOnly()) {
+    const ImageSection& methods = header.GetMethodsSection();
+    SetInterpreterEntrypointArtMethodVisitor visitor(image_pointer_size_);
+    methods.VisitPackedArtMethods(&visitor, space->Begin(), image_pointer_size_);
+  }
+
+  const ImageSection& class_table_section = header.GetImageSection(ImageHeader::kSectionClassTable);
+  bool added_class_table = false;
+  if (app_image) {
+    GetOrCreateAllocatorForClassLoader(class_loader.Get());  // Make sure we have a linear alloc.
+  }
+  if (class_table_section.Size() > 0u) {
+    const uint64_t start_time2 = NanoTime();
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    ClassTable* const class_table = InsertClassTableForClassLoader(class_loader.Get());
+    class_table->ReadFromMemory(space->Begin() + class_table_section.Offset());
+    if (app_image) {
+      class_table->SetClassLoader(class_loader.Get());
+    } else {
+      dex_cache_boot_image_class_lookup_required_ = false;
+    }
+    VLOG(image) << "Adding class table classes took " << PrettyDuration(NanoTime() - start_time2);
+    added_class_table = true;
+  }
+  if (app_image) {
+    UpdateAppImageClassLoadersAndDexCaches(space, class_loader, dex_caches, added_class_table);
+  }
+  VLOG(class_linker) << "Adding image space took " << PrettyDuration(NanoTime() - start_time);
   return true;
 }
 
@@ -1527,14 +1980,6 @@
   return ClassPathEntry(nullptr, nullptr);
 }
 
-static bool IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                              mirror::ClassLoader* class_loader)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  return class_loader == nullptr ||
-      class_loader->GetClass() ==
-          soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader);
-}
-
 bool ClassLinker::FindClassInPathClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
                                              Thread* self,
                                              const char* descriptor,
@@ -1820,6 +2265,7 @@
   // inserted before we allocate / fill in these fields.
   LoadClass(self, dex_file, dex_class_def, klass);
   if (self->IsExceptionPending()) {
+    VLOG(class_linker) << self->GetException()->Dump();
     // An exception occurred during load, set status to erroneous while holding klass' lock in case
     // notification is necessary.
     if (!klass->IsErroneous()) {
@@ -2487,8 +2933,21 @@
   Thread* const self = Thread::Current();
   dex_lock_.AssertExclusiveHeld(self);
   CHECK(dex_cache.Get() != nullptr) << dex_file.GetLocation();
-  CHECK(dex_cache->GetLocation()->Equals(dex_file.GetLocation()))
-      << dex_cache->GetLocation()->ToModifiedUtf8() << " " << dex_file.GetLocation();
+  // For app images, the dex cache location may be a suffix of the dex file location since the
+  // dex file location is an absolute path.
+  const std::string dex_cache_location = dex_cache->GetLocation()->ToModifiedUtf8();
+  const size_t dex_cache_length = dex_cache_location.length();
+  CHECK_GT(dex_cache_length, 0u) << dex_file.GetLocation();
+  std::string dex_file_location = dex_file.GetLocation();
+  CHECK_GE(dex_file_location.length(), dex_cache_length)
+      << dex_cache_location << " " << dex_file.GetLocation();
+  // Take suffix.
+  const std::string dex_file_suffix = dex_file_location.substr(
+      dex_file_location.length() - dex_cache_length,
+      dex_cache_length);
+  // Example dex_cache location is SettingsProvider.apk and
+  // dex file location is /system/priv-app/SettingsProvider/SettingsProvider.apk
+  CHECK_EQ(dex_cache_location, dex_file_suffix);
   // Clean up pass to remove null dex caches.
   // Null dex caches can occur due to class unloading and we are lazily removing null entries.
   JavaVMExt* const vm = self->GetJniEnv()->vm;
@@ -5161,10 +5620,10 @@
                                         k,
                                         iface,
                                         image_pointer_size_)) {
-          LOG(WARNING) << "Conflicting default method implementations found: "
-                       << PrettyMethod(current_method) << " and "
-                       << PrettyMethod(*out_default_method) << " in class "
-                       << PrettyClass(klass.Get()) << " conflict.";
+          VLOG(class_linker) << "Conflicting default method implementations found: "
+                             << PrettyMethod(current_method) << " and "
+                             << PrettyMethod(*out_default_method) << " in class "
+                             << PrettyClass(klass.Get()) << " conflict.";
           *out_default_method = nullptr;
           return DefaultMethodSearchResult::kDefaultConflict;
         } else {
@@ -5523,15 +5982,6 @@
   Runtime* const runtime = Runtime::Current();
 
   const bool is_interface = klass->IsInterface();
-  // TODO It might in the future prove useful to make interfaces have full iftables, allowing a
-  // faster invoke-super implementation in the interpreter/across dex-files.
-  // We will just skip doing any of this on non-debug builds for speed.
-  if (is_interface &&
-      !kIsDebugBuild &&
-      !runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods)) {
-    return true;
-  }
-
   const bool has_superclass = klass->HasSuperClass();
   const bool fill_tables = !is_interface;
   const size_t super_ifcount = has_superclass ? klass->GetSuperClass()->GetIfTableCount() : 0U;
@@ -6931,10 +7381,13 @@
   ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
   DCHECK_EQ(cookie_field->GetDeclaringClass(), element_file_field->GetType<false>());
 
+  ArtField* file_name_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+  DCHECK_EQ(file_name_field->GetDeclaringClass(), element_file_field->GetType<false>());
+
   // Fill the elements array.
   int32_t index = 0;
   for (const DexFile* dex_file : dex_files) {
-    StackHandleScope<3> hs2(self);
+    StackHandleScope<4> hs2(self);
 
     // CreatePathClassLoader is only used by gtests. Index 0 of h_long_array is supposed to be the
     // oat file but we can leave it null.
@@ -6949,6 +7402,11 @@
     DCHECK(h_dex_file.Get() != nullptr);
     cookie_field->SetObject<false>(h_dex_file.Get(), h_long_array.Get());
 
+    Handle<mirror::String> h_file_name = hs2.NewHandle(
+        mirror::String::AllocFromModifiedUtf8(self, dex_file->GetLocation().c_str()));
+    DCHECK(h_file_name.Get() != nullptr);
+    file_name_field->SetObject<false>(h_dex_file.Get(), h_file_name.Get());
+
     Handle<mirror::Object> h_element = hs2.NewHandle(h_dex_element_class->AllocObject(self));
     DCHECK(h_element.Get() != nullptr);
     element_file_field->SetObject<false>(h_element.Get(), h_dex_file.Get());
@@ -7048,6 +7506,7 @@
     if (class_loader != nullptr) {
       ++it;
     } else {
+      VLOG(class_linker) << "Freeing class loader";
       DeleteClassLoader(self, data);
       it = class_loaders_.erase(it);
     }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index f1fd0c3..4975c29 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -120,11 +120,25 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  // Initialize class linker from one or more images.
-  bool InitFromImage(std::string* error_msg)
+  // Initialize class linker from one or more boot images.
+  bool InitFromBootImage(std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
+  // Add an image space to the class linker, may fix up classloader fields and dex cache fields.
+  // The dex files that were newly opened for the space are placed in the out argument
+  // out_dex_files. Returns true if the operation succeeded.
+  // The space must be already added to the heap before calling AddImageSpace since we need to
+  // properly handle read barriers and object marking.
+  bool AddImageSpace(gc::space::ImageSpace* space,
+                     Handle<mirror::ClassLoader> class_loader,
+                     jobjectArray dex_elements,
+                     const char* dex_location,
+                     std::vector<std::unique_ptr<const DexFile>>* out_dex_files,
+                     std::string* error_msg)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
   mirror::Class* FindClass(Thread* self,
@@ -985,8 +999,16 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
+  void UpdateAppImageClassLoadersAndDexCaches(
+      gc::space::ImageSpace* space,
+      Handle<mirror::ClassLoader> class_loader,
+      Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
+      bool added_class_table)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   std::vector<const DexFile*> boot_class_path_;
-  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+  std::vector<std::unique_ptr<const DexFile>> boot_dex_files_;
 
   mutable ReaderWriterMutex dex_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // JNI weak globals and side data to allow dex caches to get unloaded. We lazily delete weak
@@ -1041,8 +1063,8 @@
   friend class ImageWriter;  // for GetClassRoots
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
   friend class JniInternalTest;  // for GetRuntimeQuickGenericJniStub
+  ART_FRIEND_TEST(ClassLinkerTest, RegisterDexFileName);  // for DexLock and RegisterDexFileLocked
   ART_FRIEND_TEST(mirror::DexCacheTest, Open);  // for AllocDexCache
-
   DISALLOW_COPY_AND_ASSIGN(ClassLinker);
 };
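
A hypothetical call shape for the AddImageSpace declaration above; the handle and location names are invented for illustration, and only the signature comes from this change:

    std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
    std::string error_msg;
    // Precondition from the comment above: `space` must already be added to the heap.
    if (!class_linker->AddImageSpace(space,
                                     h_class_loader,           // hypothetical Handle<mirror::ClassLoader>
                                     /* dex_elements */ nullptr,
                                     dex_location,             // hypothetical location C string
                                     &opened_dex_files,
                                     &error_msg)) {
      LOG(ERROR) << "AddImageSpace failed: " << error_msg;
    }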
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 471d7ca..40dfda9 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -1201,4 +1201,42 @@
   EXPECT_FALSE(statics.Get()->IsBootStrapClassLoaded());
 }
 
+// Regression test for b/26799552.
+TEST_F(ClassLinkerTest, RegisterDexFileName) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
+  {
+    ReaderMutexLock mu(soa.Self(), *class_linker->DexLock());
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+      dex_cache.Assign(down_cast<mirror::DexCache*>(soa.Self()->DecodeJObject(data.weak_root)));
+      if (dex_cache.Get() != nullptr) {
+        break;
+      }
+    }
+    ASSERT_TRUE(dex_cache.Get() != nullptr);
+  }
+  // Make a copy of the dex cache and change the name.
+  dex_cache.Assign(dex_cache->Clone(soa.Self())->AsDexCache());
+  const uint16_t data[] = { 0x20AC, 0x20A1 };
+  Handle<mirror::String> location(hs.NewHandle(mirror::String::AllocFromUtf16(soa.Self(),
+                                                                              arraysize(data),
+                                                                              data)));
+  dex_cache->SetLocation(location.Get());
+  const DexFile* old_dex_file = dex_cache->GetDexFile();
+
+  DexFile* dex_file = new DexFile(old_dex_file->Begin(),
+                                  old_dex_file->Size(),
+                                  location->ToModifiedUtf8(),
+                                  0u,
+                                  nullptr,
+                                  nullptr);
+  {
+    WriterMutexLock mu(soa.Self(), *class_linker->DexLock());
+    // Check that inserting with a UTF16 name works.
+    class_linker->RegisterDexFileLocked(*dex_file, dex_cache);
+  }
+}
+
 }  // namespace art
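
The two UTF-16 units used above (0x20AC '€' and 0x20A1 '₡') force a non-ASCII dex location, which is what b/26799552 regressed on. For reference, a self-contained sketch of the three-byte (Modified) UTF-8 encoding that ToModifiedUtf8 produces for BMP code points in that range:

    #include <cassert>
    #include <cstdint>

    // Encode a BMP code point in [0x800, 0xFFFF] as three (Modified) UTF-8 bytes.
    void EncodeUtf8ThreeByte(uint16_t cp, uint8_t out[3]) {
      out[0] = 0xE0 | (cp >> 12);
      out[1] = 0x80 | ((cp >> 6) & 0x3F);
      out[2] = 0x80 | (cp & 0x3F);
    }

    int main() {
      uint8_t buf[3];
      EncodeUtf8ThreeByte(0x20AC, buf);  // U+20AC EURO SIGN
      assert(buf[0] == 0xE2 && buf[1] == 0x82 && buf[2] == 0xAC);
      return 0;
    }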
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index df2dbf4..2a4f0e01 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -40,6 +40,16 @@
   return false;
 }
 
+mirror::Class* ClassTable::LookupByDescriptor(mirror::Class* klass) {
+  for (ClassSet& class_set : classes_) {
+    auto it = class_set.Find(GcRoot<mirror::Class>(klass));
+    if (it != class_set.end()) {
+      return it->Read();
+    }
+  }
+  return nullptr;
+}
+
 mirror::Class* ClassTable::UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash) {
   // Should only be updating latest table.
   auto existing_it = classes_.back().FindWithHash(descriptor, hash);
@@ -173,4 +183,12 @@
   return read_count;
 }
 
+void ClassTable::SetClassLoader(mirror::ClassLoader* class_loader) {
+  for (const ClassSet& class_set : classes_) {
+    for (const GcRoot<mirror::Class>& root : class_set) {
+      root.Read()->SetClassLoader(class_loader);
+    }
+  }
+}
+
 }  // namespace art
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 911f3c2..0b42035 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -84,9 +84,14 @@
   bool Visit(ClassVisitor* visitor)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
+  // Returns the first class that matches the descriptor. Returns null if there are none.
   mirror::Class* Lookup(const char* descriptor, size_t hash)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
+  // Returns the first class that matches the descriptor of klass. Returns null if there are none.
+  mirror::Class* LookupByDescriptor(mirror::Class* klass)
+      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+
   void Insert(mirror::Class* klass)
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -107,10 +112,17 @@
   // Combines all of the tables into one class set.
   size_t WriteToMemory(uint8_t* ptr) const
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+
+  // Read a table from ptr and put it at the front of the class set.
   size_t ReadFromMemory(uint8_t* ptr)
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Change the class loader of all the contained classes.
+  void SetClassLoader(mirror::ClassLoader* class_loader)
+    REQUIRES(Locks::classlinker_classes_lock_)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   class ClassDescriptorHashEquals {
    public:
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6e11cf8..a0f875d 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -230,11 +230,11 @@
     Dbg::PostException(exception_object);
   }
 
-  // We only care about how many backward branches were executed in the Jit.
-  void BackwardBranch(Thread* /*thread*/, ArtMethod* method, int32_t dex_pc_offset)
+  // We only care about branches in the Jit.
+  void Branch(Thread* /*thread*/, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    LOG(ERROR) << "Unexpected backward branch event in debugger " << PrettyMethod(method)
-               << " " << dex_pc_offset;
+    LOG(ERROR) << "Unexpected branch event in debugger " << PrettyMethod(method)
+               << " " << dex_pc << ", " << dex_pc_offset;
   }
 
   // We only care about invokes in the Jit.
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index bc8ba97..81a3e4b 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -195,6 +195,30 @@
   }
 }
 
+std::unique_ptr<const DexFile> DexFile::Open(const uint8_t* base, size_t size,
+                                             const std::string& location,
+                                             uint32_t location_checksum,
+                                             const OatDexFile* oat_dex_file,
+                                             bool verify,
+                                             std::string* error_msg) {
+  std::unique_ptr<const DexFile> dex_file = OpenMemory(base,
+                                                       size,
+                                                       location,
+                                                       location_checksum,
+                                                       nullptr,
+                                                       oat_dex_file,
+                                                       error_msg);
+  if (verify && !DexFileVerifier::Verify(dex_file.get(),
+                                         dex_file->Begin(),
+                                         dex_file->Size(),
+                                         location.c_str(),
+                                         error_msg)) {
+    return nullptr;
+  }
+
+  return dex_file;
+}
+
 std::unique_ptr<const DexFile> DexFile::OpenFile(int fd, const char* location, bool verify,
                                                  std::string* error_msg) {
   CHECK(location != nullptr);
@@ -687,8 +711,8 @@
   return nullptr;
 }
 
-void DexFile::CreateTypeLookupTable() const {
-  lookup_table_.reset(TypeLookupTable::Create(*this));
+void DexFile::CreateTypeLookupTable(uint8_t* storage) const {
+  lookup_table_.reset(TypeLookupTable::Create(*this, storage));
 }
 
 // Given a signature place the type ids into the given vector
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 108b8d2..e497e9c 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -417,9 +417,8 @@
                                              const std::string& location,
                                              uint32_t location_checksum,
                                              const OatDexFile* oat_dex_file,
-                                             std::string* error_msg) {
-    return OpenMemory(base, size, location, location_checksum, nullptr, oat_dex_file, error_msg);
-  }
+                                             bool verify,
+                                             std::string* error_msg);
 
   // Open all classesXXX.dex files from a zip archive.
   static bool OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
@@ -1157,7 +1156,7 @@
     return lookup_table_.get();
   }
 
-  void CreateTypeLookupTable() const;
+  void CreateTypeLookupTable(uint8_t* storage = nullptr) const;
 
  private:
   // Opens a .dex file
@@ -1260,6 +1259,7 @@
   mutable std::unique_ptr<TypeLookupTable> lookup_table_;
 
   friend class DexFileVerifierTest;
+  ART_FRIEND_TEST(ClassLinkerTest, RegisterDexFileName);  // for constructor
 };
 
 struct DexFileReference {
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 727f4fc..7a852e2 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -2538,20 +2538,6 @@
     return false;
   }
 
-  // Only the static initializer may have code in an interface.
-  // TODO We should have some way determine whether to allow this experimental flag without the
-  // runtime being started.
-  // We assume experimental flags are enabled when running without a runtime to enable tools like
-  // dexdump to handle dex files with these features.
-  if (((class_access_flags & kAccInterface) != 0)
-      && !is_clinit_by_name
-      && Runtime::Current() != nullptr
-      && !Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods)) {
-    *error_msg = StringPrintf("Non-clinit interface method %" PRIu32 " should not have code",
-                              method_index);
-    return false;
-  }
-
   // Instance constructors must not be synchronized and a few other flags.
   if (is_init_by_name) {
     static constexpr uint32_t kInitAllowed =
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 272249c..b67af53 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -686,31 +686,6 @@
 // Set of dex files for interface method tests. As it's not as easy to mutate method names, it's
 // just easier to break up bad cases.
 
-// Interface with an instance constructor.
-//
-// .class public interface LInterfaceMethodFlags;
-// .super Ljava/lang/Object;
-//
-// .method public static constructor <clinit>()V
-// .registers 1
-//     return-void
-// .end method
-//
-// .method public constructor <init>()V
-// .registers 1
-//     return-void
-// .end method
-static const char kMethodFlagsInterfaceWithInit[] =
-    "ZGV4CjAzNQDRNt+hZ6X3I+xe66iVlCW7h9I38HmN4SvUAQAAcAAAAHhWNBIAAAAAAAAAAEwBAAAF"
-    "AAAAcAAAAAMAAACEAAAAAQAAAJAAAAAAAAAAAAAAAAIAAACcAAAAAQAAAKwAAAAIAQAAzAAAAMwA"
-    "AADWAAAA3gAAAPYAAAAKAQAAAgAAAAMAAAAEAAAABAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAQAA"
-    "AAAAAAABAgAAAQAAAAAAAAD/////AAAAADoBAAAAAAAACDxjbGluaXQ+AAY8aW5pdD4AFkxJbnRl"
-    "cmZhY2VNZXRob2RGbGFnczsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgAAAAAAAAAAAQAAAAAAAAAA"
-    "AAAAAQAAAA4AAAABAAEAAAAAAAAAAAABAAAADgAAAAIAAImABJQCAYGABKgCAAALAAAAAAAAAAEA"
-    "AAAAAAAAAQAAAAUAAABwAAAAAgAAAAMAAACEAAAAAwAAAAEAAACQAAAABQAAAAIAAACcAAAABgAA"
-    "AAEAAACsAAAAAiAAAAUAAADMAAAAAxAAAAEAAAAQAQAAASAAAAIAAAAUAQAAACAAAAEAAAA6AQAA"
-    "ABAAAAEAAABMAQAA";
-
 // Standard interface. Use declared-synchronized again for 3B encoding.
 //
 // .class public interface LInterfaceMethodFlags;
@@ -751,13 +726,6 @@
 }
 
 TEST_F(DexFileVerifierTest, MethodAccessFlagsInterfaces) {
-  // Reject interface with <init>.
-  VerifyModification(
-      kMethodFlagsInterfaceWithInit,
-      "method_flags_interface_with_init",
-      [](DexFile* dex_file ATTRIBUTE_UNUSED) {},
-      "Non-clinit interface method 1 should not have code");
-
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_ok",
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 08c9b49..638fdb4 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1779,8 +1779,7 @@
 
   void FinalizeHandleScope(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  StackReference<mirror::Object>* GetFirstHandleScopeEntry()
-      SHARED_REQUIRES(Locks::mutator_lock_) {
+  StackReference<mirror::Object>* GetFirstHandleScopeEntry() {
     return handle_scope_->GetHandle(0).GetReference();
   }
 
diff --git a/runtime/experimental_flags.h b/runtime/experimental_flags.h
index 2e674e9..198f3fa 100644
--- a/runtime/experimental_flags.h
+++ b/runtime/experimental_flags.h
@@ -27,7 +27,6 @@
   enum {
     kNone           = 0x0000,
     kLambdas        = 0x0001,
-    kDefaultMethods = 0x0002,
   };
 
   constexpr ExperimentalFlags() : value_(0x0000) {}
@@ -69,10 +68,6 @@
     stream << (started ? "|" : "") << "kLambdas";
     started = true;
   }
-  if (e & ExperimentalFlags::kDefaultMethods) {
-    stream << (started ? "|" : "") << "kDefaultMethods";
-    started = true;
-  }
   if (!started) {
     stream << "kNone";
   }
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 88a6c6c..b6af908 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -115,6 +115,8 @@
 
   // Resets all of the bytes in the card table to clean.
   void ClearCardTable();
+
+  // Clear a range of cards that covers start to end; start and end must be aligned to kCardSize.
   void ClearCardRange(uint8_t* start, uint8_t* end);
 
   // Resets all of the bytes in the card table which do not map to the image space.
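
A minimal sketch of the alignment contract, assuming CardTable::kCardSize is 128 bytes (1 << kCardShift): a caller whose range end is only object-aligned rounds it up first, as the heap.cc hunk later in this change does with AlignUp.

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t kCardSize = 128;  // assumed value of CardTable::kCardSize

    // Round a pointer up to the next card boundary before calling ClearCardRange.
    inline std::uint8_t* AlignUpToCard(std::uint8_t* ptr) {
      const auto value = reinterpret_cast<std::uintptr_t>(ptr);
      const auto mask = static_cast<std::uintptr_t>(kCardSize - 1);
      return reinterpret_cast<std::uint8_t*>((value + mask) & ~mask);
    }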
diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc
index edab1b0..349d6ff 100644
--- a/runtime/gc/accounting/mod_union_table_test.cc
+++ b/runtime/gc/accounting/mod_union_table_test.cc
@@ -22,6 +22,7 @@
 #include "mirror/array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 
 namespace art {
 namespace gc {
@@ -184,7 +185,11 @@
   std::unique_ptr<space::DlMallocSpace> other_space(space::DlMallocSpace::Create(
       "other space", 128 * KB, 4 * MB, 4 * MB, nullptr, false));
   ASSERT_TRUE(other_space.get() != nullptr);
-  heap->AddSpace(other_space.get());
+  {
+    ScopedThreadSuspension sts(self, kSuspended);
+    ScopedSuspendAll ssa("Add image space");
+    heap->AddSpace(other_space.get());
+  }
   std::unique_ptr<ModUnionTable> table(ModUnionTableFactory::Create(
       type, space, other_space.get()));
   ASSERT_TRUE(table.get() != nullptr);
@@ -253,6 +258,8 @@
   std::ostringstream oss2;
   table->Dump(oss2);
   // Remove the space we added so it doesn't persist to the next test.
+  ScopedThreadSuspension sts(self, kSuspended);
+  ScopedSuspendAll ssa("Add image space");
   heap->RemoveSpace(other_space.get());
 }
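
This pattern is now mandatory around Heap::AddSpace/RemoveSpace, whose declarations gain an exclusive mutator_lock_ requirement later in this change: the test thread first drops its shared mutator lock, then suspends all other threads so it holds the lock exclusively. Condensed, assuming the ART runtime headers:

    {
      ScopedThreadSuspension sts(self, kSuspended);  // drop our shared mutator_lock_
      ScopedSuspendAll ssa("Modify heap spaces");    // reacquire it exclusively
      heap->AddSpace(other_space.get());             // RemoveSpace needs the same treatment
    }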
 
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 61c67f8..4cf5b4f 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -167,8 +167,12 @@
   uintptr_t* address = &bitmap_begin_[index];
   uintptr_t old_word = *address;
   if (kSetBit) {
+    // Check the bit before setting the word in case we are trying to mark a read-only bitmap,
+    // such as an image space bitmap. Such a bitmap is mapped read-only and will fault if we
+    // attempt to change any of its words. Since all of its objects are already marked, the
+    // store never happens as long as we check first. Checking first also avoids dirtying
+    // pages that a read-write bitmap would otherwise touch needlessly.
     if ((old_word & mask) == 0) {
-      // Avoid dirtying the page if possible.
       *address = old_word | mask;
     }
   } else {
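
Distilled, the idiom the comment above guards is a plain read-test-write, shown here standalone:

    #include <cstdint>

    // Set `mask` in `*word`, but only store when the bit is actually clear. A
    // fully-marked, read-only bitmap (such as an image space bitmap) is then
    // never stored to, so it neither faults nor gets its pages dirtied.
    inline void SetBitIfClear(std::uintptr_t* word, std::uintptr_t mask) {
      const std::uintptr_t old_word = *word;
      if ((old_word & mask) == 0) {
        *word = old_word | mask;
      }
    }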
diff --git a/runtime/gc/allocator/dlmalloc.cc b/runtime/gc/allocator/dlmalloc.cc
index e747f00..dc4e312 100644
--- a/runtime/gc/allocator/dlmalloc.cc
+++ b/runtime/gc/allocator/dlmalloc.cc
@@ -36,7 +36,7 @@
 #pragma GCC diagnostic ignored "-Wredundant-decls"
 #pragma GCC diagnostic ignored "-Wempty-body"
 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
-#include "../../../bionic/libc/upstream-dlmalloc/malloc.c"
+#include "../../../external/dlmalloc/malloc.c"
 #pragma GCC diagnostic pop
 
 static void* art_heap_morecore(void* m, intptr_t increment) {
diff --git a/runtime/gc/allocator/dlmalloc.h b/runtime/gc/allocator/dlmalloc.h
index 0558921..50e2622 100644
--- a/runtime/gc/allocator/dlmalloc.h
+++ b/runtime/gc/allocator/dlmalloc.h
@@ -32,7 +32,7 @@
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wredundant-decls"
-#include "../../bionic/libc/upstream-dlmalloc/malloc.h"
+#include "../../external/dlmalloc/malloc.h"
 #pragma GCC diagnostic pop
 
 #ifdef __ANDROID__
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index bcfcb89..9397c35 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -24,6 +24,7 @@
 #include "gc/reference_processor.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
+#include "image-inl.h"
 #include "intern_table.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -2066,8 +2067,9 @@
 }
 
 void ConcurrentCopying::FinishPhase() {
+  Thread* const self = Thread::Current();
   {
-    MutexLock mu(Thread::Current(), mark_stack_lock_);
+    MutexLock mu(self, mark_stack_lock_);
     CHECK_EQ(pooled_mark_stacks_.size(), kMarkStackPoolSize);
   }
   region_space_ = nullptr;
@@ -2075,7 +2077,8 @@
     MutexLock mu(Thread::Current(), skipped_blocks_lock_);
     skipped_blocks_map_.clear();
   }
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  ReaderMutexLock mu(self, *Locks::mutator_lock_);
+  WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
   heap_->ClearMarkedObjects();
 }
 
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 5d21c59..76315fe 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -57,7 +57,7 @@
   // Enable the from-space bytes/objects check.
   static constexpr bool kEnableFromSpaceAccountingCheck = true;
   // Enable verbose mode.
-  static constexpr bool kVerboseMode = true;
+  static constexpr bool kVerboseMode = false;
 
   ConcurrentCopying(Heap* heap, const std::string& name_prefix = "");
   ~ConcurrentCopying();
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 954c80e..580486a 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -153,7 +153,9 @@
   void ResetCumulativeStatistics() REQUIRES(!pause_histogram_lock_);
   // Swap the live and mark bitmaps of spaces that are active for the collector. For partial GC,
   // this is the allocation space, for full GC then we swap the zygote bitmaps too.
-  void SwapBitmaps() REQUIRES(Locks::heap_bitmap_lock_);
+  void SwapBitmaps()
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   uint64_t GetTotalPausedTimeNs() REQUIRES(!pause_histogram_lock_);
   int64_t GetTotalFreedBytes() const {
     return total_freed_bytes_;
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
index 4884e66..ea290dd 100644
--- a/runtime/gc/collector/immune_spaces_test.cc
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -112,8 +112,13 @@
         /*oat_data_begin*/PointerToLowMemUInt32(map->End()),
         /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size),
         /*oat_file_end*/PointerToLowMemUInt32(map->End() + oat_size),
+        /*boot_image_begin*/0u,
+        /*boot_image_size*/0u,
+        /*boot_oat_begin*/0u,
+        /*boot_oat_size*/0u,
         /*pointer_size*/sizeof(void*),
         /*compile_pic*/false,
+        /*is_pic*/false,
         ImageHeader::kStorageModeUncompressed,
         /*storage_size*/0u);
     return new DummyImageSpace(map.release(), live_bitmap.release());
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 8a12094..4831157 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -106,7 +106,7 @@
       REQUIRES(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
-  void Sweep(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_);
+  void Sweep(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
   void SweepLargeObjects(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 5427f88..64c8e9a 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -1467,7 +1467,9 @@
   }
   CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
   mark_stack_->Reset();
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  Thread* const self = Thread::Current();
+  ReaderMutexLock mu(self, *Locks::mutator_lock_);
+  WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
   heap_->ClearMarkedObjects();
 }
 
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 245f96b..b61bef7 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -85,7 +85,7 @@
   void Init();
 
   // Find the default mark bitmap.
-  void FindDefaultSpaceBitmap();
+  void FindDefaultSpaceBitmap() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Marks all objects in the root set at the start of a garbage collection.
   void MarkRoots(Thread* self)
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index a905904..0199e1a 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -135,7 +135,9 @@
       REQUIRES(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
-  virtual void Sweep(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_);
+  virtual void Sweep(bool swap_bitmaps)
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
   void SweepLargeObjects(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index d6c1817..84483b4 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -114,6 +114,9 @@
 // timeout on how long we wait for finalizers to run. b/21544853
 static constexpr uint64_t kNativeAllocationFinalizeTimeout = MsToNs(250u);
 
+// For deterministic compilation, we need the heap to be at a well-known address.
+static constexpr uint32_t kAllocSpaceBeginForDeterministicAoT = 0x40000000;
+
 Heap::Heap(size_t initial_size,
            size_t growth_limit,
            size_t min_free,
@@ -273,10 +276,11 @@
       std::string& image_name = image_file_names[index];
       ATRACE_BEGIN("ImageSpace::Create");
       std::string error_msg;
-      space::ImageSpace* boot_image_space = space::ImageSpace::Create(image_name.c_str(),
-                                                                      image_instruction_set,
-                                                                      index > 0,
-                                                                      &error_msg);
+      space::ImageSpace* boot_image_space = space::ImageSpace::CreateBootImage(
+          image_name.c_str(),
+          image_instruction_set,
+          index > 0,
+          &error_msg);
       ATRACE_END();
       if (boot_image_space != nullptr) {
         AddSpace(boot_image_space);
@@ -346,11 +350,16 @@
   bool separate_non_moving_space = is_zygote ||
       support_homogeneous_space_compaction || IsMovingGc(foreground_collector_type_) ||
       IsMovingGc(background_collector_type_);
-  if (foreground_collector_type == kCollectorTypeGSS) {
+  if (foreground_collector_type_ == kCollectorTypeGSS) {
     separate_non_moving_space = false;
   }
   std::unique_ptr<MemMap> main_mem_map_1;
   std::unique_ptr<MemMap> main_mem_map_2;
+
+  // Gross hack to make dex2oat deterministic.
+  if (requested_alloc_space_begin == nullptr && Runtime::Current()->IsAotCompiler()) {
+    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(kAllocSpaceBeginForDeterministicAoT);
+  }
   uint8_t* request_begin = requested_alloc_space_begin;
   if (request_begin != nullptr && separate_non_moving_space) {
     request_begin += non_moving_space_capacity;
@@ -375,12 +384,15 @@
   }
   // Attempt to create 2 mem maps at or after the requested begin.
   if (foreground_collector_type_ != kCollectorTypeCC) {
-    if (separate_non_moving_space) {
-      main_mem_map_1.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[0], request_begin,
-                                                        capacity_, &error_str));
+    if (separate_non_moving_space || !is_zygote) {
+      main_mem_map_1.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[0],
+                                                        request_begin,
+                                                        capacity_,
+                                                        &error_str));
     } else {
-      // If no separate non-moving space, the main space must come
-      // right after the image space to avoid a gap.
+      // If no separate non-moving space and we are the zygote, the main space must come right
+      // after the image space to avoid a gap. This is required since we want the zygote space to
+      // be adjacent to the image space.
       main_mem_map_1.reset(MemMap::MapAnonymous(kMemMapSpaceName[0], request_begin, capacity_,
                                                 PROT_READ | PROT_WRITE, true, false,
                                                 &error_str));
@@ -488,7 +500,15 @@
   ATRACE_END();
   // Allocate the card table.
   ATRACE_BEGIN("Create card table");
-  card_table_.reset(accounting::CardTable::Create(heap_begin, heap_capacity));
+  // We currently don't support dynamically resizing the card table.
+  // Since we don't know where in the low_4gb the app image will be located, make the card table
+  // cover the whole low_4gb. TODO: Extend the card table in AddSpace.
+  UNUSED(heap_capacity);
+  // Start at 4 KB; we can be sure there are no spaces mapped this low since that address range
+  // is reserved by the kernel.
+  static constexpr size_t kMinHeapAddress = 4 * KB;
+  card_table_.reset(accounting::CardTable::Create(reinterpret_cast<uint8_t*>(kMinHeapAddress),
+                                                  4 * GB - kMinHeapAddress));
   CHECK(card_table_.get() != nullptr) << "Failed to create card table";
   ATRACE_END();
   if (foreground_collector_type_ == kCollectorTypeCC && kUseTableLookupReadBarrier) {
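
Back-of-the-envelope footprint of the fixed low-4GB card table, assuming 128-byte cards (CardTable::kCardSize) with one card byte each:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const std::uint64_t kMinHeapAddress = 4 * 1024;          // 4 KB, as above
      const std::uint64_t kFourGB = 4ull * 1024 * 1024 * 1024;
      const std::uint64_t kCardSize = 128;                     // assumed CardTable::kCardSize
      // One card byte per kCardSize bytes of covered address space.
      const std::uint64_t card_bytes = (kFourGB - kMinHeapAddress) / kCardSize;
      std::printf("card bytes: %llu (~32 MB)\n",
                  static_cast<unsigned long long>(card_bytes));
      return 0;
    }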
@@ -789,6 +809,7 @@
   if (!Runtime::Current()->IsAotCompiler()) {
     return false;
   }
+  ScopedObjectAccess soa(Thread::Current());
   for (const auto& space : continuous_spaces_) {
     if (space->IsImageSpace() || space->IsZygoteSpace()) {
       return false;
@@ -1249,10 +1270,6 @@
   return FindDiscontinuousSpaceFromObject(obj, fail_ok);
 }
 
-std::vector<space::ImageSpace*> Heap::GetBootImageSpaces() const {
-  return boot_image_spaces_;
-}
-
 void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
   std::ostringstream oss;
   size_t total_bytes_free = GetFreeMemory();
@@ -1373,15 +1390,18 @@
   uint64_t total_alloc_space_allocated = 0;
   uint64_t total_alloc_space_size = 0;
   uint64_t managed_reclaimed = 0;
-  for (const auto& space : continuous_spaces_) {
-    if (space->IsMallocSpace()) {
-      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
-      if (malloc_space->IsRosAllocSpace() || !CareAboutPauseTimes()) {
-        // Don't trim dlmalloc spaces if we care about pauses since this can hold the space lock
-        // for a long period of time.
-        managed_reclaimed += malloc_space->Trim();
+  {
+    ScopedObjectAccess soa(self);
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsMallocSpace()) {
+        gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+        if (malloc_space->IsRosAllocSpace() || !CareAboutPauseTimes()) {
+          // Don't trim dlmalloc spaces if we care about pauses since this can hold the space lock
+          // for a long period of time.
+          managed_reclaimed += malloc_space->Trim();
+        }
+        total_alloc_space_size += malloc_space->Size();
       }
-      total_alloc_space_size += malloc_space->Size();
     }
   }
   total_alloc_space_allocated = GetBytesAllocated();
@@ -1512,6 +1532,7 @@
 }
 
 void Heap::DumpSpaces(std::ostream& stream) const {
+  ScopedObjectAccess soa(Thread::Current());
   for (const auto& space : continuous_spaces_) {
     accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
     accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
@@ -1590,6 +1611,9 @@
 }
 
 space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const {
+  if (rosalloc_space_ != nullptr && rosalloc_space_->GetRosAlloc() == rosalloc) {
+    return rosalloc_space_;
+  }
   for (const auto& space : continuous_spaces_) {
     if (space->AsContinuousSpace()->IsRosAllocSpace()) {
       if (space->AsContinuousSpace()->AsRosAllocSpace()->GetRosAlloc() == rosalloc) {
@@ -2330,6 +2354,9 @@
     // We still want to GC in case there is some unreachable non moving objects that could cause a
     // suboptimal bin packing when we compact the zygote space.
     CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
+    // Trim the pages at the end of the non-moving space. Trim while not holding the zygote lock,
+    // since the trim process may require taking the mutator lock.
+    non_moving_space_->Trim();
   }
   Thread* self = Thread::Current();
   MutexLock mu(self, zygote_creation_lock_);
@@ -2340,8 +2367,6 @@
   Runtime::Current()->GetInternTable()->AddNewTable();
   Runtime::Current()->GetClassLinker()->MoveClassTableToPreZygote();
   VLOG(heap) << "Starting PreZygoteFork";
-  // Trim the pages at the end of the non moving space.
-  non_moving_space_->Trim();
   // The end of the non-moving space may be protected, unprotect it so that we can copy the zygote
   // there.
   non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
@@ -3191,7 +3216,13 @@
     } else if (process_alloc_space_cards) {
       TimingLogger::ScopedTiming t2("AllocSpaceClearCards", timings);
       if (clear_alloc_space_cards) {
-        card_table_->ClearCardRange(space->Begin(), space->End());
+        uint8_t* end = space->End();
+        if (space->IsImageSpace()) {
+          // The image space end is the end of the mirror objects; it is not necessarily page or
+          // card aligned. Align up so that the alignment check in ClearCardRange does not fail.
+          end = AlignUp(end, accounting::CardTable::kCardSize);
+        }
+        card_table_->ClearCardRange(space->Begin(), end);
       } else {
         // No mod union table for the AllocSpace. Age the cards so that the GC knows that these
         // cards were dirty before the GC started.
@@ -3516,7 +3547,8 @@
 
 void Heap::ClampGrowthLimit() {
   // Use heap bitmap lock to guard against races with BindLiveToMarkBitmap.
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  ScopedObjectAccess soa(Thread::Current());
+  WriterMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
   capacity_ = growth_limit_;
   for (const auto& space : continuous_spaces_) {
     if (space->IsMallocSpace()) {
@@ -3532,6 +3564,7 @@
 
 void Heap::ClearGrowthLimit() {
   growth_limit_ = capacity_;
+  ScopedObjectAccess soa(Thread::Current());
   for (const auto& space : continuous_spaces_) {
     if (space->IsMallocSpace()) {
       gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
@@ -3986,5 +4019,43 @@
   gc_disabled_for_shutdown_ = true;
 }
 
+bool Heap::ObjectIsInBootImageSpace(mirror::Object* obj) const {
+  for (gc::space::ImageSpace* space : boot_image_spaces_) {
+    if (space->HasAddress(obj)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void Heap::GetBootImagesSize(uint32_t* boot_image_begin,
+                             uint32_t* boot_image_end,
+                             uint32_t* boot_oat_begin,
+                             uint32_t* boot_oat_end) {
+  DCHECK(boot_image_begin != nullptr);
+  DCHECK(boot_image_end != nullptr);
+  DCHECK(boot_oat_begin != nullptr);
+  DCHECK(boot_oat_end != nullptr);
+  *boot_image_begin = 0u;
+  *boot_image_end = 0u;
+  *boot_oat_begin = 0u;
+  *boot_oat_end = 0u;
+  for (gc::space::ImageSpace* space_ : GetBootImageSpaces()) {
+    const uint32_t image_begin = PointerToLowMemUInt32(space_->Begin());
+    const uint32_t image_size = space_->GetImageHeader().GetImageSize();
+    if (*boot_image_begin == 0 || image_begin < *boot_image_begin) {
+      *boot_image_begin = image_begin;
+    }
+    *boot_image_end = std::max(*boot_image_end, image_begin + image_size);
+    const OatFile* boot_oat_file = space_->GetOatFile();
+    const uint32_t oat_begin = PointerToLowMemUInt32(boot_oat_file->Begin());
+    const uint32_t oat_size = boot_oat_file->Size();
+    if (*boot_oat_begin == 0 || oat_begin < *boot_oat_begin) {
+      *boot_oat_begin = oat_begin;
+    }
+    *boot_oat_end = std::max(*boot_oat_end, oat_begin + oat_size);
+  }
+}
+
 }  // namespace gc
 }  // namespace art
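
A toy check of the min-begin/max-end accumulation GetBootImagesSize performs, using two made-up image ranges:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      struct Range { std::uint32_t begin; std::uint32_t size; };
      const Range spaces[] = {{0x70000000u, 0x01000000u}, {0x71000000u, 0x00800000u}};
      std::uint32_t begin = 0u;
      std::uint32_t end = 0u;
      for (const Range& s : spaces) {
        if (begin == 0u || s.begin < begin) {
          begin = s.begin;  // track the lowest begin, 0 meaning "unset"
        }
        end = std::max(end, s.begin + s.size);  // track the highest end
      }
      assert(begin == 0x70000000u);
      assert(end == 0x71800000u);
      return 0;
    }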
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e7ea983..c02e2d3 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -89,7 +89,6 @@
   class RegionSpace;
   class RosAllocSpace;
   class Space;
-  class SpaceTest;
   class ZygoteSpace;
 }  // namespace space
 
@@ -309,7 +308,10 @@
   void ThreadFlipEnd(Thread* self) REQUIRES(!*thread_flip_lock_);
 
   // Clear all of the mark bits, doesn't clear bitmaps which have the same live bits as mark bits.
-  void ClearMarkedObjects() REQUIRES(Locks::heap_bitmap_lock_);
+  // Mutator lock is required for GetContinuousSpaces.
+  void ClearMarkedObjects()
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Initiates an explicit garbage collection.
   void CollectGarbage(bool clear_soft_references)
@@ -360,8 +362,12 @@
   // due to usage by tests.
   void SetSpaceAsDefault(space::ContinuousSpace* continuous_space)
       REQUIRES(!Locks::heap_bitmap_lock_);
-  void AddSpace(space::Space* space) REQUIRES(!Locks::heap_bitmap_lock_);
-  void RemoveSpace(space::Space* space) REQUIRES(!Locks::heap_bitmap_lock_);
+  void AddSpace(space::Space* space)
+      REQUIRES(!Locks::heap_bitmap_lock_)
+      REQUIRES(Locks::mutator_lock_);
+  void RemoveSpace(space::Space* space)
+    REQUIRES(!Locks::heap_bitmap_lock_)
+    REQUIRES(Locks::mutator_lock_);
 
   // Set target ideal heap utilization ratio, implements
   // dalvik.system.VMRuntime.setTargetHeapUtilization.
@@ -379,7 +385,13 @@
   void UpdateProcessState(ProcessState process_state)
       REQUIRES(!*pending_task_lock_, !*gc_complete_lock_);
 
-  const std::vector<space::ContinuousSpace*>& GetContinuousSpaces() const {
+  bool HaveContinuousSpaces() const NO_THREAD_SAFETY_ANALYSIS {
+    // No lock needed since checking whether the vector is empty is thread safe here.
+    return !continuous_spaces_.empty();
+  }
+
+  const std::vector<space::ContinuousSpace*>& GetContinuousSpaces() const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     return continuous_spaces_;
   }
 
@@ -519,10 +531,13 @@
   // get the space that corresponds to an object's address. Current implementation searches all
   // spaces in turn. If fail_ok is false then failing to find a space will cause an abort.
   // TODO: consider using faster data structure like binary tree.
-  space::ContinuousSpace* FindContinuousSpaceFromObject(const mirror::Object*, bool fail_ok) const;
+  space::ContinuousSpace* FindContinuousSpaceFromObject(const mirror::Object*, bool fail_ok) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
   space::DiscontinuousSpace* FindDiscontinuousSpaceFromObject(const mirror::Object*,
-                                                              bool fail_ok) const;
-  space::Space* FindSpaceFromObject(const mirror::Object*, bool fail_ok) const;
+                                                              bool fail_ok) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  space::Space* FindSpaceFromObject(const mirror::Object*, bool fail_ok) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_);
 
@@ -578,10 +593,22 @@
       REQUIRES(Locks::heap_bitmap_lock_);
 
   // Unbind any bound bitmaps.
-  void UnBindBitmaps() REQUIRES(Locks::heap_bitmap_lock_);
+  void UnBindBitmaps()
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the boot image spaces. There may be multiple boot image spaces.
-  std::vector<space::ImageSpace*> GetBootImageSpaces() const;
+  const std::vector<space::ImageSpace*>& GetBootImageSpaces() const {
+    return boot_image_spaces_;
+  }
+
+  bool ObjectIsInBootImageSpace(mirror::Object* obj) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void GetBootImagesSize(uint32_t* boot_image_begin,
+                         uint32_t* boot_image_end,
+                         uint32_t* boot_oat_begin,
+                         uint32_t* boot_oat_end);
 
   // Permanently disable moving garbage collection.
   void DisableMovingGc() REQUIRES(!*gc_complete_lock_);
@@ -595,7 +622,8 @@
   }
 
   // Return the corresponding rosalloc space.
-  space::RosAllocSpace* GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const;
+  space::RosAllocSpace* GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   space::MallocSpace* GetNonMovingSpace() const {
     return non_moving_space_;
@@ -953,7 +981,8 @@
   void ProcessCards(TimingLogger* timings,
                     bool use_rem_sets,
                     bool process_alloc_space_cards,
-                    bool clear_alloc_space_cards);
+                    bool clear_alloc_space_cards)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Push an object onto the allocation stack.
   void PushOnAllocationStack(Thread* self, mirror::Object** obj)
@@ -996,10 +1025,10 @@
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_);
 
   // All-known continuous spaces, where objects lie within fixed bounds.
-  std::vector<space::ContinuousSpace*> continuous_spaces_;
+  std::vector<space::ContinuousSpace*> continuous_spaces_ GUARDED_BY(Locks::mutator_lock_);
 
   // All-known discontinuous spaces, where objects may be placed throughout virtual memory.
-  std::vector<space::DiscontinuousSpace*> discontinuous_spaces_;
+  std::vector<space::DiscontinuousSpace*> discontinuous_spaces_ GUARDED_BY(Locks::mutator_lock_);
 
   // All-known alloc spaces, where objects may be or have been allocated.
   std::vector<space::AllocSpace*> alloc_spaces_;
@@ -1335,7 +1364,6 @@
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
-  friend class space::SpaceTest;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index e754a52..455d28e 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -319,7 +319,7 @@
 namespace allocator {
 
 // Implement the dlmalloc morecore callback.
-void* ArtDlMallocMoreCore(void* mspace, intptr_t increment) {
+void* ArtDlMallocMoreCore(void* mspace, intptr_t increment) SHARED_REQUIRES(Locks::mutator_lock_) {
   Runtime* runtime = Runtime::Current();
   Heap* heap = runtime->GetHeap();
   ::art::gc::space::DlMallocSpace* dlmalloc_space = heap->GetDlMallocSpace();
diff --git a/runtime/gc/space/dlmalloc_space_base_test.cc b/runtime/gc/space/dlmalloc_space_base_test.cc
deleted file mode 100644
index 93fe155..0000000
--- a/runtime/gc/space/dlmalloc_space_base_test.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-#include "dlmalloc_space.h"
-#include "scoped_thread_state_change.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(DlMallocSpace, CreateDlMallocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 5f6bb8e..891e280 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -30,6 +30,7 @@
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
 #include "gc/accounting/space_bitmap-inl.h"
+#include "image-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "oat_file.h"
@@ -475,10 +476,10 @@
   return true;
 }
 
-ImageSpace* ImageSpace::Create(const char* image_location,
-                               const InstructionSet image_isa,
-                               bool secondary_image,
-                               std::string* error_msg) {
+ImageSpace* ImageSpace::CreateBootImage(const char* image_location,
+                                        const InstructionSet image_isa,
+                                        bool secondary_image,
+                                        std::string* error_msg) {
   std::string system_filename;
   bool has_system = false;
   std::string cache_filename;
@@ -584,8 +585,13 @@
       // assume this if we are using a relocated image (i.e. image checksum
       // matches) since this is only different by the offset. We need this to
       // make sure that host tests continue to work.
-      space = ImageSpace::Init(image_filename->c_str(), image_location,
-                               !(is_system || relocated_version_used), error_msg);
+      // Since this is the boot image, pass a null oat file; it is loaded from the boot image
+      // oat file name instead.
+      space = ImageSpace::Init(image_filename->c_str(),
+                               image_location,
+                               !(is_system || relocated_version_used),
+                               /* oat_file */nullptr,
+                               error_msg);
     }
     if (space != nullptr) {
       return space;
@@ -646,7 +652,7 @@
     // we leave Create.
     ScopedFlock image_lock;
     image_lock.Init(cache_filename.c_str(), error_msg);
-    space = ImageSpace::Init(cache_filename.c_str(), image_location, true, error_msg);
+    space = ImageSpace::Init(cache_filename.c_str(), image_location, true, nullptr, error_msg);
     if (space == nullptr) {
       *error_msg = StringPrintf("Failed to load generated image '%s': %s",
                                 cache_filename.c_str(), error_msg->c_str());
@@ -669,34 +675,495 @@
   }
 }
 
-ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_location,
-                             bool validate_oat_file, std::string* error_msg) {
+// Helper class for relocating from one range of memory to another.
+class RelocationRange {
+ public:
+  RelocationRange() = default;
+  RelocationRange(const RelocationRange&) = default;
+  RelocationRange(uintptr_t source, uintptr_t dest, uintptr_t length)
+      : source_(source),
+        dest_(dest),
+        length_(length) {}
+
+  bool ContainsSource(uintptr_t address) const {
+    return address - source_ < length_;
+  }
+
+  // Translate a source address to the destination space.
+  uintptr_t ToDest(uintptr_t address) const {
+    DCHECK(ContainsSource(address));
+    return address + Delta();
+  }
+
+  // Returns the delta from the source to the dest.
+  off_t Delta() const {
+    return dest_ - source_;
+  }
+
+  uintptr_t Source() const {
+    return source_;
+  }
+
+  uintptr_t Dest() const {
+    return dest_;
+  }
+
+  uintptr_t Length() const {
+    return length_;
+  }
+
+ private:
+  const uintptr_t source_;
+  const uintptr_t dest_;
+  const uintptr_t length_;
+};
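
ContainsSource leans on unsigned wraparound: for an address below source_, the subtraction wraps to a huge value and fails the < length_ test, so a single compare covers both bounds. A standalone restatement:

    #include <cassert>
    #include <cstdint>

    struct Range {
      std::uintptr_t source, dest, length;
      // One compare checks both "address >= source" and "address < source + length".
      bool ContainsSource(std::uintptr_t a) const { return a - source < length; }
      std::uintptr_t ToDest(std::uintptr_t a) const { return a + (dest - source); }
    };

    int main() {
      const Range r{0x1000, 0x9000, 0x100};
      assert(r.ContainsSource(0x1000) && r.ContainsSource(0x10FF));
      assert(!r.ContainsSource(0x0FFF) && !r.ContainsSource(0x1100));
      assert(r.ToDest(0x1010) == 0x9010);
      return 0;
    }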
+
+class FixupVisitor : public ValueObject {
+ public:
+  FixupVisitor(const RelocationRange& boot_image,
+               const RelocationRange& boot_oat,
+               const RelocationRange& app_image,
+               const RelocationRange& app_oat)
+      : boot_image_(boot_image),
+        boot_oat_(boot_oat),
+        app_image_(app_image),
+        app_oat_(app_oat) {}
+
+  // Return the relocated address of a heap object.
+  template <typename T>
+  ALWAYS_INLINE T* ForwardObject(T* src) const {
+    const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
+    if (boot_image_.ContainsSource(uint_src)) {
+      return reinterpret_cast<T*>(boot_image_.ToDest(uint_src));
+    }
+    if (app_image_.ContainsSource(uint_src)) {
+      return reinterpret_cast<T*>(app_image_.ToDest(uint_src));
+    }
+    return src;
+  }
+
+  // Return the relocated address of a code pointer (contained by an oat file).
+  ALWAYS_INLINE const void* ForwardCode(const void* src) const {
+    const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
+    if (boot_oat_.ContainsSource(uint_src)) {
+      return reinterpret_cast<const void*>(boot_oat_.ToDest(uint_src));
+    }
+    if (app_oat_.ContainsSource(uint_src)) {
+      return reinterpret_cast<const void*>(app_oat_.ToDest(uint_src));
+    }
+    return src;
+  }
+
+ protected:
+  // Source section.
+  const RelocationRange boot_image_;
+  const RelocationRange boot_oat_;
+  const RelocationRange app_image_;
+  const RelocationRange app_oat_;
+};
+
+std::ostream& operator<<(std::ostream& os, const RelocationRange& reloc) {
+  return os << "(" << reinterpret_cast<const void*>(reloc.Source()) << "-"
+            << reinterpret_cast<const void*>(reloc.Source() + reloc.Length()) << ")->("
+            << reinterpret_cast<const void*>(reloc.Dest()) << "-"
+            << reinterpret_cast<const void*>(reloc.Dest() + reloc.Length()) << ")";
+}
+
+// Adapt for mirror::Class::FixupNativePointers.
+class FixupObjectAdapter : public FixupVisitor {
+ public:
+  template<typename... Args>
+  explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {}
+
+  template <typename T>
+  T* operator()(T* obj) const {
+    return ForwardObject(obj);
+  }
+};
+
+class FixupClassVisitor : public FixupVisitor {
+ public:
+  template<typename... Args>
+  explicit FixupClassVisitor(Args... args) : FixupVisitor(args...) {}
+
+  // The image space is not yet added to the heap, so the GC doesn't need to know about it.
+  // Avoid requiring the mutator lock to prevent possible pauses.
+  ALWAYS_INLINE void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+    mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
+    DCHECK(klass != nullptr) << "Null class in image";
+    // No AsClass since our fields aren't quite fixed up yet.
+    mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass));
+    // Keep clean if possible.
+    if (klass != new_klass) {
+      obj->SetClass<kVerifyNone>(new_klass);
+    }
+  }
+};
+
+class FixupRootVisitor : public FixupVisitor {
+ public:
+  template<typename... Args>
+  explicit FixupRootVisitor(Args... args) : FixupVisitor(args...) {}
+
+  ALWAYS_INLINE void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    mirror::Object* ref = root->AsMirrorPtr();
+    mirror::Object* new_ref = ForwardObject(ref);
+    if (ref != new_ref) {
+      root->Assign(new_ref);
+    }
+  }
+};
+
+class FixupObjectVisitor : public FixupVisitor {
+ public:
+  template<typename... Args>
+  explicit FixupObjectVisitor(Args... args) : FixupVisitor(args...) {}
+
+  // Fix up separately since we also need to fix up method entrypoints.
+  ALWAYS_INLINE void VisitRootIfNonNull(
+      mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {}
+
+  ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED)
+      const {}
+
+  ALWAYS_INLINE void operator()(mirror::Object* obj,
+                                MemberOffset offset,
+                                bool is_static ATTRIBUTE_UNUSED) const
+      NO_THREAD_SAFETY_ANALYSIS {
+    // There could be overlap between ranges; we must avoid visiting the same reference twice.
+    // Avoid the class field since we already fixed it up in FixupClassVisitor.
+    if (offset.Uint32Value() != mirror::Object::ClassOffset().Uint32Value()) {
+      // Space is not yet added to the heap, don't do a read barrier.
+      mirror::Object* ref = obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(
+          offset);
+      // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+      // image.
+      obj->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(offset, ForwardObject(ref));
+    }
+  }
+
+  // java.lang.ref.Reference visitor.
+  void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
+    mirror::Object* obj = ref->GetReferent<kWithoutReadBarrier>();
+    ref->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
+        mirror::Reference::ReferentOffset(),
+        ForwardObject(obj));
+  }
+
+  ALWAYS_INLINE void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+    obj->VisitReferences</*visit native roots*/false, kVerifyNone, kWithoutReadBarrier>(
+        *this,
+        *this);
+    // We want to use our own class loader and not the one in the image.
+    if (obj->IsClass<kVerifyNone, kWithoutReadBarrier>()) {
+      mirror::Class* klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
+      FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
+      klass->FixupNativePointers(klass, sizeof(void*), visitor);
+      // Deal with the arrays.
+      mirror::PointerArray* vtable = klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
+      if (vtable != nullptr) {
+        vtable->Fixup(vtable, sizeof(void*), visitor);
+      }
+      mirror::IfTable* iftable = klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
+      if (iftable != nullptr) {
+        for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
+          if (iftable->GetMethodArrayCount(i) > 0) {
+            mirror::PointerArray* methods =
+                iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
+            DCHECK(methods != nullptr);
+            methods->Fixup(methods, sizeof(void*), visitor);
+          }
+        }
+      }
+    }
+  }
+};
+
+class ForwardObjectAdapter {
+ public:
+  ALWAYS_INLINE explicit ForwardObjectAdapter(const FixupVisitor* visitor) : visitor_(visitor) {}
+
+  template <typename T>
+  ALWAYS_INLINE T* operator()(T* src) const {
+    return visitor_->ForwardObject(src);
+  }
+
+ private:
+  const FixupVisitor* const visitor_;
+};
+
+class ForwardCodeAdapter {
+ public:
+  ALWAYS_INLINE explicit ForwardCodeAdapter(const FixupVisitor* visitor) : visitor_(visitor) {}
+
+  template <typename T>
+  ALWAYS_INLINE T* operator()(T* src) const {
+    return visitor_->ForwardCode(src);
+  }
+
+ private:
+  const FixupVisitor* const visitor_;
+};
+
+class FixupArtMethodVisitor : public FixupVisitor, public ArtMethodVisitor {
+ public:
+  template<typename... Args>
+  explicit FixupArtMethodVisitor(bool fixup_heap_objects, Args... args)
+      : FixupVisitor(args...),
+        fixup_heap_objects_(fixup_heap_objects) {}
+
+  virtual void Visit(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
+    if (fixup_heap_objects_) {
+      method->UpdateObjectsForImageRelocation(ForwardObjectAdapter(this));
+    }
+    method->UpdateEntrypoints(ForwardCodeAdapter(this));
+  }
+
+ private:
+  const bool fixup_heap_objects_;
+};
+
+class FixupArtFieldVisitor : public FixupVisitor, public ArtFieldVisitor {
+ public:
+  template<typename... Args>
+  explicit FixupArtFieldVisitor(Args... args) : FixupVisitor(args...) {}
+
+  virtual void Visit(ArtField* field) NO_THREAD_SAFETY_ANALYSIS {
+    field->UpdateObjects(ForwardObjectAdapter(this));
+  }
+};
+
+// Relocate an image space mapped at target_base which possibly used to be at a different base
+// address. "In place" means only a single ImageSpace is needed (and modified), rather than
+// relocating from a source ImageSpace to a separate destination one.
+static bool RelocateInPlace(ImageHeader& image_header,
+                            uint8_t* target_base,
+                            accounting::ContinuousSpaceBitmap* bitmap,
+                            const OatFile* app_oat_file,
+                            std::string* error_msg) {
+  DCHECK(error_msg != nullptr);
+  if (!image_header.IsPic()) {
+    if (image_header.GetImageBegin() == target_base) {
+      return true;
+    }
+    *error_msg = StringPrintf("Cannot relocate non-pic image for oat file %s",
+                              (app_oat_file != nullptr) ? app_oat_file->GetLocation().c_str() : "");
+    return false;
+  }
+  // Set up sections.
+  uint32_t boot_image_begin = 0;
+  uint32_t boot_image_end = 0;
+  uint32_t boot_oat_begin = 0;
+  uint32_t boot_oat_end = 0;
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  heap->GetBootImagesSize(&boot_image_begin, &boot_image_end, &boot_oat_begin, &boot_oat_end);
+  CHECK_NE(boot_image_begin, boot_image_end)
+      << "Cannot relocate app image without boot image space";
+  CHECK_NE(boot_oat_begin, boot_oat_end) << "Cannot relocate app image without boot oat file";
+  const uint32_t boot_image_size = boot_image_end - boot_image_begin;
+  const uint32_t boot_oat_size = boot_oat_end - boot_oat_begin;
+  const uint32_t image_header_boot_image_size = image_header.GetBootImageSize();
+  const uint32_t image_header_boot_oat_size = image_header.GetBootOatSize();
+  if (boot_image_size != image_header_boot_image_size) {
+    *error_msg = StringPrintf("Boot image size %" PRIu64 " does not match expected size %"
+                                  PRIu64,
+                              static_cast<uint64_t>(boot_image_size),
+                              static_cast<uint64_t>(image_header_boot_image_size));
+    return false;
+  }
+  if (boot_oat_size != image_header_boot_oat_size) {
+    *error_msg = StringPrintf("Boot oat size %" PRIu64 " does not match expected size %"
+                                  PRIu64,
+                              static_cast<uint64_t>(boot_oat_size),
+                              static_cast<uint64_t>(image_header_boot_oat_size));
+    return false;
+  }
+  TimingLogger logger(__FUNCTION__, true, false);
+  RelocationRange boot_image(image_header.GetBootImageBegin(),
+                             boot_image_begin,
+                             boot_image_size);
+  RelocationRange boot_oat(image_header.GetBootOatBegin(),
+                           boot_oat_begin,
+                           boot_oat_size);
+  RelocationRange app_image(reinterpret_cast<uintptr_t>(image_header.GetImageBegin()),
+                            reinterpret_cast<uintptr_t>(target_base),
+                            image_header.GetImageSize());
+  // Use the oat data section since this is where the OatFile::Begin is.
+  RelocationRange app_oat(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
+                          // Not necessarily in low 4GB.
+                          reinterpret_cast<uintptr_t>(app_oat_file->Begin()),
+                          image_header.GetOatDataEnd() - image_header.GetOatDataBegin());
+  VLOG(image) << "App image " << app_image;
+  VLOG(image) << "App oat " << app_oat;
+  VLOG(image) << "Boot image " << boot_image;
+  VLOG(image) << "Boot oat " << boot_oat;
+  // True if we need to fixup any heap pointers, otherwise only code pointers.
+  const bool fixup_image = boot_image.Delta() != 0 || app_image.Delta() != 0;
+  const bool fixup_code = boot_oat.Delta() != 0 || app_oat.Delta() != 0;
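+  // (Sketch of the forwarding rule these deltas drive, based on how the ranges are used below:
+  // a pointer p inside a range's source interval [src, src + size) is forwarded to p + Delta(),
+  // with Delta() being the dest-minus-source offset, so a zero delta means the range already
+  // sits at its target address.)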
+  if (!fixup_image && !fixup_code) {
+    // Nothing to fix up.
+    return true;
+  }
+  // Need to update the image to be at the target base.
+  const ImageSection& objects_section = image_header.GetImageSection(ImageHeader::kSectionObjects);
+  uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
+  uintptr_t objects_end = reinterpret_cast<uintptr_t>(target_base + objects_section.End());
+  // Two-pass approach: fix up all classes first, then fix up non-class objects.
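+  // (Likely rationale: the object pass reads each object's Class to find its reference
+  // offsets, so class pointers must already be forwarded before objects are visited.)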
+  FixupObjectVisitor fixup_object_visitor(boot_image, boot_oat, app_image, app_oat);
+  if (fixup_image) {
+    TimingLogger::ScopedTiming timing("Fixup classes", &logger);
+    // Fixing up classes only touches app image classes; we don't need the mutator lock since
+    // the space is not yet visible to the GC.
+    FixupClassVisitor fixup_class_visitor(boot_image, boot_oat, app_image, app_oat);
+    bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_class_visitor);
+    // Fixing up objects may read fields in the boot image, so take the mutator lock here for
+    // sanity, though it's probably not required.
+    ScopedObjectAccess soa(Thread::Current());
+    timing.NewTiming("Fixup objects");
+    bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_object_visitor);
+    FixupObjectAdapter fixup_adapter(boot_image, boot_oat, app_image, app_oat);
+    // Fixup image roots.
+    CHECK(app_image.ContainsSource(reinterpret_cast<uintptr_t>(
+        image_header.GetImageRoots<kWithoutReadBarrier>())));
+    image_header.RelocateImageObjects(app_image.Delta());
+    CHECK_EQ(image_header.GetImageBegin(), target_base);
+    // Fix up the dex cache native array pointers (strings, resolved types, methods, fields).
+    auto* dex_caches = image_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kDexCaches)->
+        AsObjectArray<mirror::DexCache>();
+    for (int32_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
+      mirror::DexCache* dex_cache = dex_caches->Get(i);
+      // Fix up dex cache pointers.
+      GcRoot<mirror::String>* strings = dex_cache->GetStrings();
+      if (strings != nullptr) {
+        GcRoot<mirror::String>* new_strings = fixup_adapter.ForwardObject(strings);
+        if (strings != new_strings) {
+          dex_cache->SetFieldPtr64<false>(mirror::DexCache::StringsOffset(), new_strings);
+        }
+        dex_cache->FixupStrings(new_strings, fixup_adapter);
+      }
+      GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
+      if (types != nullptr) {
+        GcRoot<mirror::Class>* new_types = fixup_adapter.ForwardObject(types);
+        if (types != new_types) {
+          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedTypesOffset(), new_types);
+        }
+        dex_cache->FixupResolvedTypes(new_types, fixup_adapter);
+      }
+      ArtMethod** methods = dex_cache->GetResolvedMethods();
+      if (methods != nullptr) {
+        ArtMethod** new_methods = fixup_adapter.ForwardObject(methods);
+        if (methods != new_methods) {
+          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedMethodsOffset(), new_methods);
+        }
+        for (size_t j = 0, num = dex_cache->NumResolvedMethods(); j != num; ++j) {
+          ArtMethod* orig = mirror::DexCache::GetElementPtrSize(new_methods, j, sizeof(void*));
+          ArtMethod* copy = fixup_adapter.ForwardObject(orig);
+          if (orig != copy) {
+            mirror::DexCache::SetElementPtrSize(new_methods, j, copy, sizeof(void*));
+          }
+        }
+      }
+      ArtField** fields = dex_cache->GetResolvedFields();
+      if (fields != nullptr) {
+        ArtField** new_fields = fixup_adapter.ForwardObject(fields);
+        if (fields != new_fields) {
+          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedFieldsOffset(), new_fields);
+        }
+        for (size_t j = 0, num = dex_cache->NumResolvedFields(); j != num; ++j) {
+          ArtField* orig = mirror::DexCache::GetElementPtrSize(new_fields, j, sizeof(void*));
+          ArtField* copy = fixup_adapter.ForwardObject(orig);
+          if (orig != copy) {
+            mirror::DexCache::SetElementPtrSize(new_fields, j, copy, sizeof(void*));
+          }
+        }
+      }
+    }
+  }
+  {
+    // Only touches objects in the app image, no need for mutator lock.
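+    // (Note: this block runs even when fixup_image is false, since method entry points are code
+    // pointers and may need forwarding whenever the oat ranges moved; fixup_image only gates the
+    // update of the methods' heap references.)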
+    TimingLogger::ScopedTiming timing("Fixup methods", &logger);
+    FixupArtMethodVisitor method_visitor(fixup_image, boot_image, boot_oat, app_image, app_oat);
+    image_header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
+        &method_visitor,
+        target_base,
+        sizeof(void*));
+  }
+  if (fixup_image) {
+    {
+      // Only touches objects in the app image, no need for mutator lock.
+      TimingLogger::ScopedTiming timing("Fixup fields", &logger);
+      FixupArtFieldVisitor field_visitor(boot_image, boot_oat, app_image, app_oat);
+      image_header.GetImageSection(ImageHeader::kSectionArtFields).VisitPackedArtFields(
+          &field_visitor,
+          target_base);
+    }
+    // In the app image case, the image methods are actually in the boot image.
+    image_header.RelocateImageMethods(boot_image.Delta());
+    const auto& class_table_section = image_header.GetImageSection(ImageHeader::kSectionClassTable);
+    if (class_table_section.Size() > 0u) {
+      // Note that we require that ReadFromMemory does not make an internal copy of the elements.
+      // This also relies on VisitRoots not doing any verification that could fail after we
+      // update the roots to be the image addresses.
+      ScopedObjectAccess soa(Thread::Current());
+      WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+      ClassTable temp_table;
+      temp_table.ReadFromMemory(target_base + class_table_section.Offset());
+      FixupRootVisitor root_visitor(boot_image, boot_oat, app_image, app_oat);
+      temp_table.VisitRoots(root_visitor);
+    }
+  }
+  if (VLOG_IS_ON(image)) {
+    logger.Dump(LOG(INFO));
+  }
+  return true;
+}
+
+ImageSpace* ImageSpace::Init(const char* image_filename,
+                             const char* image_location,
+                             bool validate_oat_file,
+                             const OatFile* oat_file,
+                             std::string* error_msg) {
   CHECK(image_filename != nullptr);
   CHECK(image_location != nullptr);
 
-  uint64_t start_time = 0;
-  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    start_time = NanoTime();
-    LOG(INFO) << "ImageSpace::Init entering image_filename=" << image_filename;
-  }
+  TimingLogger logger(__FUNCTION__, true, false);
+  VLOG(image) << "ImageSpace::Init entering image_filename=" << image_filename;
 
-  std::unique_ptr<File> file(OS::OpenFileForReading(image_filename));
-  if (file.get() == nullptr) {
-    *error_msg = StringPrintf("Failed to open '%s'", image_filename);
-    return nullptr;
+  std::unique_ptr<File> file;
+  {
+    TimingLogger::ScopedTiming timing("OpenImageFile", &logger);
+    file.reset(OS::OpenFileForReading(image_filename));
+    if (file == nullptr) {
+      *error_msg = StringPrintf("Failed to open '%s'", image_filename);
+      return nullptr;
+    }
   }
-  ImageHeader image_header;
-  bool success = file->ReadFully(&image_header, sizeof(image_header));
-  if (!success || !image_header.IsValid()) {
-    *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
-    return nullptr;
+  ImageHeader temp_image_header;
+  ImageHeader* image_header = &temp_image_header;
+  {
+    TimingLogger::ScopedTiming timing("ReadImageHeader", &logger);
+    bool success = file->ReadFully(image_header, sizeof(*image_header));
+    if (!success || !image_header->IsValid()) {
+      *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
+      return nullptr;
+    }
   }
   // Check that the file is larger or equal to the header size + data size.
   const uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
-  if (image_file_size < sizeof(ImageHeader) + image_header.GetDataSize()) {
+  if (image_file_size < sizeof(ImageHeader) + image_header->GetDataSize()) {
     *error_msg = StringPrintf("Image file truncated: %" PRIu64 " vs. %" PRIu64 ".",
                               image_file_size,
-                              image_header.GetDataSize());
+                              sizeof(ImageHeader) + image_header->GetDataSize());
     return nullptr;
   }
 
@@ -704,17 +1171,17 @@
     LOG(INFO) << "Dumping image sections";
     for (size_t i = 0; i < ImageHeader::kSectionCount; ++i) {
       const auto section_idx = static_cast<ImageHeader::ImageSections>(i);
-      auto& section = image_header.GetImageSection(section_idx);
+      auto& section = image_header->GetImageSection(section_idx);
       LOG(INFO) << section_idx << " start="
-          << reinterpret_cast<void*>(image_header.GetImageBegin() + section.Offset()) << " "
-          << section;
+                << reinterpret_cast<void*>(image_header->GetImageBegin() + section.Offset()) << " "
+                << section;
     }
   }
 
-  const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
+  const auto& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap);
   // The location we want to map from is the first aligned page after the end of the stored
   // (possibly compressed) data.
-  const size_t image_bitmap_offset = RoundUp(sizeof(image_header) + image_header.GetDataSize(),
+  const size_t image_bitmap_offset = RoundUp(sizeof(ImageHeader) + image_header->GetDataSize(),
                                              kPageSize);
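+  // Resulting file layout: [ImageHeader | image data][padding to page boundary][image bitmap],
+  // with the bitmap expected to run to end-of-file (checked below).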
   const size_t end_of_bitmap = image_bitmap_offset + bitmap_section.Size();
   if (end_of_bitmap != image_file_size) {
@@ -724,67 +1191,84 @@
     return nullptr;
   }
 
+  // The preferred address at which to map the image; null means any address. If we manage to
+  // map the image at its preferred begin address, the required fixup work is minimized.
+  std::vector<uint8_t*> addresses(1, image_header->GetImageBegin());
+  if (image_header->IsPic()) {
+    // Can also map at a random low_4gb address since we can relocate in-place.
+    addresses.push_back(nullptr);
+  }
+
   // Note: The image header is part of the image due to mmap page alignment required of offset.
   std::unique_ptr<MemMap> map;
-  if (image_header.GetStorageMode() == ImageHeader::kStorageModeUncompressed) {
-    map.reset(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
-                                       image_header.GetImageSize(),
-                                       PROT_READ | PROT_WRITE,
-                                       MAP_PRIVATE,
-                                       file->Fd(),
-                                       0,
-                                       /*low_4gb*/false,
-                                       /*reuse*/false,
-                                       image_filename,
-                                       error_msg));
-  } else {
-    // Reserve output and decompress into it.
-    map.reset(MemMap::MapAnonymous(image_location,
-                                   image_header.GetImageBegin(),
-                                   image_header.GetImageSize(),
-                                   PROT_READ | PROT_WRITE,
-                                   /*low_4gb*/false,
-                                   /*reuse*/false,
-                                   error_msg));
+  std::string temp_error_msg;
+  for (uint8_t* address : addresses) {
+    TimingLogger::ScopedTiming timing("MapImageFile", &logger);
+    // Only care about the error message for the last address in addresses; we want to avoid
+    // the overhead of printing the process maps if we can relocate.
+    std::string* out_error_msg = (address == addresses.back()) ? &temp_error_msg : nullptr;
+    if (image_header->GetStorageMode() == ImageHeader::kStorageModeUncompressed) {
+      map.reset(MemMap::MapFileAtAddress(address,
+                                         image_header->GetImageSize(),
+                                         PROT_READ | PROT_WRITE,
+                                         MAP_PRIVATE,
+                                         file->Fd(),
+                                         0,
+                                         /*low_4gb*/true,
+                                         /*reuse*/false,
+                                         image_filename,
+                                         /*out*/out_error_msg));
+    } else {
+      // Reserve output and decompress into it.
+      map.reset(MemMap::MapAnonymous(image_location,
+                                     address,
+                                     image_header->GetImageSize(),
+                                     PROT_READ | PROT_WRITE,
+                                     /*low_4gb*/true,
+                                     /*reuse*/false,
+                                     out_error_msg));
+      if (map != nullptr) {
+        const size_t stored_size = image_header->GetDataSize();
+        const size_t write_offset = sizeof(ImageHeader);  // Skip the header.
+        std::unique_ptr<MemMap> temp_map(MemMap::MapFile(sizeof(ImageHeader) + stored_size,
+                                                         PROT_READ,
+                                                         MAP_PRIVATE,
+                                                         file->Fd(),
+                                                         /*offset*/0,
+                                                         /*low_4gb*/false,
+                                                         image_filename,
+                                                         out_error_msg));
+        if (temp_map == nullptr) {
+          DCHECK(out_error_msg == nullptr || !out_error_msg->empty());
+          return nullptr;
+        }
+        memcpy(map->Begin(), image_header, sizeof(ImageHeader));
+        const uint64_t start = NanoTime();
+        const size_t decompressed_size = LZ4_decompress_safe(
+            reinterpret_cast<char*>(temp_map->Begin()) + sizeof(ImageHeader),
+            reinterpret_cast<char*>(map->Begin()) + write_offset,
+            stored_size,
+            map->Size());
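+        // LZ4_decompress_safe returns the number of bytes written to the destination, or a
+        // negative value on malformed input; either way a mismatch fails the size check below.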
+        VLOG(image) << "Decompressing image took " << PrettyDuration(NanoTime() - start);
+        if (decompressed_size + sizeof(ImageHeader) != image_header->GetImageSize()) {
+          *error_msg = StringPrintf(
+              "Decompressed size does not match expected image size %zu vs %zu",
+              decompressed_size + sizeof(ImageHeader),
+              image_header->GetImageSize());
+          return nullptr;
+        }
+      }
+    }
     if (map != nullptr) {
-      const size_t stored_size = image_header.GetDataSize();
-      const size_t write_offset = sizeof(image_header);  // Skip the header.
-      std::unique_ptr<MemMap> temp_map(MemMap::MapFile(sizeof(ImageHeader) + stored_size,
-                                                       PROT_READ,
-                                                       MAP_PRIVATE,
-                                                       file->Fd(),
-                                                       /*offset*/0,
-                                                       /*low_4gb*/false,
-                                                       image_filename,
-                                                       error_msg));
-      if (temp_map == nullptr) {
-        DCHECK(!error_msg->empty());
-        return nullptr;
-      }
-      memcpy(map->Begin(), &image_header, sizeof(image_header));
-      const uint64_t start = NanoTime();
-      const size_t decompressed_size = LZ4_decompress_safe(
-          reinterpret_cast<char*>(temp_map->Begin()) + sizeof(ImageHeader),
-          reinterpret_cast<char*>(map->Begin()) + write_offset,
-          stored_size,
-          map->Size());
-      // TODO: VLOG(image)
-      VLOG(class_linker) << "Decompressing image took " << PrettyDuration(NanoTime() - start);
-      if (decompressed_size + sizeof(ImageHeader) != image_header.GetImageSize()) {
-        *error_msg = StringPrintf("Decompressed size does not match expected image size %zu vs %zu",
-                                  decompressed_size + sizeof(ImageHeader),
-                                  image_header.GetImageSize());
-        return nullptr;
-      }
+      break;
     }
   }
 
   if (map == nullptr) {
-    DCHECK(!error_msg->empty());
+    DCHECK(!temp_error_msg.empty());
+    *error_msg = temp_error_msg;
     return nullptr;
   }
-  CHECK_EQ(image_header.GetImageBegin(), map->Begin());
-  DCHECK_EQ(0, memcmp(&image_header, map->Begin(), sizeof(ImageHeader)));
+  DCHECK_EQ(0, memcmp(image_header, map->Begin(), sizeof(ImageHeader)));
 
   std::unique_ptr<MemMap> image_bitmap_map(MemMap::MapFileAtAddress(nullptr,
                                                                     bitmap_section.Size(),
@@ -799,25 +1283,42 @@
     *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
     return nullptr;
   }
-  uint32_t bitmap_index = bitmap_index_.FetchAndAddSequentiallyConsistent(1);
-  std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_filename,
+  // The map is now loaded; from here on, use the image header from the mapped file, since
+  // RelocateInPlace may patch it.
+  image_header = reinterpret_cast<ImageHeader*>(map->Begin());
+  const uint32_t bitmap_index = bitmap_index_.FetchAndAddSequentiallyConsistent(1);
+  std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u",
+                                       image_filename,
                                        bitmap_index));
   // Bitmap only needs to cover until the end of the mirror objects section.
-  const ImageSection& image_objects = image_header.GetImageSection(ImageHeader::kSectionObjects);
-  std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
+  const ImageSection& image_objects = image_header->GetImageSection(ImageHeader::kSectionObjects);
+  // We only want the mirror object, not the ArtFields and ArtMethods.
+  uint8_t* const image_end = map->Begin() + image_objects.End();
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap;
+  {
+    TimingLogger::ScopedTiming timing("CreateImageBitmap", &logger);
+    bitmap.reset(
       accounting::ContinuousSpaceBitmap::CreateFromMemMap(
           bitmap_name,
           image_bitmap_map.release(),
           reinterpret_cast<uint8_t*>(map->Begin()),
           image_objects.End()));
-  if (bitmap == nullptr) {
-    *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
-    return nullptr;
+    if (bitmap == nullptr) {
+      *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
+      return nullptr;
+    }
   }
-
+  {
+    TimingLogger::ScopedTiming timing("RelocateImage", &logger);
+    if (!RelocateInPlace(*image_header,
+                         map->Begin(),
+                         bitmap.get(),
+                         oat_file,
+                         error_msg)) {
+      return nullptr;
+    }
+  }
   // We only want the mirror object, not the ArtFields and ArtMethods.
-  uint8_t* const image_end =
-      map->Begin() + image_header.GetImageSection(ImageHeader::kSectionObjects).End();
   std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename,
                                                    image_location,
                                                    map.release(),
@@ -829,38 +1330,61 @@
   // and ArtField::java_lang_reflect_ArtField_, which are used from
   // Object::SizeOf() which VerifyImageAllocations() calls, are not
   // set yet at this point.
-
-  space->oat_file_.reset(space->OpenOatFile(image_filename, error_msg));
-  if (space->oat_file_.get() == nullptr) {
-    DCHECK(!error_msg->empty());
-    return nullptr;
+  if (oat_file == nullptr) {
+    TimingLogger::ScopedTiming timing("OpenOatFile", &logger);
+    space->oat_file_.reset(space->OpenOatFile(image_filename, error_msg));
+    if (space->oat_file_ == nullptr) {
+      DCHECK(!error_msg->empty());
+      return nullptr;
+    }
+    space->oat_file_non_owned_ = space->oat_file_.get();
+  } else {
+    space->oat_file_non_owned_ = oat_file;
   }
-  space->oat_file_non_owned_ = space->oat_file_.get();
 
-  if (validate_oat_file && !space->ValidateOatFile(error_msg)) {
-    DCHECK(!error_msg->empty());
-    return nullptr;
+  if (validate_oat_file) {
+    TimingLogger::ScopedTiming timing("ValidateOatFile", &logger);
+    if (!space->ValidateOatFile(error_msg)) {
+      DCHECK(!error_msg->empty());
+      return nullptr;
+    }
   }
 
   Runtime* runtime = Runtime::Current();
-  runtime->SetInstructionSet(space->oat_file_->GetOatHeader().GetInstructionSet());
 
-  if (!runtime->HasResolutionMethod()) {
-    runtime->SetResolutionMethod(image_header.GetImageMethod(ImageHeader::kResolutionMethod));
-    runtime->SetImtConflictMethod(image_header.GetImageMethod(ImageHeader::kImtConflictMethod));
+  // If oat_file is null, then this is the boot image space; use oat_file_non_owned_ from the
+  // space to set the runtime methods.
+  CHECK_EQ(oat_file != nullptr, image_header->IsAppImage());
+  if (image_header->IsAppImage()) {
+    CHECK_EQ(runtime->GetResolutionMethod(),
+             image_header->GetImageMethod(ImageHeader::kResolutionMethod));
+    CHECK_EQ(runtime->GetImtConflictMethod(),
+             image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
+    CHECK_EQ(runtime->GetImtUnimplementedMethod(),
+             image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
+    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveAll),
+             image_header->GetImageMethod(ImageHeader::kCalleeSaveMethod));
+    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kRefsOnly),
+             image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod));
+    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs),
+             image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod));
+  } else if (!runtime->HasResolutionMethod()) {
+    runtime->SetInstructionSet(space->oat_file_non_owned_->GetOatHeader().GetInstructionSet());
+    runtime->SetResolutionMethod(image_header->GetImageMethod(ImageHeader::kResolutionMethod));
+    runtime->SetImtConflictMethod(image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
     runtime->SetImtUnimplementedMethod(
-        image_header.GetImageMethod(ImageHeader::kImtUnimplementedMethod));
+        image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
     runtime->SetCalleeSaveMethod(
-        image_header.GetImageMethod(ImageHeader::kCalleeSaveMethod), Runtime::kSaveAll);
+        image_header->GetImageMethod(ImageHeader::kCalleeSaveMethod), Runtime::kSaveAll);
     runtime->SetCalleeSaveMethod(
-        image_header.GetImageMethod(ImageHeader::kRefsOnlySaveMethod), Runtime::kRefsOnly);
+        image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod), Runtime::kRefsOnly);
     runtime->SetCalleeSaveMethod(
-        image_header.GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod), Runtime::kRefsAndArgs);
+        image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod), Runtime::kRefsAndArgs);
   }
 
-  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    LOG(INFO) << "ImageSpace::Init exiting (" << PrettyDuration(NanoTime() - start_time)
-             << ") " << *space.get();
+  VLOG(image) << "ImageSpace::Init exiting " << *space.get();
+  if (VLOG_IS_ON(image)) {
+    logger.Dump(LOG(INFO));
   }
   return space.release();
 }
@@ -1002,6 +1526,16 @@
   }
 }
 
+ImageSpace* ImageSpace::CreateFromAppImage(const char* image,
+                                           const OatFile* oat_file,
+                                           std::string* error_msg) {
+  return gc::space::ImageSpace::Init(image,
+                                     image,
+                                     /*validate_oat_file*/false,
+                                     oat_file,
+                                     /*out*/error_msg);
+}
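+
+// Usage sketch (hypothetical caller, not part of this change): given the path to an app image
+// and its already-opened OatFile:
+//   std::string error_msg;
+//   std::unique_ptr<gc::space::ImageSpace> space(
+//       gc::space::ImageSpace::CreateFromAppImage(image_path, oat_file, &error_msg));
+//   if (space == nullptr) {
+//     LOG(WARNING) << "Failed to load app image: " << error_msg;
+//   }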
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 9c8e8b2..f2f4163 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -35,7 +35,7 @@
     return kSpaceTypeImageSpace;
   }
 
-  // Create a Space from an image file for a specified instruction
+  // Create a boot image space from an image file for a specified instruction
   // set. Cannot be used for future allocation or collected.
   //
   // Create also opens the OatFile associated with the image file so
@@ -43,10 +43,16 @@
   // creation of the alloc space. The ReleaseOatFile will later be
   // used to transfer ownership of the OatFile to the ClassLinker when
   // it is initialized.
-  static ImageSpace* Create(const char* image,
-                            InstructionSet image_isa,
-                            bool secondary_image,
-                            std::string* error_msg)
+  static ImageSpace* CreateBootImage(const char* image,
+                                     InstructionSet image_isa,
+                                     bool secondary_image,
+                                     std::string* error_msg)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to open an existing app image space.
+  static ImageSpace* CreateFromAppImage(const char* image,
+                                        const OatFile* oat_file,
+                                        std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Reads the image header from the specified image location for the
@@ -144,15 +150,17 @@
   }
 
  protected:
-  // Tries to initialize an ImageSpace from the given image path,
-  // returning null on error.
+  // Tries to initialize an ImageSpace from the given image path, returning null on error.
   //
-  // If validate_oat_file is false (for /system), do not verify that
-  // image's OatFile is up-to-date relative to its DexFile
-  // inputs. Otherwise (for /data), validate the inputs and generate
-  // the OatFile in /data/dalvik-cache if necessary.
-  static ImageSpace* Init(const char* image_filename, const char* image_location,
-                          bool validate_oat_file, std::string* error_msg)
+  // If validate_oat_file is false (for /system), do not verify that image's OatFile is up-to-date
+  // relative to its DexFile inputs. Otherwise (for /data), validate the inputs and generate the
+  // OatFile in /data/dalvik-cache if necessary. If the oat_file is null, it uses the oat file from
+  // the image.
+  static ImageSpace* Init(const char* image_filename,
+                          const char* image_location,
+                          bool validate_oat_file,
+                          const OatFile* oat_file,
+                          std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   OatFile* OpenOatFile(const char* image, std::string* error_msg) const
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 05b484a..ad38724 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -22,7 +22,7 @@
 namespace gc {
 namespace space {
 
-class LargeObjectSpaceTest : public SpaceTest {
+class LargeObjectSpaceTest : public SpaceTest<CommonRuntimeTest> {
  public:
   void LargeObjectTest();
 
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 49126d2..fd4d0a1 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -247,7 +247,10 @@
 size_t RosAllocSpace::Trim() {
   VLOG(heap) << "RosAllocSpace::Trim() ";
   {
-    MutexLock mu(Thread::Current(), lock_);
+    Thread* const self = Thread::Current();
+    // SOA required for RosAlloc::Trim() -> ArtRosAllocMoreCore() -> Heap::GetRosAllocSpace.
+    ScopedObjectAccess soa(self);
+    MutexLock mu(self, lock_);
     // Trim to release memory at the end of the space.
     rosalloc_->Trim();
   }
@@ -373,7 +376,8 @@
 namespace allocator {
 
 // Callback from rosalloc when it needs to increase the footprint.
-void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment) {
+void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   Heap* heap = Runtime::Current()->GetHeap();
   art::gc::space::RosAllocSpace* rosalloc_space = heap->GetRosAllocSpace(rosalloc);
   DCHECK(rosalloc_space != nullptr);
diff --git a/runtime/gc/space/rosalloc_space_base_test.cc b/runtime/gc/space/rosalloc_space_base_test.cc
deleted file mode 100644
index 0c5be03..0000000
--- a/runtime/gc/space/rosalloc_space_base_test.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(RosAllocSpace, CreateRosAllocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/space_create_test.cc b/runtime/gc/space/space_create_test.cc
new file mode 100644
index 0000000..170f927
--- /dev/null
+++ b/runtime/gc/space/space_create_test.cc
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "space_test.h"
+
+#include "dlmalloc_space.h"
+#include "rosalloc_space.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+enum MallocSpaceType {
+  kMallocSpaceDlMalloc,
+  kMallocSpaceRosAlloc,
+};
+
+class SpaceCreateTest : public SpaceTest<CommonRuntimeTestWithParam<MallocSpaceType>> {
+ public:
+  MallocSpace* CreateSpace(const std::string& name,
+                           size_t initial_size,
+                           size_t growth_limit,
+                           size_t capacity,
+                           uint8_t* requested_begin) {
+    const MallocSpaceType type = GetParam();
+    if (type == kMallocSpaceDlMalloc) {
+      return DlMallocSpace::Create(name,
+                                   initial_size,
+                                   growth_limit,
+                                   capacity,
+                                   requested_begin,
+                                   false);
+    }
+    DCHECK_EQ(static_cast<uint32_t>(type), static_cast<uint32_t>(kMallocSpaceRosAlloc));
+    return RosAllocSpace::Create(name,
+                                 initial_size,
+                                 growth_limit,
+                                 capacity,
+                                 requested_begin,
+                                 Runtime::Current()->GetHeap()->IsLowMemoryMode(),
+                                 false);
+  }
+};
+
+TEST_P(SpaceCreateTest, InitTestBody) {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
+  {
+    // Init < max == growth
+    std::unique_ptr<Space> space(CreateSpace("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init == max == growth
+    space.reset(CreateSpace("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init > max == growth
+    space.reset(CreateSpace("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+    // Growth == init < max
+    space.reset(CreateSpace("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Growth < init < max
+    space.reset(CreateSpace("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+    // Init < growth < max
+    space.reset(CreateSpace("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init < max < growth
+    space.reset(CreateSpace("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+  }
+}
+
+// TODO: This test is not very good, we should improve it.
+// The test should do more allocations before the creation of the ZygoteSpace, and then do
+// allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
+// the GC works with the ZygoteSpace.
+TEST_P(SpaceCreateTest, ZygoteSpaceTestBody) {
+  size_t dummy;
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make the space findable to the heap; it is also deleted when the runtime is cleaned up.
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+
+  // Make sure that the zygote space isn't directly at the start of the space.
+  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  space::Space* old_space = space;
+  {
+    ScopedThreadSuspension sts(self, kSuspended);
+    ScopedSuspendAll ssa("Add image space");
+    heap->RemoveSpace(old_space);
+  }
+  heap->RevokeAllThreadLocalBuffers();
+  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
+                                                              heap->IsLowMemoryMode(),
+                                                              &space);
+  delete old_space;
+  // Add the zygote space.
+  AddSpace(zygote_space, false);
+
+  // Make the space findable to the heap; it is also deleted when the runtime is cleaned up.
+  AddSpace(space, false);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  ptr1.Assign(Alloc(space,
+                    self,
+                    1 * MB,
+                    &ptr1_bytes_allocated,
+                    &ptr1_usable_size,
+                    &ptr1_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  ptr3.Assign(AllocWithGrowth(space,
+                              self,
+                              2 * MB,
+                              &ptr3_bytes_allocated,
+                              &ptr3_usable_size,
+                              &ptr3_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(2U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+
+  // Final clean up.
+  free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeTestBody) {
+  size_t dummy = 0;
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Make the space findable to the heap; it is also deleted when the runtime is cleaned up.
+  AddSpace(space);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeListTestBody) {
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make the space findable to the heap; it is also deleted when the runtime is cleaned up.
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the max allowed footprint.
+  mirror::Object* lots_of_objects[1024];
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
+    lots_of_objects[i] = Alloc(space,
+                               self,
+                               size_of_zero_length_byte_array,
+                               &allocation_size,
+                               &usable_size,
+                               &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release memory.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+
+  // Succeeds, fits by adjusting the max allowed footprint.
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    lots_of_objects[i] = AllocWithGrowth(space,
+                                         self,
+                                         1024,
+                                         &allocation_size,
+                                         &usable_size,
+                                         &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release memory.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+}
+
+INSTANTIATE_TEST_CASE_P(CreateRosAllocSpace,
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceRosAlloc));
+INSTANTIATE_TEST_CASE_P(CreateDlMallocSpace,
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceDlMalloc));
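+
+// Each INSTANTIATE_TEST_CASE_P above stamps out every TEST_P in SpaceCreateTest once per listed
+// value, so the same test bodies run against both RosAllocSpace and DlMallocSpace.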
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 4d2db11..20ef44a 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -27,25 +27,28 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "scoped_thread_state_change.h"
+#include "thread_list.h"
 #include "zygote_space.h"
 
 namespace art {
 namespace gc {
 namespace space {
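+// SpaceTest is now a mixin over its test fixture: instantiate it on CommonRuntimeTest for plain
+// gtests, or on CommonRuntimeTestWithParam<T> for parameterized ones (see space_create_test.cc
+// and large_object_space_test.cc in this change).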
 
-class SpaceTest : public CommonRuntimeTest {
+template <class Super>
+class SpaceTest : public Super {
  public:
-  jobject byte_array_class_;
-
-  SpaceTest() : byte_array_class_(nullptr) {
-  }
+  jobject byte_array_class_ = nullptr;
 
   void AddSpace(ContinuousSpace* space, bool revoke = true) {
     Heap* heap = Runtime::Current()->GetHeap();
     if (revoke) {
       heap->RevokeAllThreadLocalBuffers();
     }
-    heap->AddSpace(space);
+    {
+      ScopedThreadStateChange sts(Thread::Current(), kSuspended);
+      ScopedSuspendAll ssa("Add image space");
+      heap->AddSpace(space);
+    }
     heap->SetSpaceAsDefault(space);
   }
 
@@ -62,13 +65,19 @@
     return reinterpret_cast<mirror::Class*>(self->DecodeJObject(byte_array_class_));
   }
 
-  mirror::Object* Alloc(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                        size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* Alloc(space::MallocSpace* alloc_space,
+                        Thread* self,
+                        size_t bytes,
+                        size_t* bytes_allocated,
+                        size_t* usable_size,
                         size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
     Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self)));
-    mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size,
+    mirror::Object* obj = alloc_space->Alloc(self,
+                                             bytes,
+                                             bytes_allocated,
+                                             usable_size,
                                              bytes_tl_bulk_allocated);
     if (obj != nullptr) {
       InstallClass(obj, byte_array_class.Get(), bytes);
@@ -76,8 +85,11 @@
     return obj;
   }
 
-  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                                  size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space,
+                                  Thread* self,
+                                  size_t bytes,
+                                  size_t* bytes_allocated,
+                                  size_t* usable_size,
                                   size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
@@ -117,10 +129,6 @@
 
   typedef MallocSpace* (*CreateSpaceFn)(const std::string& name, size_t initial_size, size_t growth_limit,
                                         size_t capacity, uint8_t* requested_begin);
-  void InitTestBody(CreateSpaceFn create_space);
-  void ZygoteSpaceTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeListTestBody(CreateSpaceFn create_space);
 
   void SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
                                            int round, size_t growth_limit);
@@ -132,278 +140,11 @@
   return *seed;
 }
 
-void SpaceTest::InitTestBody(CreateSpaceFn create_space) {
-  // This will lead to error messages in the log.
-  ScopedLogSeverity sls(LogSeverity::FATAL);
-
-  {
-    // Init < max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init == max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init > max == growth
-    std::unique_ptr<Space> space(create_space("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Growth == init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Growth < init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Init < growth < max
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init < max < growth
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-}
-
-// TODO: This test is not very good, we should improve it.
-// The test should do more allocations before the creation of the ZygoteSpace, and then do
-// allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
-// the GC works with the ZygoteSpace.
-void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) {
-  size_t dummy;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-
-  // Make sure that the zygote space isn't directly at the start of the space.
-  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  space::Space* old_space = space;
-  heap->RemoveSpace(old_space);
-  heap->RevokeAllThreadLocalBuffers();
-  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
-                                                              heap->IsLowMemoryMode(),
-                                                              &space);
-  delete old_space;
-  // Add the zygote space.
-  AddSpace(zygote_space, false);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space, false);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                    &ptr1_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                              &ptr3_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(2U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-
-  // Final clean up.
-  free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) {
-  size_t dummy = 0;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) {
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the max allowed footprint.
-  mirror::Object* lots_of_objects[1024];
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
-    lots_of_objects[i] = Alloc(space, self, size_of_zero_length_byte_array, &allocation_size,
-                               &usable_size, &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-
-  // Succeeds, fits by adjusting the max allowed footprint.
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size,
-                                         &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-}
-
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
-                                                    int round, size_t growth_limit) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space,
+                                                           intptr_t object_size,
+                                                           int round,
+                                                           size_t growth_limit) {
   if (((object_size > 0 && object_size >= static_cast<intptr_t>(growth_limit))) ||
       ((object_size < 0 && -object_size >= static_cast<intptr_t>(growth_limit)))) {
     // No allocation can succeed
@@ -576,7 +317,9 @@
   EXPECT_LE(space->Size(), growth_limit);
 }
 
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size, CreateSpaceFn create_space) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size,
+                                                             CreateSpaceFn create_space) {
   if (object_size < SizeOfZeroLengthByteArray()) {
     // Too small for the object layout/model.
     return;
@@ -614,25 +357,8 @@
     SizeFootPrintGrowthLimitAndTrimDriver(-size, spaceFn); \
   }
 
-#define TEST_SPACE_CREATE_FN_BASE(spaceName, spaceFn) \
-  class spaceName##BaseTest : public SpaceTest { \
-  }; \
-  \
-  TEST_F(spaceName##BaseTest, Init) { \
-    InitTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, ZygoteSpace) { \
-    ZygoteSpaceTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFree) { \
-    AllocAndFreeTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFreeList) { \
-    AllocAndFreeListTestBody(spaceFn); \
-  }
-
 #define TEST_SPACE_CREATE_FN_STATIC(spaceName, spaceFn) \
-  class spaceName##StaticTest : public SpaceTest { \
+  class spaceName##StaticTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(12B, spaceName, spaceFn, 12) \
@@ -648,7 +374,7 @@
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(8MB, spaceName, spaceFn, 8 * MB)
 
 #define TEST_SPACE_CREATE_FN_RANDOM(spaceName, spaceFn) \
-  class spaceName##RandomTest : public SpaceTest { \
+  class spaceName##RandomTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimRandom(16B, spaceName, spaceFn, 16) \
diff --git a/runtime/handle.h b/runtime/handle.h
index 5b3bb60..a415373 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -70,13 +70,11 @@
     return reinterpret_cast<jobject>(reference_);
   }
 
-  ALWAYS_INLINE StackReference<mirror::Object>* GetReference()
-      SHARED_REQUIRES(Locks::mutator_lock_) {
+  ALWAYS_INLINE StackReference<mirror::Object>* GetReference() {
     return reference_;
   }
 
-  ALWAYS_INLINE const StackReference<mirror::Object>* GetReference() const
-      SHARED_REQUIRES(Locks::mutator_lock_) {
+  ALWAYS_INLINE const StackReference<mirror::Object>* GetReference() const {
     return reference_;
   }
 
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index e617348..d53a0e4 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -62,8 +62,7 @@
   ALWAYS_INLINE mirror::Object* GetReference(size_t i) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE Handle<mirror::Object> GetHandle(size_t i)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE Handle<mirror::Object> GetHandle(size_t i);
 
   ALWAYS_INLINE MutableHandle<mirror::Object> GetMutableHandle(size_t i)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index dfc1f5f..bb35ec7 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -419,18 +419,13 @@
   Hprof(const char* output_filename, int fd, bool direct_to_ddms)
       : filename_(output_filename),
         fd_(fd),
-        direct_to_ddms_(direct_to_ddms),
-        start_ns_(NanoTime()),
-        output_(nullptr),
-        current_heap_(HPROF_HEAP_DEFAULT),
-        objects_in_segment_(0),
-        next_string_id_(0x400000),
-        next_class_serial_number_(1) {
+        direct_to_ddms_(direct_to_ddms) {
     LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting...";
   }
 
   void Dump()
-    REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
+    REQUIRES(Locks::mutator_lock_)
+    REQUIRES(!Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
     {
       MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
       if (Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) {
@@ -462,10 +457,11 @@
     }
 
     if (okay) {
-      uint64_t duration = NanoTime() - start_ns_;
-      LOG(INFO) << "hprof: heap dump completed ("
-          << PrettySize(RoundUp(overall_size, 1024))
-          << ") in " << PrettyDuration(duration);
+      const uint64_t duration = NanoTime() - start_ns_;
+      LOG(INFO) << "hprof: heap dump completed (" << PrettySize(RoundUp(overall_size, KB))
+                << ") in " << PrettyDuration(duration)
+                << ", " << total_objects_ << " objects, "
+                << total_objects_with_stack_trace_ << " with stack traces";
     }
   }
 
@@ -855,7 +851,7 @@
     }
     CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1);
     CHECK_EQ(frames_.size(), next_frame_id);
-    VLOG(heap) << "hprof: found " << count << " objects with allocation stack traces";
+    total_objects_with_stack_trace_ = count;
   }
 
   // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored.
@@ -865,16 +861,19 @@
   int fd_;
   bool direct_to_ddms_;
 
-  uint64_t start_ns_;
+  uint64_t start_ns_ = NanoTime();
 
-  EndianOutput* output_;
+  EndianOutput* output_ = nullptr;
 
-  HprofHeapId current_heap_;  // Which heap we're currently dumping.
-  size_t objects_in_segment_;
+  HprofHeapId current_heap_ = HPROF_HEAP_DEFAULT;  // Which heap we're currently dumping.
+  size_t objects_in_segment_ = 0;
 
-  HprofStringId next_string_id_;
+  size_t total_objects_ = 0u;
+  size_t total_objects_with_stack_trace_ = 0u;
+
+  HprofStringId next_string_id_ = 0x400000;
   SafeMap<std::string, HprofStringId> strings_;
-  HprofClassSerialNumber next_class_serial_number_;
+  HprofClassSerialNumber next_class_serial_number_ = 1;
   SafeMap<mirror::Class*, HprofClassSerialNumber> classes_;
 
   std::unordered_map<const gc::AllocRecordStackTrace*, HprofStackTraceSerialNumber,
@@ -1064,6 +1063,8 @@
     return;
   }
 
+  ++total_objects_;
+
   GcRootVisitor visitor(this);
   obj->VisitReferences(visitor, VoidFunctor());
 
diff --git a/runtime/image-inl.h b/runtime/image-inl.h
new file mode 100644
index 0000000..e3307d8
--- /dev/null
+++ b/runtime/image-inl.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_IMAGE_INL_H_
+#define ART_RUNTIME_IMAGE_INL_H_
+
+#include "image.h"
+
+namespace art {
+
+template <ReadBarrierOption kReadBarrierOption>
+inline mirror::Object* ImageHeader::GetImageRoot(ImageRoot image_root) const {
+  mirror::ObjectArray<mirror::Object>* image_roots = GetImageRoots<kReadBarrierOption>();
+  return image_roots->Get<kVerifyNone, kReadBarrierOption>(static_cast<int32_t>(image_root));
+}
+
+template <ReadBarrierOption kReadBarrierOption>
+inline mirror::ObjectArray<mirror::Object>* ImageHeader::GetImageRoots() const {
+  // Need a read barrier as it's not visited during root scan.
+  // Pass in the address of the local variable to the read barrier
+  // rather than image_roots_ because it won't move (asserted below)
+  // and it's a const member.
+  mirror::ObjectArray<mirror::Object>* image_roots =
+      reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(image_roots_);
+  mirror::ObjectArray<mirror::Object>* result =
+      ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::Object>, kReadBarrierOption>(
+          &image_roots);
+  DCHECK_EQ(image_roots, result);
+  return image_roots;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_IMAGE_INL_H_
diff --git a/runtime/image.cc b/runtime/image.cc
index 3cb6642..de00343 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '5', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '6', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
@@ -35,8 +35,13 @@
                          uint32_t oat_data_begin,
                          uint32_t oat_data_end,
                          uint32_t oat_file_end,
+                         uint32_t boot_image_begin,
+                         uint32_t boot_image_size,
+                         uint32_t boot_oat_begin,
+                         uint32_t boot_oat_size,
                          uint32_t pointer_size,
                          bool compile_pic,
+                         bool is_pic,
                          StorageMode storage_mode,
                          size_t data_size)
   : image_begin_(image_begin),
@@ -46,10 +51,15 @@
     oat_data_begin_(oat_data_begin),
     oat_data_end_(oat_data_end),
     oat_file_end_(oat_file_end),
+    boot_image_begin_(boot_image_begin),
+    boot_image_size_(boot_image_size),
+    boot_oat_begin_(boot_oat_begin),
+    boot_oat_size_(boot_oat_size),
     patch_delta_(0),
     image_roots_(image_roots),
     pointer_size_(pointer_size),
     compile_pic_(compile_pic),
+    is_pic_(is_pic),
     storage_mode_(storage_mode),
     data_size_(data_size) {
   CHECK_EQ(image_begin, RoundUp(image_begin, kPageSize));
@@ -67,13 +77,21 @@
 
 void ImageHeader::RelocateImage(off_t delta) {
   CHECK_ALIGNED(delta, kPageSize) << " patch delta must be page aligned";
-  image_begin_ += delta;
   oat_file_begin_ += delta;
   oat_data_begin_ += delta;
   oat_data_end_ += delta;
   oat_file_end_ += delta;
-  image_roots_ += delta;
   patch_delta_ += delta;
+  RelocateImageObjects(delta);
+  RelocateImageMethods(delta);
+}
+
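+// Relocation is split so that objects and methods can be moved independently; for app images
+// the image methods may live inside the boot image (see image_methods_ in image.h).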
+void ImageHeader::RelocateImageObjects(off_t delta) {
+  image_begin_ += delta;
+  image_roots_ += delta;
+}
+
+void ImageHeader::RelocateImageMethods(off_t delta) {
   for (size_t i = 0; i < kImageMethodsCount; ++i) {
     image_methods_[i] += delta;
   }
@@ -110,24 +128,6 @@
   return reinterpret_cast<const char*>(magic_);
 }
 
-mirror::Object* ImageHeader::GetImageRoot(ImageRoot image_root) const {
-  return GetImageRoots()->Get(image_root);
-}
-
-mirror::ObjectArray<mirror::Object>* ImageHeader::GetImageRoots() const {
-  // Need a read barrier as it's not visited during root scan.
-  // Pass in the address of the local variable to the read barrier
-  // rather than image_roots_ because it won't move (asserted below)
-  // and it's a const member.
-  mirror::ObjectArray<mirror::Object>* image_roots =
-      reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(image_roots_);
-  mirror::ObjectArray<mirror::Object>* result =
-      ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::Object>, kWithReadBarrier>(
-          &image_roots);
-  DCHECK_EQ(image_roots, result);
-  return result;
-}
-
 ArtMethod* ImageHeader::GetImageMethod(ImageMethod index) const {
   CHECK_LT(static_cast<size_t>(index), kImageMethodsCount);
   return reinterpret_cast<ArtMethod*>(image_methods_[index]);
diff --git a/runtime/image.h b/runtime/image.h
index 7418f66..c449e43 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -93,10 +93,15 @@
         oat_data_begin_(0U),
         oat_data_end_(0U),
         oat_file_end_(0U),
+        boot_image_begin_(0U),
+        boot_image_size_(0U),
+        boot_oat_begin_(0U),
+        boot_oat_size_(0U),
         patch_delta_(0),
         image_roots_(0U),
         pointer_size_(0U),
         compile_pic_(0),
+        is_pic_(0),
         storage_mode_(kDefaultStorageMode),
         data_size_(0) {}
 
@@ -109,8 +114,13 @@
               uint32_t oat_data_begin,
               uint32_t oat_data_end,
               uint32_t oat_file_end,
+              uint32_t boot_image_begin,
+              uint32_t boot_image_size,
+              uint32_t boot_oat_begin,
+              uint32_t boot_oat_size,
               uint32_t pointer_size,
               bool compile_pic,
+              bool is_pic,
               StorageMode storage_mode,
               size_t data_size);
 
@@ -202,17 +212,42 @@
     return GetImageSection(kSectionArtMethods);
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Object* GetImageRoot(ImageRoot image_root) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::ObjectArray<mirror::Object>* GetImageRoots() const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void RelocateImage(off_t delta);
+  void RelocateImageMethods(off_t delta);
+  void RelocateImageObjects(off_t delta);
 
   bool CompilePic() const {
     return compile_pic_ != 0;
   }
 
+  bool IsPic() const {
+    return is_pic_ != 0;
+  }
+
+  uint32_t GetBootImageBegin() const {
+    return boot_image_begin_;
+  }
+
+  uint32_t GetBootImageSize() const {
+    return boot_image_size_;
+  }
+
+  uint32_t GetBootOatBegin() const {
+    return boot_oat_begin_;
+  }
+
+  uint32_t GetBootOatSize() const {
+    return boot_oat_size_;
+  }
+
   StorageMode GetStorageMode() const {
     return storage_mode_;
   }
@@ -221,6 +256,12 @@
     return data_size_;
   }
 
+  bool IsAppImage() const {
+    // App images currently require a boot image, so a non-zero boot image size means this is
+    // an app image header.
+    return boot_image_size_ != 0u;
+  }
+
  private:
   static const uint8_t kImageMagic[4];
   static const uint8_t kImageVersion[4];
@@ -250,6 +291,16 @@
   // .so files. Used for positioning a following alloc spaces.
   uint32_t oat_file_end_;
 
+  // Boot image begin and size (app image headers only).
+  uint32_t boot_image_begin_;
+  uint32_t boot_image_size_;
+
+  // Boot oat begin and size (app image headers only).
+  uint32_t boot_oat_begin_;
+  uint32_t boot_oat_size_;
+
+  // TODO: We should probably insert a boot image checksum for app images.
+
   // The total delta that this image has been patched.
   int32_t patch_delta_;
 
@@ -262,10 +313,15 @@
   // Boolean (0 or 1) to denote if the image was compiled with --compile-pic option
   const uint32_t compile_pic_;
 
+  // Boolean (0 or 1) to denote if the image can be mapped at a random address; this only refers to
+  // the .art file. Currently, app oat files do not depend on their app image. There are no pointers
+  // from the app oat code to the app image.
+  const uint32_t is_pic_;
+
   // Image section sizes/offsets correspond to the uncompressed form.
   ImageSection sections_[kSectionCount];
 
-  // Image methods.
+  // Image methods; these may be inside the boot image for app images.
   uint64_t image_methods_[kImageMethodsCount];
 
   // Storage method for the image, the image may be compressed.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 7d55e8c..c57b1bb 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -78,7 +78,7 @@
       have_field_read_listeners_(false),
       have_field_write_listeners_(false),
       have_exception_caught_listeners_(false),
-      have_backward_branch_listeners_(false),
+      have_branch_listeners_(false),
       have_invoke_virtual_or_interface_listeners_(false),
       deoptimized_methods_lock_("deoptimized methods lock"),
       deoptimization_enabled_(false),
@@ -431,11 +431,11 @@
                            method_unwind_listeners_,
                            listener,
                            &have_method_unwind_listeners_);
-  PotentiallyAddListenerTo(kBackwardBranch,
+  PotentiallyAddListenerTo(kBranch,
                            events,
-                           backward_branch_listeners_,
+                           branch_listeners_,
                            listener,
-                           &have_backward_branch_listeners_);
+                           &have_branch_listeners_);
   PotentiallyAddListenerTo(kInvokeVirtualOrInterface,
                            events,
                            invoke_virtual_or_interface_listeners_,
@@ -508,11 +508,11 @@
                                 method_unwind_listeners_,
                                 listener,
                                 &have_method_unwind_listeners_);
-  PotentiallyRemoveListenerFrom(kBackwardBranch,
+  PotentiallyRemoveListenerFrom(kBranch,
                                 events,
-                                backward_branch_listeners_,
+                                branch_listeners_,
                                 listener,
-                                &have_backward_branch_listeners_);
+                                &have_branch_listeners_);
   PotentiallyRemoveListenerFrom(kInvokeVirtualOrInterface,
                                 events,
                                 invoke_virtual_or_interface_listeners_,
@@ -917,11 +917,13 @@
   }
 }
 
-void Instrumentation::BackwardBranchImpl(Thread* thread, ArtMethod* method,
-                                         int32_t offset) const {
-  for (InstrumentationListener* listener : backward_branch_listeners_) {
+void Instrumentation::BranchImpl(Thread* thread,
+                                 ArtMethod* method,
+                                 uint32_t dex_pc,
+                                 int32_t offset) const {
+  for (InstrumentationListener* listener : branch_listeners_) {
     if (listener != nullptr) {
-      listener->BackwardBranch(thread, method, offset);
+      listener->Branch(thread, method, dex_pc, offset);
     }
   }
 }
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index b29245f..56aeefc 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -94,8 +94,11 @@
   virtual void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
-  // Call-back for when we get a backward branch.
-  virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  // Call-back for when we execute a branch.
+  virtual void Branch(Thread* thread,
+                      ArtMethod* method,
+                      uint32_t dex_pc,
+                      int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
   // Call-back for when we get an invokevirtual or an invokeinterface.
@@ -122,7 +125,7 @@
     kFieldRead = 0x10,
     kFieldWritten = 0x20,
     kExceptionCaught = 0x40,
-    kBackwardBranch = 0x80,
+    kBranch = 0x80,
     kInvokeVirtualOrInterface = 0x100,
   };
 
@@ -276,8 +279,8 @@
     return have_exception_caught_listeners_;
   }
 
-  bool HasBackwardBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return have_backward_branch_listeners_;
+  bool HasBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_branch_listeners_;
   }
 
   bool HasInvokeVirtualOrInterfaceListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -324,11 +327,11 @@
     }
   }
 
-  // Inform listeners that a backward branch has been taken (only supported by the interpreter).
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t offset) const
+  // Inform listeners that a branch has been taken (only supported by the interpreter).
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (UNLIKELY(HasBackwardBranchListeners())) {
-      BackwardBranchImpl(thread, method, offset);
+    if (UNLIKELY(HasBranchListeners())) {
+      BranchImpl(thread, method, dex_pc, offset);
     }
   }
 
@@ -442,7 +445,7 @@
   void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                            ArtMethod* method, uint32_t dex_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void BackwardBranchImpl(Thread* thread, ArtMethod* method, int32_t offset) const
+  void BranchImpl(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   void InvokeVirtualOrInterfaceImpl(Thread* thread,
                                     mirror::Object* this_object,
@@ -513,8 +516,8 @@
   // Do we have any exception caught listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
-  // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_.
-  bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  // Do we have any branch listeners? Short-cut to avoid taking the instrumentation_lock_.
+  bool have_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
   // Do we have any invoke listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_invoke_virtual_or_interface_listeners_ GUARDED_BY(Locks::mutator_lock_);
@@ -537,7 +540,7 @@
   std::list<InstrumentationListener*> method_entry_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
-  std::list<InstrumentationListener*> backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> invoke_virtual_or_interface_listeners_
       GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index e4688a2..56e3bc5 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -37,7 +37,7 @@
     : received_method_enter_event(false), received_method_exit_event(false),
       received_method_unwind_event(false), received_dex_pc_moved_event(false),
       received_field_read_event(false), received_field_written_event(false),
-      received_exception_caught_event(false), received_backward_branch_event(false),
+      received_exception_caught_event(false), received_branch_event(false),
       received_invoke_virtual_or_interface_event(false) {}
 
   virtual ~TestInstrumentationListener() {}
@@ -100,11 +100,12 @@
     received_exception_caught_event = true;
   }
 
-  void BackwardBranch(Thread* thread ATTRIBUTE_UNUSED,
-                      ArtMethod* method ATTRIBUTE_UNUSED,
-                      int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+  void Branch(Thread* thread ATTRIBUTE_UNUSED,
+              ArtMethod* method ATTRIBUTE_UNUSED,
+              uint32_t dex_pc ATTRIBUTE_UNUSED,
+              int32_t dex_pc_offset ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    received_backward_branch_event = true;
+    received_branch_event = true;
   }
 
   void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
@@ -124,7 +125,7 @@
     received_field_read_event = false;
     received_field_written_event = false;
     received_exception_caught_event = false;
-    received_backward_branch_event = false;
+    received_branch_event = false;
     received_invoke_virtual_or_interface_event = false;
   }
 
@@ -135,7 +136,7 @@
   bool received_field_read_event;
   bool received_field_written_event;
   bool received_exception_caught_event;
-  bool received_backward_branch_event;
+  bool received_branch_event;
   bool received_invoke_virtual_or_interface_event;
 
  private:
@@ -305,8 +306,8 @@
         return instr->HasFieldWriteListeners();
       case instrumentation::Instrumentation::kExceptionCaught:
         return instr->HasExceptionCaughtListeners();
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return instr->HasBackwardBranchListeners();
+      case instrumentation::Instrumentation::kBranch:
+        return instr->HasBranchListeners();
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return instr->HasInvokeVirtualOrInterfaceListeners();
       default:
@@ -349,8 +350,8 @@
         self->ClearException();
         break;
       }
-      case instrumentation::Instrumentation::kBackwardBranch:
-        instr->BackwardBranch(self, method, dex_pc);
+      case instrumentation::Instrumentation::kBranch:
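+        // The dex_pc_offset argument is irrelevant to this test; pass a dummy value.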
+        instr->Branch(self, method, dex_pc, -1);
         break;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         instr->InvokeVirtualOrInterface(self, obj, method, dex_pc, method);
@@ -378,8 +379,8 @@
         return listener.received_field_written_event;
       case instrumentation::Instrumentation::kExceptionCaught:
         return listener.received_exception_caught_event;
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return listener.received_backward_branch_event;
+      case instrumentation::Instrumentation::kBranch:
+        return listener.received_branch_event;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return listener.received_invoke_virtual_or_interface_event;
       default:
@@ -441,8 +442,8 @@
   TestEvent(instrumentation::Instrumentation::kExceptionCaught);
 }
 
-TEST_F(InstrumentationTest, BackwardBranchEvent) {
-  TestEvent(instrumentation::Instrumentation::kBackwardBranch);
+TEST_F(InstrumentationTest, BranchEvent) {
+  TestEvent(instrumentation::Instrumentation::kBranch);
 }
 
 TEST_F(InstrumentationTest, InvokeVirtualOrInterfaceEvent) {
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 015bf98..74a2532 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -22,6 +22,7 @@
 #include "gc/collector/garbage_collector.h"
 #include "gc/space/image_space.h"
 #include "gc/weak_root_state.h"
+#include "image-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
@@ -86,14 +87,24 @@
   // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
-mirror::String* InternTable::LookupStrong(mirror::String* s) {
-  return strong_interns_.Find(s);
+mirror::String* InternTable::LookupWeak(Thread* self, mirror::String* s) {
+  MutexLock mu(self, *Locks::intern_table_lock_);
+  return LookupWeakLocked(s);
 }
 
-mirror::String* InternTable::LookupWeak(mirror::String* s) {
+mirror::String* InternTable::LookupStrong(Thread* self, mirror::String* s) {
+  MutexLock mu(self, *Locks::intern_table_lock_);
+  return LookupStrongLocked(s);
+}
+
+mirror::String* InternTable::LookupWeakLocked(mirror::String* s) {
   return weak_interns_.Find(s);
 }
 
+mirror::String* InternTable::LookupStrongLocked(mirror::String* s) {
+  return strong_interns_.Find(s);
+}
+
 void InternTable::AddNewTable() {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
   weak_interns_.AddNewTable();
@@ -169,7 +180,7 @@
         for (size_t j = 0; j < num_strings; ++j) {
           mirror::String* image_string = dex_cache->GetResolvedString(j);
           if (image_string != nullptr) {
-            mirror::String* found = LookupStrong(image_string);
+            mirror::String* found = LookupStrongLocked(image_string);
             if (found == nullptr) {
               InsertStrong(image_string);
             } else {
@@ -250,7 +261,7 @@
       }
     }
     // Check the strong table for a match.
-    mirror::String* strong = LookupStrong(s);
+    mirror::String* strong = LookupStrongLocked(s);
     if (strong != nullptr) {
       return strong;
     }
@@ -272,7 +283,7 @@
     CHECK(self->GetWeakRefAccessEnabled());
   }
   // There is no match in the strong table, check the weak table.
-  mirror::String* weak = LookupWeak(s);
+  mirror::String* weak = LookupWeakLocked(s);
   if (weak != nullptr) {
     if (is_strong) {
       // A match was found in the weak table. Promote to the strong table.
@@ -317,8 +328,7 @@
 }
 
 bool InternTable::ContainsWeak(mirror::String* s) {
-  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  return LookupWeak(s) == s;
+  return LookupWeak(Thread::Current(), s) == s;
 }
 
 void InternTable::SweepInternTableWeaks(IsMarkedVisitor* visitor) {
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 2b2176e..274f5ad 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -84,10 +84,22 @@
   bool ContainsWeak(mirror::String* s) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::intern_table_lock_);
 
+  // Looks up a strong intern; returns null if not found.
+  mirror::String* LookupStrong(Thread* self, mirror::String* s)
+      REQUIRES(!Locks::intern_table_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Looks up a weak intern; returns null if not found.
+  mirror::String* LookupWeak(Thread* self, mirror::String* s)
+      REQUIRES(!Locks::intern_table_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Total number of interned strings.
   size_t Size() const REQUIRES(!Locks::intern_table_lock_);
+
   // Total number of strongly live interned strings.
   size_t StrongSize() const REQUIRES(!Locks::intern_table_lock_);
+
   // Total number of weakly live interned strings.
   size_t WeakSize() const REQUIRES(!Locks::intern_table_lock_);
 
@@ -186,9 +198,9 @@
   mirror::String* Insert(mirror::String* s, bool is_strong, bool holding_locks)
       REQUIRES(!Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  mirror::String* LookupStrong(mirror::String* s)
+  mirror::String* LookupStrongLocked(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* LookupWeak(mirror::String* s)
+  mirror::String* LookupWeakLocked(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
   mirror::String* InsertStrong(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 6b5218d..ec63fdf 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -239,7 +239,7 @@
 }
 
 #if !defined(__clang__)
-#if defined(__arm__)
+#if (defined(__arm__) || defined(__i386__))
 // TODO: remove when all targets implemented.
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
@@ -247,7 +247,7 @@
 #endif
 #else
 // Clang 3.4 fails to build the goto interpreter implementation.
-#if defined(__arm__)
+#if (defined(__arm__) || defined(__i386__))
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
 static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImplKind;
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index ecd4de9..6c9cc70 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -518,10 +518,10 @@
         Dbg::IsForcedInterpreterNeededForCalling(self, target);
 }
 
-static void ArtInterpreterToCompiledCodeBridge(Thread* self,
-                                               const DexFile::CodeItem* code_item,
-                                               ShadowFrame* shadow_frame,
-                                               JValue* result)
+void ArtInterpreterToCompiledCodeBridge(Thread* self,
+                                        const DexFile::CodeItem* code_item,
+                                        ShadowFrame* shadow_frame,
+                                        JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
   // Ensure static methods are initialized.
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 932d255..949112d 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -980,6 +980,9 @@
   return branch_offset <= 0;
 }
 
+// Hands the given shadow frame over to compiled code and stores the result; shared with the
+// interpreter loops for the JIT-at-first-use path.
+void ArtInterpreterToCompiledCodeBridge(Thread* self, const DexFile::CodeItem* code_item,
+                                        ShadowFrame* shadow_frame, JValue* result);
+
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
   template SHARED_REQUIRES(Locks::mutator_lock_)                                     \
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 9766299..d50cfe6 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -21,6 +21,7 @@
 #include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
+#include "jit/jit.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -63,10 +64,10 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
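+// Notify instrumentation of each taken branch; relies on a local dex_pc at the expansion site.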
+#define BRANCH_INSTRUMENTATION(offset) \
   do { \
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
   } while (false)
 
 #define UNREACHABLE_CODE_CHECK()                \
@@ -183,9 +184,25 @@
       self->AssertNoPendingException();
     }
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    ArtMethod* method = shadow_frame.GetMethod();
+
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                        shadow_frame.GetMethod(), 0);
+                                        method, 0);
+    }
+
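+    // When the JIT compiles at first use and this method already has compiled code, hand the
+    // frame to the compiled code instead of interpreting.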
+    if (UNLIKELY(Runtime::Current()->GetJit() != nullptr &&
+                 Runtime::Current()->GetJit()->JitAtFirstUse() &&
+                 method->HasAnyCompiledCode())) {
+      JValue result;
+
+      // Pop the shadow frame before calling into compiled code.
+      self->PopShadowFrame();
+      ArtInterpreterToCompiledCodeBridge(self, code_item, &shadow_frame, &result);
+      // Push the shadow frame back as the caller will expect it.
+      self->PushShadowFrame(&shadow_frame);
+
+      return result;
     }
   }
 
@@ -633,8 +650,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO) {
     int8_t offset = inst->VRegA_10t(inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -646,8 +663,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO_16) {
     int16_t offset = inst->VRegA_20t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -659,8 +676,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO_32) {
     int32_t offset = inst->VRegA_30t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -672,8 +689,8 @@
 
   HANDLE_INSTRUCTION_START(PACKED_SWITCH) {
     int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -685,8 +702,8 @@
 
   HANDLE_INSTRUCTION_START(SPARSE_SWITCH) {
     int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -788,8 +805,8 @@
   HANDLE_INSTRUCTION_START(IF_EQ) {
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) == shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -806,8 +823,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -824,8 +841,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -842,8 +859,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -860,8 +877,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
     shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -878,8 +895,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -895,8 +912,8 @@
   HANDLE_INSTRUCTION_START(IF_EQZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -912,8 +929,8 @@
   HANDLE_INSTRUCTION_START(IF_NEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -929,8 +946,8 @@
   HANDLE_INSTRUCTION_START(IF_LTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -946,8 +963,8 @@
   HANDLE_INSTRUCTION_START(IF_GEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -963,8 +980,8 @@
   HANDLE_INSTRUCTION_START(IF_GTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -980,8 +997,8 @@
   HANDLE_INSTRUCTION_START(IF_LEZ)  {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bab0d40..56a213c 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -17,6 +17,7 @@
 #include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
+#include "jit/jit.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -69,9 +70,9 @@
     }                                                                                           \
   } while (false)
 
-#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+#define BRANCH_INSTRUMENTATION(offset) \
   do { \
-    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
   } while (false)
 
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
@@ -96,9 +97,26 @@
     if (kIsDebugBuild) {
         self->AssertNoPendingException();
     }
+
+    ArtMethod* method = shadow_frame.GetMethod();
+
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                        shadow_frame.GetMethod(), 0);
+                                        method, 0);
+    }
+
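+    // Same shortcut as in the goto interpreter: with JIT-at-first-use, prefer any existing
+    // compiled code over interpreting the method.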
+    if (UNLIKELY(Runtime::Current()->GetJit() != nullptr &&
+                 Runtime::Current()->GetJit()->JitAtFirstUse() &&
+                 method->HasAnyCompiledCode())) {
+      JValue result;
+
+      // Pop the shadow frame before calling into compiled code.
+      self->PopShadowFrame();
+      ArtInterpreterToCompiledCodeBridge(self, code_item, &shadow_frame, &result);
+      // Push the shadow frame back as the caller will expect it.
+      self->PushShadowFrame(&shadow_frame);
+
+      return result;
     }
   }
   const uint16_t* const insns = code_item->insns_;
@@ -565,8 +583,8 @@
       case Instruction::GOTO: {
         PREAMBLE();
         int8_t offset = inst->VRegA_10t(inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -575,8 +593,8 @@
       case Instruction::GOTO_16: {
         PREAMBLE();
         int16_t offset = inst->VRegA_20t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -585,8 +603,8 @@
       case Instruction::GOTO_32: {
         PREAMBLE();
         int32_t offset = inst->VRegA_30t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -595,8 +613,8 @@
       case Instruction::PACKED_SWITCH: {
         PREAMBLE();
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -605,8 +623,8 @@
       case Instruction::SPARSE_SWITCH: {
         PREAMBLE();
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -709,8 +727,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) ==
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -724,8 +742,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -739,8 +757,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -754,8 +772,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -769,8 +787,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -784,8 +802,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -798,8 +816,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -812,8 +830,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -826,8 +844,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -840,8 +858,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -854,8 +872,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -868,8 +886,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
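Editor's note: taken together, the interpreter hunks above replace the old back-edge-only hook with an unconditional one. BRANCH_INSTRUMENTATION now fires for every taken branch, while the suspension point stays gated on branch direction. A minimal C++ sketch of the resulting pattern, using only the names visible in the hunks:

    int16_t offset = inst->VRegC_22t();
    BRANCH_INSTRUMENTATION(offset);        // now reports all taken branches
    if (IsBackwardBranch(offset)) {
      self->AllowThreadSuspension();       // suspend check only on back edges
    }
    inst = inst->RelativeAt(offset);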
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 617f572..1dba856 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -150,12 +150,8 @@
     b MterpDone
 MterpReturn:
     ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     str     r0, [r2]
     str     r1, [r2, #4]
-    mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    blne    MterpSuspendCheck                       @ (self)
     mov     r0, #1                                  @ signal return to caller.
 MterpDone:
     add     sp, sp, #4                              @ un-align 64
diff --git a/runtime/interpreter/mterp/arm/op_check_cast.S b/runtime/interpreter/mterp/arm/op_check_cast.S
index 3e3ac70..24eba45 100644
--- a/runtime/interpreter/mterp/arm/op_check_cast.S
+++ b/runtime/interpreter/mterp/arm/op_check_cast.S
@@ -5,10 +5,10 @@
     EXPORT_PC
     FETCH    r0, 1                      @ r0<- BBBB
     mov      r1, rINST, lsr #8          @ r1<- AA
-    GET_VREG r1, r1                     @ r1<- object
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
     ldr      r2, [rFP, #OFF_FP_METHOD]  @ r2<- method
     mov      r3, rSELF                  @ r3<- self
-    bl       MterpCheckCast             @ (index, obj, method, self)
+    bl       MterpCheckCast             @ (index, &obj, method, self)
     PREFETCH_INST 2
     cmp      r0, #0
     bne      MterpPossibleException
diff --git a/runtime/interpreter/mterp/arm/op_instance_of.S b/runtime/interpreter/mterp/arm/op_instance_of.S
index e94108c..d76f0b0 100644
--- a/runtime/interpreter/mterp/arm/op_instance_of.S
+++ b/runtime/interpreter/mterp/arm/op_instance_of.S
@@ -8,12 +8,12 @@
     EXPORT_PC
     FETCH     r0, 1                     @ r0<- CCCC
     mov       r1, rINST, lsr #12        @ r1<- B
-    GET_VREG  r1, r1                    @ r1<- vB (object)
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
     ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
     mov       r3, rSELF                 @ r3<- self
     mov       r9, rINST, lsr #8         @ r9<- A+
     and       r9, r9, #15               @ r9<- A
-    bl        MterpInstanceOf           @ (index, obj, method, self)
+    bl        MterpInstanceOf           @ (index, &obj, method, self)
     ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
     PREFETCH_INST 2
     cmp       r1, #0                    @ exception pending?
diff --git a/runtime/interpreter/mterp/arm/op_return.S b/runtime/interpreter/mterp/arm/op_return.S
index a4ffd04..1888373 100644
--- a/runtime/interpreter/mterp/arm/op_return.S
+++ b/runtime/interpreter/mterp/arm/op_return.S
@@ -6,6 +6,10 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     GET_VREG r0, r2                     @ r0<- vAA
     mov     r1, #0
diff --git a/runtime/interpreter/mterp/arm/op_return_void.S b/runtime/interpreter/mterp/arm/op_return_void.S
index f6dfd99..cbea2bf 100644
--- a/runtime/interpreter/mterp/arm/op_return_void.S
+++ b/runtime/interpreter/mterp/arm/op_return_void.S
@@ -1,5 +1,9 @@
     .extern MterpThreadFenceForConstructor
     bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
     b      MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
index 7322940..2dde7ae 100644
--- a/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
@@ -1,3 +1,7 @@
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
     b      MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_wide.S b/runtime/interpreter/mterp/arm/op_return_wide.S
index 2881c87..cfab530 100644
--- a/runtime/interpreter/mterp/arm/op_return_wide.S
+++ b/runtime/interpreter/mterp/arm/op_return_wide.S
@@ -4,6 +4,10 @@
     /* return-wide vAA */
     .extern MterpThreadFenceForConstructor
     bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
     ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
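Editor's note: the footer.S and op_return*.S hunks above relocate the ARM suspend check from the shared MterpReturn exit into each individual return opcode, so it now runs before the result vreg is fetched. A hedged C++ sketch of the check those four added assembly lines perform; the flag constants come from the assembly, but the accessor name is our assumption, not a real ART API:

    // Sketch only; mirrors the lines added to each return opcode.
    uint32_t flags = GetThreadFlags(self);  // hypothetical read of THREAD_FLAGS_OFFSET
    if ((flags & (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)) != 0) {
      MterpSuspendCheck(self);  // may suspend or run checkpoints before returning
    }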
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
index 277817d..5fab379 100644
--- a/runtime/interpreter/mterp/config_x86
+++ b/runtime/interpreter/mterp/config_x86
@@ -36,262 +36,262 @@
     # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
     # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
 
-    op op_nop FALLBACK
-    op op_move FALLBACK
-    op op_move_from16 FALLBACK
-    op op_move_16 FALLBACK
-    op op_move_wide FALLBACK
-    op op_move_wide_from16 FALLBACK
-    op op_move_wide_16 FALLBACK
-    op op_move_object FALLBACK
-    op op_move_object_from16 FALLBACK
-    op op_move_object_16 FALLBACK
-    op op_move_result FALLBACK
-    op op_move_result_wide FALLBACK
-    op op_move_result_object FALLBACK
-    op op_move_exception FALLBACK
-    op op_return_void FALLBACK
-    op op_return FALLBACK
-    op op_return_wide FALLBACK
-    op op_return_object FALLBACK
-    op op_const_4 FALLBACK
-    op op_const_16 FALLBACK
-    op op_const FALLBACK
-    op op_const_high16 FALLBACK
-    op op_const_wide_16 FALLBACK
-    op op_const_wide_32 FALLBACK
-    op op_const_wide FALLBACK
-    op op_const_wide_high16 FALLBACK
-    op op_const_string FALLBACK
-    op op_const_string_jumbo FALLBACK
-    op op_const_class FALLBACK
-    op op_monitor_enter FALLBACK
-    op op_monitor_exit FALLBACK
-    op op_check_cast FALLBACK
-    op op_instance_of FALLBACK
-    op op_array_length FALLBACK
-    op op_new_instance FALLBACK
-    op op_new_array FALLBACK
-    op op_filled_new_array FALLBACK
-    op op_filled_new_array_range FALLBACK
-    op op_fill_array_data FALLBACK
-    op op_throw FALLBACK
-    op op_goto FALLBACK
-    op op_goto_16 FALLBACK
-    op op_goto_32 FALLBACK
-    op op_packed_switch FALLBACK
-    op op_sparse_switch FALLBACK
-    op op_cmpl_float FALLBACK
-    op op_cmpg_float FALLBACK
-    op op_cmpl_double FALLBACK
-    op op_cmpg_double FALLBACK
-    op op_cmp_long FALLBACK
-    op op_if_eq FALLBACK
-    op op_if_ne FALLBACK
-    op op_if_lt FALLBACK
-    op op_if_ge FALLBACK
-    op op_if_gt FALLBACK
-    op op_if_le FALLBACK
-    op op_if_eqz FALLBACK
-    op op_if_nez FALLBACK
-    op op_if_ltz FALLBACK
-    op op_if_gez FALLBACK
-    op op_if_gtz FALLBACK
-    op op_if_lez FALLBACK
-    op_unused_3e FALLBACK
-    op_unused_3f FALLBACK
-    op_unused_40 FALLBACK
-    op_unused_41 FALLBACK
-    op_unused_42 FALLBACK
-    op_unused_43 FALLBACK
-    op op_aget FALLBACK
-    op op_aget_wide FALLBACK
-    op op_aget_object FALLBACK
-    op op_aget_boolean FALLBACK
-    op op_aget_byte FALLBACK
-    op op_aget_char FALLBACK
-    op op_aget_short FALLBACK
-    op op_aput FALLBACK
-    op op_aput_wide FALLBACK
-    op op_aput_object FALLBACK
-    op op_aput_boolean FALLBACK
-    op op_aput_byte FALLBACK
-    op op_aput_char FALLBACK
-    op op_aput_short FALLBACK
-    op op_iget FALLBACK
-    op op_iget_wide FALLBACK
-    op op_iget_object FALLBACK
-    op op_iget_boolean FALLBACK
-    op op_iget_byte FALLBACK
-    op op_iget_char FALLBACK
-    op op_iget_short FALLBACK
-    op op_iput FALLBACK
-    op op_iput_wide FALLBACK
-    op op_iput_object FALLBACK
-    op op_iput_boolean FALLBACK
-    op op_iput_byte FALLBACK
-    op op_iput_char FALLBACK
-    op op_iput_short FALLBACK
-    op op_sget FALLBACK
-    op op_sget_wide FALLBACK
-    op op_sget_object FALLBACK
-    op op_sget_boolean FALLBACK
-    op op_sget_byte FALLBACK
-    op op_sget_char FALLBACK
-    op op_sget_short FALLBACK
-    op op_sput FALLBACK
-    op op_sput_wide FALLBACK
-    op op_sput_object FALLBACK
-    op op_sput_boolean FALLBACK
-    op op_sput_byte FALLBACK
-    op op_sput_char FALLBACK
-    op op_sput_short FALLBACK
-    op op_invoke_virtual FALLBACK
-    op op_invoke_super FALLBACK
-    op op_invoke_direct FALLBACK
-    op op_invoke_static FALLBACK
-    op op_invoke_interface FALLBACK
-    op op_return_void_no_barrier FALLBACK
-    op op_invoke_virtual_range FALLBACK
-    op op_invoke_super_range FALLBACK
-    op op_invoke_direct_range FALLBACK
-    op op_invoke_static_range FALLBACK
-    op op_invoke_interface_range FALLBACK
-    op_unused_79 FALLBACK
-    op_unused_7a FALLBACK
-    op op_neg_int FALLBACK
-    op op_not_int FALLBACK
-    op op_neg_long FALLBACK
-    op op_not_long FALLBACK
-    op op_neg_float FALLBACK
-    op op_neg_double FALLBACK
-    op op_int_to_long FALLBACK
-    op op_int_to_float FALLBACK
-    op op_int_to_double FALLBACK
-    op op_long_to_int FALLBACK
-    op op_long_to_float FALLBACK
-    op op_long_to_double FALLBACK
-    op op_float_to_int FALLBACK
-    op op_float_to_long FALLBACK
-    op op_float_to_double FALLBACK
-    op op_double_to_int FALLBACK
-    op op_double_to_long FALLBACK
-    op op_double_to_float FALLBACK
-    op op_int_to_byte FALLBACK
-    op op_int_to_char FALLBACK
-    op op_int_to_short FALLBACK
-    op op_add_int FALLBACK
-    op op_sub_int FALLBACK
-    op op_mul_int FALLBACK
-    op op_div_int FALLBACK
-    op op_rem_int FALLBACK
-    op op_and_int FALLBACK
-    op op_or_int FALLBACK
-    op op_xor_int FALLBACK
-    op op_shl_int FALLBACK
-    op op_shr_int FALLBACK
-    op op_ushr_int FALLBACK
-    op op_add_long FALLBACK
-    op op_sub_long FALLBACK
-    op op_mul_long FALLBACK
-    op op_div_long FALLBACK
-    op op_rem_long FALLBACK
-    op op_and_long FALLBACK
-    op op_or_long FALLBACK
-    op op_xor_long FALLBACK
-    op op_shl_long FALLBACK
-    op op_shr_long FALLBACK
-    op op_ushr_long FALLBACK
-    op op_add_float FALLBACK
-    op op_sub_float FALLBACK
-    op op_mul_float FALLBACK
-    op op_div_float FALLBACK
-    op op_rem_float FALLBACK
-    op op_add_double FALLBACK
-    op op_sub_double FALLBACK
-    op op_mul_double FALLBACK
-    op op_div_double FALLBACK
-    op op_rem_double FALLBACK
-    op op_add_int_2addr FALLBACK
-    op op_sub_int_2addr FALLBACK
-    op op_mul_int_2addr FALLBACK
-    op op_div_int_2addr FALLBACK
-    op op_rem_int_2addr FALLBACK
-    op op_and_int_2addr FALLBACK
-    op op_or_int_2addr FALLBACK
-    op op_xor_int_2addr FALLBACK
-    op op_shl_int_2addr FALLBACK
-    op op_shr_int_2addr FALLBACK
-    op op_ushr_int_2addr FALLBACK
-    op op_add_long_2addr FALLBACK
-    op op_sub_long_2addr FALLBACK
-    op op_mul_long_2addr FALLBACK
-    op op_div_long_2addr FALLBACK
-    op op_rem_long_2addr FALLBACK
-    op op_and_long_2addr FALLBACK
-    op op_or_long_2addr FALLBACK
-    op op_xor_long_2addr FALLBACK
-    op op_shl_long_2addr FALLBACK
-    op op_shr_long_2addr FALLBACK
-    op op_ushr_long_2addr FALLBACK
-    op op_add_float_2addr FALLBACK
-    op op_sub_float_2addr FALLBACK
-    op op_mul_float_2addr FALLBACK
-    op op_div_float_2addr FALLBACK
-    op op_rem_float_2addr FALLBACK
-    op op_add_double_2addr FALLBACK
-    op op_sub_double_2addr FALLBACK
-    op op_mul_double_2addr FALLBACK
-    op op_div_double_2addr FALLBACK
-    op op_rem_double_2addr FALLBACK
-    op op_add_int_lit16 FALLBACK
-    op op_rsub_int FALLBACK
-    op op_mul_int_lit16 FALLBACK
-    op op_div_int_lit16 FALLBACK
-    op op_rem_int_lit16 FALLBACK
-    op op_and_int_lit16 FALLBACK
-    op op_or_int_lit16 FALLBACK
-    op op_xor_int_lit16 FALLBACK
-    op op_add_int_lit8 FALLBACK
-    op op_rsub_int_lit8 FALLBACK
-    op op_mul_int_lit8 FALLBACK
-    op op_div_int_lit8 FALLBACK
-    op op_rem_int_lit8 FALLBACK
-    op op_and_int_lit8 FALLBACK
-    op op_or_int_lit8 FALLBACK
-    op op_xor_int_lit8 FALLBACK
-    op op_shl_int_lit8 FALLBACK
-    op op_shr_int_lit8 FALLBACK
-    op op_ushr_int_lit8 FALLBACK
-    op op_iget_quick FALLBACK
-    op op_iget_wide_quick FALLBACK
-    op op_iget_object_quick FALLBACK
-    op op_iput_quick FALLBACK
-    op op_iput_wide_quick FALLBACK
-    op op_iput_object_quick FALLBACK
-    op op_invoke_virtual_quick FALLBACK
-    op op_invoke_virtual_range_quick FALLBACK
-    op op_iput_boolean_quick FALLBACK
-    op op_iput_byte_quick FALLBACK
-    op op_iput_char_quick FALLBACK
-    op op_iput_short_quick FALLBACK
-    op op_iget_boolean_quick FALLBACK
-    op op_iget_byte_quick FALLBACK
-    op op_iget_char_quick FALLBACK
-    op op_iget_short_quick FALLBACK
-    op_unused_f3 FALLBACK
-    op_unused_f4 FALLBACK
-    op_unused_f5 FALLBACK
-    op_unused_f6 FALLBACK
-    op_unused_f7 FALLBACK
-    op_unused_f8 FALLBACK
-    op_unused_f9 FALLBACK
-    op_unused_fa FALLBACK
-    op_unused_fb FALLBACK
-    op_unused_fc FALLBACK
-    op_unused_fd FALLBACK
-    op_unused_fe FALLBACK
-    op_unused_ff FALLBACK
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    op op_invoke_lambda FALLBACK
+    # op op_unused_f4 FALLBACK
+    op op_capture_variable FALLBACK
+    op op_create_lambda FALLBACK
+    op op_liberate_variable FALLBACK
+    op op_box_lambda FALLBACK
+    op op_unbox_lambda FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
 op-end
 
 # common subroutines for asm
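Editor's note: in this config format, an active "op <name> FALLBACK" line routes the opcode to the generic fallback handler (the C++ reference implementation), so commenting a line out, as this hunk does for nearly every opcode, enables the corresponding x86 assembly handler; only the experimental lambda opcodes remain on FALLBACK. A hypothetical debugging override in the file's own syntax (not part of this change):

    # Re-enable a single line to route one opcode back to the C++ interpreter:
    op op_mul_long FALLBACK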
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 9975458..0afd276 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -274,13 +274,15 @@
   return false;
 }
 
-extern "C" bool MterpCheckCast(uint32_t index, Object* obj, art::ArtMethod* method,
-                               Thread* self)
+extern "C" bool MterpCheckCast(uint32_t index, StackReference<mirror::Object>* vreg_addr,
+                               art::ArtMethod* method, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
   if (UNLIKELY(c == nullptr)) {
     return true;
   }
+  // Must load obj from vreg following ResolveVerifyAndClinit due to moving gc.
+  Object* obj = vreg_addr->AsMirrorPtr();
   if (UNLIKELY(obj != nullptr && !obj->InstanceOf(c))) {
     ThrowClassCastException(c, obj->GetClass());
     return true;
@@ -288,13 +290,15 @@
   return false;
 }
 
-extern "C" bool MterpInstanceOf(uint32_t index, Object* obj, art::ArtMethod* method,
-                                Thread* self)
+extern "C" bool MterpInstanceOf(uint32_t index, StackReference<mirror::Object>* vreg_addr,
+                                art::ArtMethod* method, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
   if (UNLIKELY(c == nullptr)) {
     return false;  // Caller will check for pending exception.  Return value unimportant.
   }
+  // Must load obj from vreg following ResolveVerifyAndClinit due to moving gc.
+  Object* obj = vreg_addr->AsMirrorPtr();
   return (obj != nullptr) && obj->InstanceOf(c);
 }
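Editor's note: the signature change above is the point of these two hunks. Passing the vreg slot instead of a raw Object* lets the helpers re-read the reference after a call that can allocate. A C++ sketch of the hazard the in-code comment describes; the early-load accessor name is assumed for illustration:

    // Unsafe under a moving GC (what the old signature invited):
    Object* obj = GetVRegReference(vreg);   // hypothetical early load of the raw pointer
    Class* c = ResolveVerifyAndClinit(index, method, self, false, false);  // may allocate => may move obj
    bool r = obj->InstanceOf(c);            // obj may now be a stale address

    // Safe (what the patch does): keep the slot address, reload afterwards.
    Class* c2 = ResolveVerifyAndClinit(index, method, self, false, false);
    Object* obj2 = vreg_addr->AsMirrorPtr();  // re-reads the (possibly relocated) reference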
 
@@ -505,8 +509,7 @@
                                                uint64_t* new_value, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
-                                          sizeof(int64_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
   if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set64<false>(field->GetDeclaringClass(), *new_value);
@@ -524,8 +527,7 @@
 extern "C" int artSet8InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
                                         ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
-                                          sizeof(int8_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     if (type == Primitive::kPrimBoolean) {
@@ -582,7 +584,7 @@
 }
 
 extern "C" int artSetObjInstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
-                                         mirror::Object* new_value, ArtMethod* referrer)
+                                          mirror::Object* new_value, ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
                                           sizeof(mirror::HeapReference<mirror::Object>));
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 9ae98a2..78c784b 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -599,6 +599,10 @@
 /* File: arm/op_return_void.S */
     .extern MterpThreadFenceForConstructor
     bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
     b      MterpReturn
@@ -615,6 +619,10 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     GET_VREG r0, r2                     @ r0<- vAA
     mov     r1, #0
@@ -630,6 +638,10 @@
     /* return-wide vAA */
     .extern MterpThreadFenceForConstructor
     bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
     ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
@@ -648,6 +660,10 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     GET_VREG r0, r2                     @ r0<- vAA
     mov     r1, #0
@@ -878,10 +894,10 @@
     EXPORT_PC
     FETCH    r0, 1                      @ r0<- BBBB
     mov      r1, rINST, lsr #8          @ r1<- AA
-    GET_VREG r1, r1                     @ r1<- object
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
     ldr      r2, [rFP, #OFF_FP_METHOD]  @ r2<- method
     mov      r3, rSELF                  @ r3<- self
-    bl       MterpCheckCast             @ (index, obj, method, self)
+    bl       MterpCheckCast             @ (index, &obj, method, self)
     PREFETCH_INST 2
     cmp      r0, #0
     bne      MterpPossibleException
@@ -903,12 +919,12 @@
     EXPORT_PC
     FETCH     r0, 1                     @ r0<- CCCC
     mov       r1, rINST, lsr #12        @ r1<- B
-    GET_VREG  r1, r1                    @ r1<- vB (object)
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
     ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
     mov       r3, rSELF                 @ r3<- self
     mov       r9, rINST, lsr #8         @ r9<- A+
     and       r9, r9, #15               @ r9<- A
-    bl        MterpInstanceOf           @ (index, obj, method, self)
+    bl        MterpInstanceOf           @ (index, &obj, method, self)
     ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
     PREFETCH_INST 2
     cmp       r1, #0                    @ exception pending?
@@ -3385,6 +3401,10 @@
     .balign 128
 .L_op_return_void_no_barrier: /* 0x73 */
 /* File: arm/op_return_void_no_barrier.S */
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
     b      MterpReturn
@@ -12183,12 +12203,8 @@
     b MterpDone
 MterpReturn:
     ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     str     r0, [r2]
     str     r1, [r2, #4]
-    mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    blne    MterpSuspendCheck                       @ (self)
     mov     r0, #1                                  @ signal return to caller.
 MterpDone:
     add     sp, sp, #4                              @ un-align 64
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
new file mode 100644
index 0000000..e2918dc
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -0,0 +1,12969 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'x86'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: x86/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate the assembly code by implementing an ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+x86 ABI general notes:
+
+Caller save set:
+   eax, edx, ecx, st(0)-st(7)
+Callee save set:
+   ebx, esi, edi, ebp
+Return regs:
+   32-bit in eax
+   64-bit in edx:eax (low-order 32 in eax)
+   fp on top of fp stack st(0)
+
+Parameters passed on stack, pushed right-to-left.  On entry to target, first
+parm is at 4(%esp).  Traditional entry code is:
+
+functEntry:
+    push    %ebp             # save old frame pointer
+    mov     %esp,%ebp        # establish new frame pointer
+    sub     FrameSize,%esp   # Allocate storage for spill, locals & outs
+
+Once past the prologue, arguments are referenced at ((argno + 2)*4)(%ebp)
+
+Stack must be 16-byte aligned to support SSE in native code.
+
+If we're not doing variable stack allocation (alloca), the frame pointer can be
+eliminated and all arg references adjusted to be esp relative.
+*/
+
+/*
+Mterp and x86 notes:
+
+Some key interpreter variables will be assigned to registers.
+
+  nick     reg   purpose
+  rPC      esi   interpreted program counter, used for fetching instructions
+  rFP      edi   interpreted frame pointer, used for accessing locals and args
+  rINSTw   bx    first 16-bit code of current instruction
+  rINSTbl  bl    opcode portion of instruction word
+  rINSTbh  bh    high byte of inst word, usually contains src/tgt reg names
+  rIBASE   edx   base of instruction handler table
+  rREFS    ebp   base of object references in shadow frame.
+
+Notes:
+   o High order 16 bits of ebx must be zero on entry to handler
+   o rPC, rFP, rINSTw/rINSTbl valid on handler entry and exit
+   o eax and ecx are scratch, rINSTw/ebx sometimes scratch
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* The total frame size must be 16-byte aligned; remember that the caller's
+ * 4-byte return address counts toward that total.
+ */
+#define FRAME_SIZE     44
+
+/* Frame diagram while executing ExecuteMterpImpl, high to low addresses */
+#define IN_ARG3        (FRAME_SIZE + 16)
+#define IN_ARG2        (FRAME_SIZE + 12)
+#define IN_ARG1        (FRAME_SIZE +  8)
+#define IN_ARG0        (FRAME_SIZE +  4)
+#define CALLER_RP      (FRAME_SIZE +  0)
+/* Spill offsets relative to %esp */
+#define EBP_SPILL      (FRAME_SIZE -  4)
+#define EDI_SPILL      (FRAME_SIZE -  8)
+#define ESI_SPILL      (FRAME_SIZE - 12)
+#define EBX_SPILL      (FRAME_SIZE - 16)
+#define LOCAL0         (FRAME_SIZE - 20)
+#define LOCAL1         (FRAME_SIZE - 24)
+#define LOCAL2         (FRAME_SIZE - 28)
+/* Out Arg offsets, relative to %esp */
+#define OUT_ARG3       ( 12)
+#define OUT_ARG2       (  8)
+#define OUT_ARG1       (  4)
+#define OUT_ARG0       (  0)  /* <- ExecuteMterpImpl esp + 0 */
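+
+/*
+ * Editor's sanity check (an assumption, not part of the generated file):
+ * after the prologue the stack holds FRAME_SIZE (44) bytes plus the 4-byte
+ * return address, 48 bytes total, satisfying the 16-byte SSE alignment.
+ */
+#if ((FRAME_SIZE + 4) % 16) != 0
+#error "Mterp x86 frame size is not 16-byte aligned"
+#endif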
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rSELF    IN_ARG0(%esp)
+#define rPC      %esi
+#define rFP      %edi
+#define rINST    %ebx
+#define rINSTw   %bx
+#define rINSTbh  %bh
+#define rINSTbl  %bl
+#define rIBASE   %edx
+#define rREFS    %ebp
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
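+
+/*
+ * Editor's usage example (assumption): with rFP pointing at vregs_[0], the
+ * negative offsets above reach back into the enclosing ShadowFrame, e.g.:
+ *
+ *     movl    OFF_FP_METHOD(rFP), %eax    # eax <- shadow frame's ArtMethod*
+ */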
+
+/*
+ * The reference interpreter performs explicit suspend checks, which is somewhat
+ * wasteful. Dalvik's interpreter folded suspend checks into the jump table
+ * mechanism, and eventually mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
+
+/*
+ * Refresh the handler table base.
+ * rIBASE lives in a caller-save register, so we must restore it after each call.
+ * It is also clobbered as the result register of some 64-bit operations (like
+ * imul), so we must restore it in those cases as well.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro REFRESH_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * If rSELF has already been loaded into a register, refresh rIBASE directly from it.
+ */
+.macro REFRESH_IBASE_FROM_SELF _reg
+    movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
+.endm
+
+/*
+ * Refresh rINST.
+ * On entry to a handler, rINST does not contain the opcode number.
+ * However some utilities require the full value, so this macro
+ * restores the opcode number.
+ */
+.macro REFRESH_INST _opnum
+    movb    rINSTbl, rINSTbh
+    movb    $\_opnum, rINSTbl
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINSTw.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    movzwl  (rPC), rINST
+.endm
+
+/*
+ * Remove opcode from rINST, compute the address of handler and jump to it.
+ */
+.macro GOTO_NEXT
+    movzx   rINSTbl,%eax
+    movzbl  rINSTbh,rINST
+    shll    $7, %eax
+    addl    rIBASE, %eax
+    jmp     *%eax
+.endm
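+
+/*
+ * Editor's note (assumption): handlers are emitted at 128-byte intervals
+ * (see the .balign 128 directives below), so the computed target here is
+ * rIBASE + (opcode << 7).
+ */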
+
+/*
+ * Advance rPC by instruction count.
+ */
+.macro ADVANCE_PC _count
+    leal    2*\_count(rPC), rPC
+.endm
+
+/*
+ * Advance rPC by instruction count, fetch instruction and jump to handler.
+ */
+.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count
+    ADVANCE_PC \_count
+    FETCH_INST
+    GOTO_NEXT
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define VREG_ADDRESS(_vreg) (rFP,_vreg,4)
+#define VREG_HIGH_ADDRESS(_vreg) 4(rFP,_vreg,4)
+#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4)
+#define VREG_REF_HIGH_ADDRESS(_vreg) 4(rREFS,_vreg,4)
+
+.macro GET_VREG _reg _vreg
+    movl    (rFP,\_vreg,4), \_reg
+.endm
+
+/* Read wide value to xmm. */
+.macro GET_WIDE_FP_VREG _reg _vreg
+    movq    (rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    $0, (rREFS,\_vreg,4)
+.endm
+
+/* Write wide value from xmm. xmm is clobbered. */
+.macro SET_WIDE_FP_VREG _reg _vreg
+    movq    \_reg, (rFP,\_vreg,4)
+    pxor    \_reg, \_reg
+    movq    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro SET_VREG_OBJECT _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro GET_VREG_HIGH _reg _vreg
+    movl    4(rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG_HIGH _reg _vreg
+    movl    \_reg, 4(rFP,\_vreg,4)
+    movl    $0, 4(rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_REF _vreg
+    movl    $0,  (rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_WIDE_REF _vreg
+    movl    $0,  (rREFS,\_vreg,4)
+    movl    $0, 4(rREFS,\_vreg,4)
+.endm
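+
+/*
+ * Editor's sketch (assumption): these macros implement the double-store of
+ * object references mentioned in the header notes.  SET_VREG zeroes the
+ * companion slot in rREFS so the GC never sees a stale root, while
+ * SET_VREG_OBJECT mirrors the value so the GC can visit (and update) it:
+ *
+ *     SET_VREG        %eax %ecx    # vC <- int in eax;  refs[C] <- 0
+ *     SET_VREG_OBJECT %eax %ecx    # vC <- ref in eax;  refs[C] <- eax
+ */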
+
+/* File: x86/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  0  Thread* self
+ *  1  code_item
+ *  2  ShadowFrame
+ *  3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    /* Allocate frame */
+    subl    $FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset FRAME_SIZE
+
+    /* Spill callee save regs */
+    movl    %ebp, EBP_SPILL(%esp)
+    movl    %edi, EDI_SPILL(%esp)
+    movl    %esi, ESI_SPILL(%esp)
+    movl    %ebx, EBX_SPILL(%esp)
+
+    /* Load ShadowFrame pointer */
+    movl    IN_ARG2(%esp), %edx
+
+    /* Remember the return register */
+    movl    IN_ARG3(%esp), %eax
+    movl    %eax, SHADOWFRAME_RESULT_REGISTER_OFFSET(%edx)
+
+    /* Remember the code_item */
+    movl    IN_ARG1(%esp), %ecx
+    movl    %ecx, SHADOWFRAME_CODE_ITEM_OFFSET(%edx)
+
+    /* set up "named" registers */
+    movl    SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(%edx), %eax
+    leal    SHADOWFRAME_VREGS_OFFSET(%edx), rFP
+    leal    (rFP, %eax, 4), rREFS
+    movl    SHADOWFRAME_DEX_PC_OFFSET(%edx), %eax
+    lea     CODEITEM_INSNS_OFFSET(%ecx), rPC
+    lea     (rPC, %eax, 2), rPC
+    EXPORT_PC
+
+    /* Starting ibase */
+    REFRESH_IBASE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: x86/op_nop.S */
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: x86/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG rINST rINST
+    .if 0
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: x86/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzx   rINSTbl, %eax                   # eax <- AA
+    movw    2(rPC), rINSTw                  # rINSTw <- BBBB
+    GET_VREG rINST rINST                    # rINST <- fp[BBBB]
+    .if 0
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: x86/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwl  4(rPC), %ecx                    # ecx <- BBBB
+    movzwl  2(rPC), %eax                    # eax <- AAAA
+    GET_VREG rINST %ecx
+    .if 0
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: x86/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %ecx             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: x86/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  2(rPC), %ecx                    # ecx <- BBBB
+    movzbl  rINSTbl, %eax                   # eax <- AAAA
+    GET_WIDE_FP_VREG %xmm0 %ecx             # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %eax             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: x86/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  4(rPC), %ecx                    # ecx<- BBBB
+    movzwl  2(rPC), %eax                    # eax<- AAAA
+    GET_WIDE_FP_VREG %xmm0 %ecx             # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %eax             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: x86/op_move_object.S */
+/* File: x86/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG rINST rINST
+    .if 1
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: x86/op_move_object_from16.S */
+/* File: x86/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzx   rINSTbl, %eax                   # eax <- AA
+    movw    2(rPC), rINSTw                  # rINSTw <- BBBB
+    GET_VREG rINST rINST                    # rINST <- fp[BBBB]
+    .if 1
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: x86/op_move_object_16.S */
+/* File: x86/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwl  4(rPC), %ecx                    # ecx <- BBBB
+    movzwl  2(rPC), %eax                    # eax <- AAAA
+    GET_VREG rINST %ecx
+    .if 1
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: x86/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    (%eax), %eax                    # r0 <- result.i.
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- fp[B]
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: x86/op_move_result_wide.S */
+    /* move-result-wide vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    4(%eax), %ecx                   # Get high
+    movl    (%eax), %eax                    # Get low
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[AA+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: x86/op_move_result_object.S */
+/* File: x86/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    (%eax), %eax                    # r0 <- result.i.
+    .if 1
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- fp[B]
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: x86/op_move_exception.S */
+    /* move-exception vAA */
+    movl    rSELF, %ecx
+    movl    THREAD_EXCEPTION_OFFSET(%ecx), %eax
+    SET_VREG_OBJECT %eax rINST              # fp[AA] <- exception object
+    movl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: x86/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: x86/op_return.S */
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    GET_VREG %eax rINST                     # eax <- vAA
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: x86/op_return_wide.S */
+/*
+ * Return a 64-bit value.
+ */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    GET_VREG_HIGH %ecx rINST                # ecx <- v[AA+1]
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: x86/op_return_object.S */
+/* File: x86/op_return.S */
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    GET_VREG %eax rINST                     # eax <- vAA
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: x86/op_const_4.S */
+    /* const/4 vA, #+B */
+    movsx   rINSTbl, %eax                   # eax <-ssssssBx
+    movl    $0xf, rINST
+    andl    %eax, rINST                     # rINST <- A
+    sarl    $4, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: x86/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    movswl  2(rPC), %ecx                    # ecx <- ssssBBBB
+    SET_VREG %ecx rINST                     # vAA <- ssssBBBB
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: x86/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # grab all 32 bits at once
+    SET_VREG %eax rINST                     # vAA<- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: x86/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $16, %eax                      # eax <- BBBB0000
+    SET_VREG %eax rINST                     # vAA <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: x86/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    movswl  2(rPC), %eax                    # eax <- ssssBBBB
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssssssBBBB
+    SET_VREG_HIGH rIBASE rINST              # store msw
+    SET_VREG %eax rINST                     # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: x86/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- BBBBbbbb
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssssssBBBB
+    SET_VREG_HIGH rIBASE rINST              # store msw
+    SET_VREG %eax rINST                     # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: x86/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- lsw
+    movzbl  rINSTbl, %ecx                   # ecx <- AA
+    movl    6(rPC), rINST                   # rINST <- msw
+    SET_VREG %eax %ecx
+    SET_VREG_HIGH  rINST %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 5
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: x86/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $16, %eax                      # eax <- BBBB0000
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    xorl    %eax, %eax
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: x86/op_const_string.S */
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstString                # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: x86/op_const_string_jumbo.S */
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstString                # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: x86/op_const_class.S */
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstClass                 # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: x86/op_monitor_enter.S */
+/*
+ * Synchronize on an object.
+ */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    artLockObjectFromCode           # (object, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: x86/op_monitor_exit.S */
+/*
+ * Unlock an object.
+ *
+ * Exceptions that occur when unlocking a monitor need to appear as
+ * if they happened at the following instruction.  See the Dalvik
+ * instruction spec.
+ */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    artUnlockObjectFromCode         # (object, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
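+    /*
+     * Both monitor handlers share one convention, visible above: a
+     * nonzero return from artLockObjectFromCode/artUnlockObjectFromCode
+     * means an exception is already pending, so the handler jumps to
+     * MterpException instead of advancing the PC.
+     */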
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: x86/op_check_cast.S */
+/*
+ * Check to see if a cast from one class to another is allowed.
+ */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    leal    VREG_ADDRESS(rINST), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpCheckCast                  # (index, &obj, method, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: x86/op_instance_of.S */
+/*
+ * Check to see if an object reference is an instance of a class.
+ *
+ * Most common situation is a non-null object, being compared against
+ * an already-resolved class.
+ */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- CCCC
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $4, %eax                       # eax <- B
+    leal    VREG_ADDRESS(%eax), %ecx        # Get object address
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpInstanceOf                 # (index, &obj, method, self)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    andb    $0xf, rINSTbl                  # rINSTbl <- A
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
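+    /*
+     * MterpInstanceOf returns the boolean result in eax: 1 if vB holds
+     * an instance of the resolved class, else 0 (a null reference is
+     * never an instance of anything), and that value is stored to vA.
+     */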
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: x86/op_array_length.S */
+/*
+ * Return the length of an array.
+ */
+    mov     rINST, %eax                     # eax <- BA
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %ecx rINST                     # ecx <- vB (object ref)
+    testl   %ecx, %ecx                      # is null?
+    je      common_errNullObject
+    andb    $0xf, %al                      # eax <- A
+    movl    MIRROR_ARRAY_LENGTH_OFFSET(%ecx), rINST
+    SET_VREG rINST %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: x86/op_new_instance.S */
+/*
+ * Create a new instance of a class.
+ */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    REFRESH_INST 34
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpNewInstance
+    REFRESH_IBASE
+    testl   %eax, %eax                 # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: x86/op_new_array.S */
+/*
+ * Allocate an array of objects, specified with the array class
+ * and a count.
+ *
+ * The verifier guarantees that this is an array class, so we don't
+ * check for it here.
+ */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 35
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpNewArray
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: x86/op_filled_new_array.S */
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)
+    call    MterpFilledNewArray
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: x86/op_filled_new_array_range.S */
+/* File: x86/op_filled_new_array.S */
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)
+    call    MterpFilledNewArrayRange
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: x86/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    GET_VREG %eax rINST                     # eax <- vAA (array object)
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpFillArrayData              # (obj, payload)
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
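+    /*
+     * For reference, the fill-array-data-payload located above via
+     * rPC + BBBBbbbb*2 has this layout (per the Dex format):
+     *     ushort ident;          // 0x0300
+     *     ushort element_width;  // bytes per element
+     *     uint   size;           // number of elements
+     *     ubyte  data[];         // size * element_width bytes
+     */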
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: x86/op_throw.S */
+/*
+ * Throw an exception object in the current thread.
+ */
+    /* throw vAA */
+    EXPORT_PC
+    GET_VREG %eax rINST                     # eax<- vAA (exception object)
+    testl   %eax, %eax
+    jz      common_errNullObject
+    movl    rSELF,%ecx
+    movl    %eax, THREAD_EXCEPTION_OFFSET(%ecx)
+    jmp     MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: x86/op_goto.S */
+/*
+ * Unconditional branch, 8-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto +AA */
+    movsbl  rINSTbl, %eax                   # eax <- ssssssAA
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
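+/*
+ * Note on the suspend check above: the flags left by the doubling addl
+ * drive the jg, so forward branches (offset > 0) skip straight to
+ * GOTO_NEXT while backward branches (offset <= 0) take the suspend
+ * path, the usual polling point for loops.
+ */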
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: x86/op_goto_16.S */
+/*
+ * Unconditional branch, 16-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto/16 +AAAA */
+    movswl  2(rPC), %eax                    # eax <- ssssAAAA
+    addl    %eax, %eax                      # eax <- AAAA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AAAA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: x86/op_goto_32.S */
+/*
+ * Unconditional branch, 32-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ *
+ * Unlike most opcodes, this one is allowed to branch to itself, so
+ * our "backward branch" test must be "<=0" instead of "<0".  Because
+ * we need the V bit set, we'll use an adds to convert from Dalvik
+ * offset to byte offset.
+ */
+    /* goto/32 +AAAAAAAA */
+    movl    2(rPC), %eax                    # eax <- AAAAAAAA
+    addl    %eax, %eax                      # eax <- AAAAAAAA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AAAAAAAA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: x86/op_packed_switch.S */
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBBBBBB */
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    GET_VREG %eax rINST                     # eax <- vAA
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
+    movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
+    call    MterpDoPackedSwitch
+    addl    %eax, %eax
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    REFRESH_IBASE
+    jg      1f
+#if MTERP_SUSPEND
+    #     REFRESH_IBASE - we did it above.
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
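+/*
+ * For reference, the payloads the helpers walk have these layouts
+ * (per the Dex format):
+ *   packed-switch-payload: ushort ident (0x0100); ushort size;
+ *                          int first_key; int targets[size];
+ *   sparse-switch-payload: ushort ident (0x0200); ushort size;
+ *                          int keys[size]; int targets[size];
+ * The helper returns a code-unit offset in eax, which the handler
+ * doubles into a byte offset before adding it to rPC.
+ */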
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: x86/op_sparse_switch.S */
+/* File: x86/op_packed_switch.S */
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBBBBBB */
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    GET_VREG %eax rINST                     # eax <- vAA
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
+    movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
+    call    MterpDoSparseSwitch
+    addl    %eax, %eax
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    REFRESH_IBASE
+    jg      1f
+#if MTERP_SUSPEND
+    #     REFRESH_IBASE - we did it above.
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: x86/op_cmpl_float.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movss VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomiss VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpl_float_nan_is_neg
+    je      .Lop_cmpl_float_finish
+    jb      .Lop_cmpl_float_less
+.Lop_cmpl_float_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpl_float_finish
+.Lop_cmpl_float_nan_is_neg:
+.Lop_cmpl_float_less:
+    decl    %eax
+.Lop_cmpl_float_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
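+    /*
+     * Flag mapping behind the jumps above: ucomiss sets ZF/PF/CF to
+     * 1/1/1 on unordered (NaN), 1/0/0 on equal, 0/0/1 on less and
+     * 0/0/0 on greater, so jp catches NaN, je the equal case, jb the
+     * less case, and greater falls through to the incl.
+     */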
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: x86/op_cmpg_float.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movss VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomiss VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpg_float_nan_is_pos
+    je      .Lop_cmpg_float_finish
+    jb      .Lop_cmpg_float_less
+.Lop_cmpg_float_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpg_float_finish
+.Lop_cmpg_float_nan_is_neg:
+.Lop_cmpg_float_less:
+    decl    %eax
+.Lop_cmpg_float_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: x86/op_cmpl_double.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movsd VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomisd VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpl_double_nan_is_neg
+    je      .Lop_cmpl_double_finish
+    jb      .Lop_cmpl_double_less
+.Lop_cmpl_double_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpl_double_finish
+.Lop_cmpl_double_nan_is_neg:
+.Lop_cmpl_double_less:
+    decl    %eax
+.Lop_cmpl_double_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: x86/op_cmpg_double.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movsd VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomisd VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpg_double_nan_is_pos
+    je      .Lop_cmpg_double_finish
+    jb      .Lop_cmpg_double_less
+.Lop_cmpg_double_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpg_double_finish
+.Lop_cmpg_double_nan_is_neg:
+.Lop_cmpg_double_less:
+    decl    %eax
+.Lop_cmpg_double_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: x86/op_cmp_long.S */
+/*
+ * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+ * register based on the results of the comparison.
+ */
+    /* cmp-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1], BB is clobbered
+    cmpl    VREG_HIGH_ADDRESS(%ecx), %eax
+    jl      .Lop_cmp_long_smaller
+    jg      .Lop_cmp_long_bigger
+    movzbl  2(rPC), %eax                    # eax <- BB, restore BB
+    GET_VREG %eax %eax                      # eax <- v[BB]
+    sub     VREG_ADDRESS(%ecx), %eax
+    ja      .Lop_cmp_long_bigger
+    jb      .Lop_cmp_long_smaller
+.Lop_cmp_long_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_cmp_long_bigger:
+    movl    $1, %eax
+    jmp     .Lop_cmp_long_finish
+
+.Lop_cmp_long_smaller:
+    movl    $-1, %eax
+    jmp     .Lop_cmp_long_finish
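+/*
+ * Sketch of the comparison above in C-like pseudocode: high words
+ * compare signed; only on a tie do the low words compare unsigned:
+ *     if (hi(a) != hi(b)) return (int32_t)hi(a) < (int32_t)hi(b) ? -1 : 1;
+ *     if (lo(a) != lo(b)) return (uint32_t)lo(a) < (uint32_t)lo(b) ? -1 : 1;
+ *     return 0;
+ */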
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: x86/op_if_eq.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jne   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
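+/*
+ * Offset-selection trick shared by all six if-cmp expansions (sketch):
+ *     int32_t units = taken ? (int16_t)insns[1] : 2;  // 2 = skip this insn
+ *     pc += units * 2;                                // code units -> bytes
+ * The doubling addl also leaves the flags for the backward-branch
+ * suspend-check test below it.
+ */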
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: x86/op_if_ne.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    je   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: x86/op_if_lt.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jge   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: x86/op_if_ge.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jl   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: x86/op_if_gt.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jle   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: x86/op_if_le.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, %eax                       # assume not taken
+    jg   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: x86/op_if_eqz.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vAA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jne   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
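+/*
+ * The zero-test variants reuse the same taken/not-taken offset
+ * selection as bincmp above, but compare the vreg against an
+ * immediate zero directly in memory, saving a register load.
+ */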
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: x86/op_if_nez.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vAA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    je   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: x86/op_if_ltz.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vAA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jge   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: x86/op_if_gez.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vAA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jl   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: x86/op_if_gtz.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vAA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jle   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: x86/op_if_lez.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vAA, 0)
+    movl    $2, %eax                       # assume branch not taken
+    jg   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: x86/op_unused_3e.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: x86/op_unused_3f.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: x86/op_unused_40.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: x86/op_unused_41.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: x86/op_unused_42.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: x86/op_unused_43.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movl   MIRROR_INT_ARRAY_DATA_OFFSET(%eax,%ecx,4), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
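+    /*
+     * Note the single bounds test above: with the index treated as
+     * unsigned, a negative vCC wraps to a huge value, so the one
+     * "jae" rejects both index < 0 and index >= length.
+     */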
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: x86/op_aget_wide.S */
+/*
+ * Array get, 64 bits.  vAA <- vBB[vCC].
+ */
+    /* aget-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    movq    (%eax), %xmm0                   # xmm0 <- vBB[vCC]
+    SET_WIDE_FP_VREG %xmm0 rINST            # vAA <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: x86/op_aget_object.S */
+/*
+ * Array object get.  vAA <- vBB[vCC].
+ *
+ * for: aget-object
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    artAGetObjectFromMterp          # (array, index)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG_OBJECT %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: x86/op_aget_boolean.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movzbl   MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: x86/op_aget_byte.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movsbl   MIRROR_BYTE_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: x86/op_aget_char.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movzwl   MIRROR_CHAR_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: x86/op_aget_short.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movswl   MIRROR_SHORT_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_INT_ARRAY_DATA_OFFSET(%eax,%ecx,4), %eax
+    GET_VREG rINST rINST
+    movl  rINST, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: x86/op_aput_wide.S */
+/*
+ * Array put, 64 bits.  vBB[vCC] <- vAA.
+ *
+ */
+    /* aput-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0 <- vAA
+    movq    %xmm0, (%eax)                   # vBB[vCC] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: x86/op_aput_object.S */
+/*
+ * Store an object into an array.  vBB[vCC] <- vAA.
+ */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 77
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpAputObject            # (shadow_frame, dex_pc_ptr, inst_data)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
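+    /*
+     * Unlike the primitive aput variants, the object store funnels
+     * through MterpAputObject so the element type check (and any GC
+     * write barrier) happens in C++.  Sketch:
+     *     if (!MterpAputObject(shadow_frame, dex_pc_ptr, inst_data))
+     *         goto MterpPossibleException;
+     */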
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: x86/op_aput_boolean.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    GET_VREG rINST rINST
+    movb  rINSTbl, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: x86/op_aput_byte.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_BYTE_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    GET_VREG rINST rINST
+    movb  rINSTbl, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: x86/op_aput_char.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_CHAR_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    GET_VREG rINST rINST
+    movw  rINSTw, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: x86/op_aput_short.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_SHORT_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    GET_VREG rINST rINST
+    movw  rINSTw, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGet32InstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
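+    /*
+     * The ".if 0/.else" above comes from the template's is_object
+     * flag; compare the ".if 1" in op_iget_object below, where the
+     * SET_VREG_OBJECT path is assembled so reference results are
+     * recorded as references in the shadow frame.
+     */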
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: x86/op_iget_wide.S */
+/*
+ * 64-bit instance field get.
+ *
+ * for: iget-wide
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGet64InstanceFromCode
+    mov     rSELF, %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    SET_VREG %eax rINST
+    SET_VREG_HIGH %edx rINST
+    REFRESH_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: x86/op_iget_object.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetObjInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 1
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: x86/op_iget_boolean.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetBooleanInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: x86/op_iget_byte.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetByteInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: x86/op_iget_char.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetCharInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: x86/op_iget_short.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGetShortInstanceFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <-value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet32InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
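+    /*
+     * All artSetXXInstanceFromMterp helpers used by the iput family
+     * follow this pattern: a nonzero return means the store failed
+     * with an exception already pending (e.g. a null object), so the
+     * handler branches to MterpPossibleException.
+     */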
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: x86/op_iput_wide.S */
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl,%ecx                    # ecx <- BA
+    sarl    $4,%ecx                        # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf,rINSTbl                   # rINST <- A
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet64InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: x86/op_iput_object.S */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 91
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpIputObject
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: x86/op_iput_boolean.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet8InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: x86/op_iput_byte.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet8InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: x86/op_iput_char.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet16InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: x86/op_iput_short.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet16InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGet32StaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
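+/*
+ * The SGET handlers in this family all share one call shape; a rough C
+ * sketch of the helper (signature inferred from the call site, not
+ * authoritative):
+ *
+ *   int32_t artGet32StaticFromCode(uint32_t field_idx,
+ *                                  ArtMethod* referrer,
+ *                                  Thread* self);
+ *
+ * The value comes back in %eax; failure is signalled not by the return
+ * value but through self->exception, hence the THREAD_EXCEPTION_OFFSET
+ * test instead of a testl on %eax.
+ */
+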
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: x86/op_sget_wide.S */
+/*
+ * SGET_WIDE handler wrapper.
+ *
+ */
+    /* sget-wide vAA, field@BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGet64StaticFromCode
+    movl    rSELF, %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG %eax rINST                     # fp[A]<- low part
+    SET_VREG_HIGH %edx rINST                # fp[A+1]<- high part
+    REFRESH_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
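+/*
+ * sget-wide relies on the i386 cdecl rule that a 64-bit result is
+ * returned in %edx:%eax, which is why the high half is stored from %edx.
+ */
+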
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: x86/op_sget_object.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetObjStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 1
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: x86/op_sget_boolean.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetBooleanStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: x86/op_sget_byte.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetByteStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: x86/op_sget_char.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetCharStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: x86/op_sget_short.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGetShortStaticFromCode
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet32StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet32StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
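+/*
+ * The SPUT family mirrors SGET but passes the new value in and reports
+ * failure through the return value; roughly (sketch, inferred from the
+ * call site):
+ *
+ *   int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
+ *                              ArtMethod* referrer, Thread* self);
+ *
+ * Nonzero means an exception is pending, hence the jnz MterpException.
+ */
+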
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: x86/op_sput_wide.S */
+/*
+ * SPUT_WIDE handler wrapper.
+ *
+ */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet64IndirectStaticFromMterp
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: x86/op_sput_object.S */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 105
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpSputObject
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: x86/op_sput_boolean.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet8StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: x86/op_sput_byte.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet8StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: x86/op_sput_char.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet16StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: x86/op_sput_short.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet16StaticFromCode
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: x86/op_invoke_virtual.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 110
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeVirtual
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/*
+ * Handle a virtual method call.
+ *
+ * for: invoke-virtual, invoke-virtual/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
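+/*
+ * Every invoke wrapper funnels into a C helper of approximately this
+ * shape (sketch; the authoritative declarations live in mterp.cc):
+ *
+ *   bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame,
+ *                           uint16_t* dex_pc_ptr, uint16_t inst_data);
+ *
+ * A false return means an exception is pending (jz MterpException); on
+ * success the handler advances past the 3 code units of the invoke.
+ */
+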
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: x86/op_invoke_super.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 111
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeSuper
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/*
+ * Handle a "super" method call.
+ *
+ * for: invoke-super, invoke-super/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: x86/op_invoke_direct.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 112
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeDirect
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: x86/op_invoke_static.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 113
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeStatic
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: x86/op_invoke_interface.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 114
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeInterface
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/*
+ * Handle an interface method call.
+ *
+ * for: invoke-interface, invoke-interface/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: x86/op_return_void_no_barrier.S */
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
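+/*
+ * The no-barrier return polls the thread flags and only calls
+ * MterpSuspendCheck when a suspend or checkpoint is requested; the two
+ * xorls zero %ecx:%eax so MterpReturn sees a clean 64-bit zero result.
+ */
+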
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: x86/op_invoke_virtual_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 116
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeVirtualRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: x86/op_invoke_super_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 117
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeSuperRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: x86/op_invoke_direct_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 118
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeDirectRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: x86/op_invoke_static_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 119
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeStaticRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: x86/op_invoke_interface_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 120
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeInterfaceRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: x86/op_unused_79.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: x86/op_unused_7a.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: x86/op_neg_int.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    negl    %eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
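+/*
+ * Worked example of the vA/vB decode shared by these unops: if the
+ * instruction byte BA is 0x52, sarl $4 leaves B = 5 (source register
+ * index) and andb $0xf leaves A = 2 (destination register index).
+ */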
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: x86/op_not_int.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    notl %eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: x86/op_neg_long.S */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax %ecx                      # eax <- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- v[B+1]
+    negl    %eax
+    adcl    $0, %ecx
+    negl    %ecx
+    SET_VREG %eax rINST                     # v[A+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
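+/*
+ * The negl/adcl/negl sequence is 64-bit negation without a borrow chain:
+ * for x = hi:lo, -x has low word -lo and high word -(hi + (lo != 0)).
+ * negl %eax sets CF exactly when lo != 0, adcl folds that into %ecx, and
+ * the final negl yields the high word.  E.g. 0:1 (1) becomes
+ * ffffffff:ffffffff (-1).
+ */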
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: x86/op_not_long.S */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax %ecx                      # eax <- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- v[B+1]
+    notl    %eax
+    notl    %ecx
+    SET_VREG %eax rINST                     # v[A+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: x86/op_neg_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    fchs
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: x86/op_neg_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    fchs
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: x86/op_int_to_long.S */
+    /* int to long vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- +A
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movl    rIBASE, %ecx                    # cltd trashes rIBASE/edx
+    cltd                                    # rIBASE:eax <- ssssssssBBBBBBBB
+    SET_VREG_HIGH rIBASE rINST              # v[A+1] <- rIBASE
+    SET_VREG %eax rINST                     # v[A+0] <- %eax
+    movl    %ecx, rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
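+/*
+ * cltd sign-extends %eax into %edx (rIBASE): e.g. eax = fffffffe (-2)
+ * yields edx:eax = ffffffff:fffffffe.  That is why rIBASE is parked in
+ * %ecx around the conversion and restored afterwards.
+ */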
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: x86/op_int_to_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: x86/op_int_to_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: x86/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: x86/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG rINST rINST
+    .if 0
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: x86/op_long_to_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildll   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: x86/op_long_to_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildll   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: x86/op_float_to_int.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 0
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 0
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 0
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_float_to_int_special_case # fix up result
+
+.Lop_float_to_int_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 0
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_float_to_int_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_float_to_int_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 0
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_int_finish
+.Lop_float_to_int_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 0
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_int_finish
+
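+/*
+ * A minimal C sketch of the Java float->int rule implemented above
+ * (hypothetical helper name; truncate toward zero, saturate, NaN -> 0):
+ *
+ *   int32_t java_f2i(float f) {
+ *       if (f != f) return 0;                        // NaN
+ *       if (f >= 2147483648.0f) return INT32_MAX;    // clamp high
+ *       if (f <= -2147483648.0f) return INT32_MIN;   // clamp low
+ *       return (int32_t)f;                           // C truncates
+ *   }
+ *
+ * The x87 achieves the same effect: rounding control is forced to
+ * truncate, and the 0x80000000 "integer indefinite" pattern written on
+ * overflow or NaN is repaired in the special-case path.
+ */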
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: x86/op_float_to_long.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 0
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 1
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 1
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_float_to_long_special_case # fix up result
+
+.Lop_float_to_long_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 1
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_float_to_long_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_float_to_long_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 1
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_long_finish
+.Lop_float_to_long_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 1
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_long_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: x86/op_float_to_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: x86/op_double_to_int.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 1
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 0
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 0
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_double_to_int_special_case # fix up result
+
+.Lop_double_to_int_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 0
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_double_to_int_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_double_to_int_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 0
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_int_finish
+.Lop_double_to_int_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 0
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_int_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: x86/op_double_to_long.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 1
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 1
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 1
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_double_to_long_special_case # fix up result
+
+.Lop_double_to_long_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 1
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_double_to_long_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_double_to_long_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 1
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_long_finish
+.Lop_double_to_long_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 1
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_long_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: x86/op_double_to_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: x86/op_int_to_byte.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    movsbl  %al, %eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: x86/op_int_to_char.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    movzwl  %ax,%eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: x86/op_int_to_short.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    movswl %ax, %eax
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: x86/op_add_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    addl    (rFP,%ecx,4), %eax              # eax <- vBB + vCC
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: x86/op_sub_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    subl    (rFP,%ecx,4), %eax              # eax <- vBB - vCC
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: x86/op_mul_int.S */
+    /*
+     * 32-bit binary multiplication.
+     */
+    /* mul vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: x86/op_div_int.S */
+/* File: x86/bindiv.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    mov     rIBASE, LOCAL0(%esp)
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    movl    %eax, %edx
+    orl     %ecx, %edx
+    test    $0xFFFFFF00, %edx              # If both arguments fit in
+                                            #   8 bits and are +ve
+    jz      .Lop_div_int_8                   # Do 8-bit divide
+    test    $0xFFFF0000, %edx              # If both arguments fit in
+                                            #   16 bits and are +ve
+    jz      .Lop_div_int_16                  # Do 16-bit divide
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_32
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_32
+    movl    $0x80000000, %eax
+    jmp     .Lop_div_int_finish
+.Lop_div_int_32:
+    cltd
+    idivl   %ecx
+    jmp     .Lop_div_int_finish
+.Lop_div_int_8:
+    div     %cl                             # 8-bit divide otherwise.
+                                            # Remainder in %ah, quotient in %al
+    .if 0
+    movl    %eax, %edx
+    shr     $8, %edx
+    .else
+    andl    $0x000000FF, %eax
+    .endif
+    jmp     .Lop_div_int_finish
+.Lop_div_int_16:
+    xorl    %edx, %edx                      # Clear %edx before divide
+    div     %cx
+.Lop_div_int_finish:
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
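+/*
+ * The explicit -1 / 0x80000000 guard exists because Java defines
+ * Integer.MIN_VALUE / -1 as Integer.MIN_VALUE, while x86 idivl raises a
+ * #DE fault on that overflow.  The 8- and 16-bit shortcuts merely avoid
+ * the slower full-width idivl when both operands are small and positive.
+ */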
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: x86/op_rem_int.S */
+/* File: x86/bindiv.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    mov     rIBASE, LOCAL0(%esp)
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    movl    %eax, %edx
+    orl     %ecx, %edx
+    test    $0xFFFFFF00, %edx              # If both arguments fit in
+                                            #   8 bits and are +ve
+    jz      .Lop_rem_int_8                   # Do 8-bit divide
+    test    $0xFFFF0000, %edx              # If both arguments fit in
+                                            #   16 bits and are +ve
+    jz      .Lop_rem_int_16                  # Do 16-bit divide
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_32
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_32
+    movl    $0, rIBASE
+    jmp     .Lop_rem_int_finish
+.Lop_rem_int_32:
+    cltd
+    idivl   %ecx
+    jmp     .Lop_rem_int_finish
+.Lop_rem_int_8:
+    div     %cl                             # 8-bit divide otherwise.
+                                            # Remainder in %ah, quotient in %al
+    .if 1
+    movl    %eax, %edx
+    shr     $8, %edx
+    .else
+    andl    $0x000000FF, %eax
+    .endif
+    jmp     .Lop_rem_int_finish
+.Lop_rem_int_16:
+    xorl    %edx, %edx                      # Clear %edx before divide
+    div     %cx
+.Lop_rem_int_finish:
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: x86/op_and_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    andl    (rFP,%ecx,4), %eax              # eax <- vBB & vCC
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: x86/op_or_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    orl     (rFP,%ecx,4), %eax              # eax <- vBB | vCC
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: x86/op_xor_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    xorl    (rFP,%ecx,4), %eax              # eax <- vBB ^ vCC
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: x86/op_shl_int.S */
+/* File: x86/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands loaded to
+ * registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    sall    %cl, %eax                       # eax <- vBB << vCC
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: x86/op_shr_int.S */
+/* File: x86/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands loaded to
+ * registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    sarl    %cl, %eax                       # eax <- vBB >> vCC (arithmetic)
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: x86/op_ushr_int.S */
+/* File: x86/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands loaded to
+ * registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    shrl    %cl, %eax                       # eax <- vBB >> vCC (logical)
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: x86/op_add_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    addl    (rFP,%ecx,4), rIBASE            # rIBASE <- v[BB+0] + v[CC+0]
+    adcl    4(rFP,%ecx,4), %eax             # eax <- v[BB+1] + v[CC+1] + carry
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: x86/op_sub_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    subl    (rFP,%ecx,4), rIBASE            # rIBASE <- v[BB+0] - v[CC+0]
+    sbbl    4(rFP,%ecx,4), %eax             # eax <- v[BB+1] - v[CC+1] - borrow
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: x86/op_mul_long.S */
+/*
+ * Signed 64-bit integer multiply.
+ *
+ * We could definitely use more free registers for
+ * this code.  We spill rINSTw (ebx),
+ * giving us eax, ebx, ecx and edx as computational
+ * temps.  On top of that, we'll spill edi (rFP)
+ * for use as the vB pointer and esi (rPC) for use
+ * as the vC pointer.  Yuck.
+ *
+ */
+    /* mul-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[B]
+    leal    (rFP,%ecx,4), rFP               # rFP <- &v[C]
+    movl    4(%esi), %ecx                   # ecx <- Bmsw
+    imull   (rFP), %ecx                     # ecx <- (Bmsw*Clsw)
+    movl    4(rFP), %eax                    # eax <- Cmsw
+    imull   (%esi), %eax                    # eax <- (Cmsw*Blsw)
+    addl    %eax, %ecx                      # ecx <- (Bmsw*Clsw)+(Cmsw*Blsw)
+    movl    (rFP), %eax                     # eax <- Clsw
+    mull    (%esi)                          # eax <- (Clsw*Blsw)
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL1(%esp), rFP               # restore FP
+    leal    (%ecx,rIBASE), rIBASE           # full result now in rIBASE:%eax
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: x86/op_div_long.S */
+/* art_quick_* methods have the quick ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
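+/* As arranged below: dividend in eax (lo) / ecx (hi), divisor in
+ * edx (lo) / ebx (hi); the 64-bit result comes back in edx:eax,
+ * with edx doubling as rIBASE (hence the save/restore around the call).
+ */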
+    /* div vAA, vBB, vCC */
+    .extern art_quick_ldiv
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movzbl  3(rPC), %eax                    # eax <- CC
+    GET_VREG %ecx %eax                      # ecx <- vCC.lo
+    GET_VREG_HIGH %ebx %eax                 # ebx <- vCC.hi
+    movl    %ecx, %edx                      # edx <- vCC.lo (divisor arg)
+    orl     %ebx, %ecx                      # test vCC == 0
+    jz      common_errDivideByZero
+    movzbl  2(rPC), %eax                    # eax <- BB
+    GET_VREG_HIGH %ecx %eax                 # ecx <- vBB.hi
+    GET_VREG %eax %eax                      # eax <- vBB.lo
+    call    art_quick_ldiv
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: x86/op_rem_long.S */
+/* File: x86/op_div_long.S */
+/* art_quick_* methods have the quick ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* rem vAA, vBB, vCC */
+    .extern art_quick_lmod
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movzbl  3(rPC), %eax                    # eax <- CC
+    GET_VREG %ecx %eax                      # ecx <- vCC.lo
+    GET_VREG_HIGH %ebx %eax                 # ebx <- vCC.hi
+    movl    %ecx, %edx                      # edx <- vCC.lo (divisor arg)
+    orl     %ebx, %ecx                      # test vCC == 0
+    jz      common_errDivideByZero
+    movzbl  2(rPC), %eax                    # eax <- BB
+    GET_VREG_HIGH %ecx %eax                 # ecx <- vBB.hi
+    GET_VREG %eax %eax                      # eax <- vBB.lo
+    call    art_quick_lmod
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: x86/op_and_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    andl    (rFP,%ecx,4), rIBASE                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    andl    4(rFP,%ecx,4), %eax                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: x86/op_or_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    orl     (rFP,%ecx,4), rIBASE                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    orl     4(rFP,%ecx,4), %eax                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: x86/op_xor_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    xorl    (rFP,%ecx,4), rIBASE                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    xorl    4(rFP,%ecx,4), %eax                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: x86/op_shl_long.S */
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
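+/*
+ * Sketch of the fixup below: shldl/sall give the right answer for
+ * counts 0..31.  For counts 32..63 (bit 5 of %cl set) the required
+ * result is hi = lo << (count & 31), lo = 0, which the testb/branch
+ * pair patches in after the shifts; note (count & 31) == count - 32
+ * in that range.
+ */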
+    /* shl-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE <- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shldl   %eax,rIBASE
+    sall    %cl, %eax
+    testb   $32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: x86/op_shr_long.S */
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
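+/*
+ * For counts 32..63 the fixup below is the arithmetic variant:
+ * lo = hi >> (count & 31) and hi = hi >> 31, i.e. the high word is
+ * refilled with copies of the sign bit rather than with zeros.
+ */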
+    /* shr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE<- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: x86/op_ushr_long.S */
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* ushr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE <- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: x86/op_add_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    addss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: x86/op_sub_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    subss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: x86/op_mul_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    mulss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: x86/op_div_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    divss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: x86/op_rem_float.S */
+    /* rem_float vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    flds    VREG_ADDRESS(%ecx)              # vCC to fp stack
+    flds    VREG_ADDRESS(%eax)              # vBB to fp stack
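+    /*
+     * fprem computes only a partial remainder: it sets the FPU C2 flag
+     * while the reduction is incomplete.  fstsw/sahf move C2 into PF,
+     * so the jp below simply re-runs fprem until C2 clears.
+     */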
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(rINST)             # %st to vAA
+    CLEAR_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: x86/op_add_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    addsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: x86/op_sub_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    subsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: x86/op_mul_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    mulsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: x86/op_div_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    divsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: x86/op_rem_double.S */
+    /* rem_double vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    fldl    VREG_ADDRESS(%ecx)              # %st1 <- fp[vCC]
+    fldl    VREG_ADDRESS(%eax)              # %st0 <- fp[vBB]
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(rINST)             # fp[vAA] <- %st
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: x86/op_add_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = vA op vB".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
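+/*
+ * Encoding sketch (format 12x): the operand byte packs B|A, e.g. a
+ * hypothetical "add-int/2addr v3, v5" is opcode 0xb0 with operand byte
+ * 0x53: A (the in-place destination, 3) in the low nibble and B (5) in
+ * the high nibble, hence the and-with-0xf and the shift right by 4.
+ */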
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    addl    %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: x86/op_sub_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = vA op vB".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    subl    %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: x86/op_mul_int_2addr.S */
+    /* mul vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: x86/op_div_int_2addr.S */
+/* File: x86/bindiv2addr.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
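+/*
+ * The special case matters because idivl raises #DE not only on a zero
+ * divisor but also when the quotient overflows, which happens exactly
+ * for 0x80000000 / -1.  The bytecode defines that quotient as
+ * 0x80000000 (and the matching remainder as 0), so the pair is
+ * filtered out before idivl runs.
+ */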
+    /* div/rem/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    mov     rIBASE, LOCAL0(%esp)
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_2addr_continue_div2addr
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_2addr_continue_div2addr
+    movl    $0x80000000, %eax
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_div_int_2addr_continue_div2addr:
+    cltd
+    idivl   %ecx
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: x86/op_rem_int_2addr.S */
+/* File: x86/bindiv2addr.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    mov     rIBASE, LOCAL0(%esp)
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_2addr_continue_div2addr
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_2addr_continue_div2addr
+    movl    $0, rIBASE
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_rem_int_2addr_continue_div2addr:
+    cltd
+    idivl   %ecx
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: x86/op_and_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    andl    %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: x86/op_or_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    orl     %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: x86/op_xor_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    xorl    %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: x86/op_shl_int_2addr.S */
+/* File: x86/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    sall    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: x86/op_shr_int_2addr.S */
+/* File: x86/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    sarl    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: x86/op_ushr_int_2addr.S */
+/* File: x86/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    shrl    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: x86/op_add_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    addl    %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    adcl    %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: x86/op_sub_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    subl    %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    sbbl    %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: x86/op_mul_long_2addr.S */
+/*
+ * Signed 64-bit integer multiply, 2-addr version
+ *
+ * We could definitely use more free registers for
+ * this code.  We must spill %edx (rIBASE) because
+ * mull writes its high half there.  On top of that,
+ * we spill esi (rPC) for use as the vA pointer and
+ * rFP (edi) for use as the vB pointer.  Yuck.
+ */
+    /* mul-long/2addr vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    CLEAR_WIDE_REF %eax                     # clear refs in advance
+    sarl    $4, rINST                      # rINST <- B
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[A]
+    leal    (rFP,rINST,4), rFP              # rFP <- &v[B]
+    movl    4(%esi), %ecx                   # ecx <- Amsw
+    imull   (rFP), %ecx                     # ecx <- (Amsw*Blsw)
+    movl    4(rFP), %eax                    # eax <- Bmsw
+    imull   (%esi), %eax                    # eax <- (Bmsw*Alsw)
+    addl    %eax, %ecx                      # ecx <- (Amsw*Blsw)+(Bmsw*Alsw)
+    movl    (rFP), %eax                     # eax <- Blsw
+    mull    (%esi)                          # eax <- (Blsw*Alsw)
+    leal    (%ecx,rIBASE), rIBASE           # full result now in %edx:%eax
+    movl    rIBASE, 4(%esi)                 # v[A+1] <- rIBASE
+    movl    %eax, (%esi)                    # v[A] <- %eax
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    mov     LOCAL1(%esp), rFP               # restore FP
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: x86/op_div_long_2addr.S */
+/* art_quick_* methods have the quick ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* div/2addr vA, vB */
+    .extern   art_quick_ldiv
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    movzbl  rINSTbl, %eax
+    shrl    $4, %eax                       # eax <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movl    %ebx, %ecx                      # ecx <- A (rINST)
+    GET_VREG %edx %eax                      # edx <- vB.lo
+    GET_VREG_HIGH %ebx %eax                 # ebx <- vB.hi
+    movl    %edx, %eax                      # eax <- vB.lo
+    orl     %ebx, %eax                      # test vB == 0
+    jz      common_errDivideByZero
+    GET_VREG %eax %ecx                      # eax <- vA.lo
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- vA.hi
+    call    art_quick_ldiv
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: x86/op_rem_long_2addr.S */
+/* File: x86/op_div_long_2addr.S */
+/* art_quick_* methods have the quick ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* rem/2addr vA, vB */
+    .extern   art_quick_lmod
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    movzbl  rINSTbl, %eax
+    shrl    $4, %eax                       # eax <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movl    %ebx, %ecx                      # ecx <- A (rINST)
+    GET_VREG %edx %eax                      # edx <- vB.lo
+    GET_VREG_HIGH %ebx %eax                 # ebx <- vB.hi
+    movl    %edx, %eax                      # eax <- vB.lo
+    orl     %ebx, %eax                      # test vB == 0
+    jz      common_errDivideByZero
+    GET_VREG %eax %ecx                      # eax <- vA.lo
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- vA.hi
+    call    art_quick_lmod
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: x86/op_and_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    andl    %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    andl    %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: x86/op_or_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    orl     %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    orl     %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: x86/op_xor_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $0xF,rINSTbl                   # rINST<- A
+    xorl    %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    xorl    %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: x86/op_shl_long_2addr.S */
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shl-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vBB
+    shldl   %eax, rIBASE
+    sall    %cl, %eax
+    testb   $32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: x86/op_shr_long_2addr.S */
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vBB
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: x86/op_ushr_long_2addr.S */
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* ushr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vBB
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: x86/op_add_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    addss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(%ecx)   # clear ref of the dest vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: x86/op_sub_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    subss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(%ecx)   # clear ref of the dest vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: x86/op_mul_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    mulss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(%ecx)   # clear ref of the dest vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: x86/op_div_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    divss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(%ecx)   # clear ref of the dest vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: x86/op_rem_float_2addr.S */
+    /* rem_float/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $0xf, %cl                      # ecx <- A
+    flds    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: x86/op_add_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    addsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(%ecx)   # clear ref of the dest vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: x86/op_sub_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    subsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(%ecx)   # clear ref of the dest vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: x86/op_mul_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    mulsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(%ecx)   # clear ref of the dest vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: x86/op_div_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    divsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(%ecx)   # clear ref of the dest vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: x86/op_rem_double_2addr.S */
+    /* rem_double/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $0xf, %cl                      # ecx <- A
+    fldl    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_WIDE_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: x86/op_add_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
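+/*
+ * Encoding sketch (format 22s): unit 1 packs B|A|op and unit 2 holds
+ * the sign-extended literal, e.g. a hypothetical "add-int/lit16
+ * v1, v2, #+0x1234" is 0xd0 0x21 0x34 0x12, hence the movswl from
+ * 2(rPC) and the nibble extraction from rINSTbl.
+ */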
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    addl    %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: x86/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
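+/* i.e. vA <- #+CCCC - vB, which is why the subl below subtracts vB
+ * (in eax) from the literal (in ecx), reversed relative to the other
+ * lit16 ops. */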
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    subl    %eax, %ecx                                  # for example: addl %ecx, %eax
+    SET_VREG %ecx rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: x86/op_mul_int_lit16.S */
+    /* mul/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    mov     rIBASE, LOCAL0(%esp)
+    imull   %ecx, %eax                      # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: x86/op_div_int_lit16.S */
+/* File: x86/bindivLit16.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_lit16_continue_div
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_lit16_continue_div
+    movl    $0x80000000, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_div_int_lit16_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: x86/op_rem_int_lit16.S */
+/* File: x86/bindivLit16.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_lit16_continue_div
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_lit16_continue_div
+    movl    $0, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_rem_int_lit16_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: x86/op_and_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    andl    %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: x86/op_or_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    orl     %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: x86/op_xor_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    xorl    %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: x86/op_add_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
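+/*
+ * Encoding sketch (format 22b): the layout is AA|op CC|BB, e.g. a
+ * hypothetical "add-int/lit8 v0, v2, #+7" is 0xd8 0x00 0x02 0x07, so
+ * BB is read with movzbl from 2(rPC) and the signed literal CC with
+ * movsbl from 3(rPC).
+ */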
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    addl    %ecx, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: x86/op_rsub_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    subl    %eax, %ecx                                  # ex: addl %ecx,%eax
+    SET_VREG %ecx rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: x86/op_mul_int_lit8.S */
+    /* mul/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax  %eax                    # eax <- rBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   %ecx, %eax                      # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: x86/op_div_int_lit8.S */
+/* File: x86/bindivLit8.S */
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax %eax                     # eax <- rBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_lit8_continue_div
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_lit8_continue_div
+    movl    $0x80000000, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_div_int_lit8_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: x86/op_rem_int_lit8.S */
+/* File: x86/bindivLit8.S */
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax %eax                     # eax <- rBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_lit8_continue_div
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_lit8_continue_div
+    movl    $0, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_rem_int_lit8_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG rIBASE rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: x86/op_and_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    andl    %ecx, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: x86/op_or_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    orl     %ecx, %eax                                  # ex: addl %ecx,%eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: x86/op_xor_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than %eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    xorl    %ecx, %eax                      # result <- vBB ^ ssssssCC
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: x86/op_shl_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than %eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    sall    %cl, %eax                       # result <- vBB << (CC & 31)
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: x86/op_shr_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than %eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    sarl    %cl, %eax                       # result <- vBB >> (CC & 31), arithmetic
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: x86/op_ushr_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than %eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- rBB
+    shrl    %cl, %eax                       # result <- vBB >> (CC & 31), logical
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
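The "quick" field getters rely on the field offset having been resolved at
quickening time and embedded directly in the instruction, so the handler
reduces to a null check plus a single load. A C sketch under that
assumption (names illustrative, not from this patch):

    #include <stdint.h>

    /* iget-quick: raw load at a pre-resolved byte offset from the
     * object pointer; only the null check remains at runtime. */
    static int iget_quick(const char *obj, uint16_t field_offset,
                          int32_t *out, void (*throw_npe)(void)) {
        if (obj == 0) { throw_npe(); return 0; }  /* common_errNullObject */
        *out = *(const int32_t *)(obj + field_offset);
        return 1;
    }
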
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: x86/op_iget_wide_quick.S */
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movq    (%ecx,%eax,1), %xmm0
+    andb    $0xf, rINSTbl                  # rINST <- A
+    SET_WIDE_FP_VREG %xmm0 rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
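The wide variant moves the 64-bit field through %xmm0 so both halves travel
in one 8-byte access on 32-bit x86 rather than two 32-bit loads. An
equivalent single-copy sketch in C (illustrative only):

    #include <stdint.h>
    #include <string.h>

    /* 64-bit quick field read as one 8-byte copy, mirroring the
     * movq through %xmm0 in the handler above. */
    static uint64_t iget_wide_quick(const char *obj, uint16_t field_offset) {
        uint64_t v;
        memcpy(&v, obj + field_offset, sizeof v);
        return v;
    }
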
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: x86/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    %eax, OUT_ARG1(%esp)
+    EXPORT_PC
+    call    artIGetObjectFromMterp          # (obj, offset)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
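Unlike the scalar getters, the object getter calls into the runtime
(artIGetObjectFromMterp) so reference reads get the runtime's handling
(e.g. read barriers), then checks the thread's pending-exception slot
before storing the result. A sketch of that call-then-check shape, with
stand-in names for the runtime pieces:

    #include <stdint.h>

    struct thread { void *exception; };  /* stand-in for the real Thread */

    /* Stand-in for the artIGetObjectFromMterp entry point. */
    extern void *get_object_field(void *obj, uint32_t offset);

    static int iget_object_quick(struct thread *self, void *obj,
                                 uint32_t offset, void **out) {
        void *value = get_object_field(obj, offset);
        if (self->exception != 0) return 0;   /* -> MterpException */
        *out = value;
        return 1;
    }
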
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movl    rINST, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
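The quick setters are the mirror image: null check, then one raw store at
the resolved offset; the boolean/byte/char/short variants further down only
narrow the store width (movb/movw). A sketch (illustrative only):

    #include <stdint.h>

    /* iput-quick: raw store at a pre-resolved byte offset. */
    static int iput_quick(char *obj, uint16_t field_offset, int32_t value,
                          void (*throw_npe)(void)) {
        if (obj == 0) { throw_npe(); return 0; }  /* common_errNullObject */
        *(int32_t *)(obj + field_offset) = value;
        return 1;
    }
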
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: x86/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    movzbl    rINSTbl, %ecx                 # ecx<- BA
+    sarl      $4, %ecx                     # ecx<- B
+    GET_VREG  %ecx %ecx                     # vB (object we're operating on)
+    testl     %ecx, %ecx                    # is object null?
+    je        common_errNullObject
+    movzwl    2(rPC), %eax                  # eax<- field byte offset
+    leal      (%ecx,%eax,1), %ecx           # ecx<- Address of 64-bit target
+    andb      $0xf, rINSTbl                # rINST<- A
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0<- fp[A]/fp[A+1]
+    movq      %xmm0, (%ecx)                 # obj.field <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: x86/op_iput_object_quick.S */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 232
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpIputObjectQuick
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: x86/op_invoke_virtual_quick.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 233
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeVirtualQuick
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
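All invoke handlers built on x86/invoke.S share one contract: marshal
(self, shadow_frame, dex_pc, inst) to a C++ helper, bail to MterpException
if it reports failure, otherwise refresh rIBASE and advance rPC by the
instruction width (3 code units here). A sketch of that contract with a
stand-in helper name and types:

    #include <stdbool.h>
    #include <stdint.h>

    struct thread;        /* stand-ins for the runtime's Thread and */
    struct shadow_frame;  /* ShadowFrame types                      */

    /* Stand-in for a helper such as MterpInvokeVirtualQuick. */
    extern bool invoke_helper(struct thread *self, struct shadow_frame *sf,
                              const uint16_t *dex_pc, uint32_t inst);

    static bool do_invoke(struct thread *self, struct shadow_frame *sf,
                          const uint16_t *dex_pc, uint32_t inst) {
        if (!invoke_helper(self, sf, dex_pc, inst))
            return false;                /* caller jumps to MterpException */
        return true;                     /* then ADVANCE_PC ... 3          */
    }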
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: x86/op_invoke_virtual_range_quick.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 234
+    movl    rINST, OUT_ARG3(%esp)
+    call    MterpInvokeVirtualQuickRange
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: x86/op_iput_boolean_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movb    rINSTbl, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: x86/op_iput_byte_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movb    rINSTbl, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: x86/op_iput_char_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movw    rINSTw, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: x86/op_iput_short_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movw    rINSTw, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: x86/op_iget_boolean_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movsbl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: x86/op_iget_byte_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movsbl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: x86/op_iget_char_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movzwl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: x86/op_iget_short_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movswl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_lambda: /* 0xf3 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: x86/op_unused_f4.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_capture_variable: /* 0xf5 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_create_lambda: /* 0xf6 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_liberate_variable: /* 0xf7 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_box_lambda: /* 0xf8 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unbox_lambda: /* 0xf9 */
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: x86/op_unused_fa.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: x86/op_unused_fb.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: x86/op_unused_fc.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: x86/op_unused_fd.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: x86/op_unused_fe.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: x86/op_unused_ff.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(0*128)
+
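Because every handler, real and ALT alike, is .balign 128 and padded to
exactly 128 bytes, dispatch needs no table of pointers: the target address
is computed as base + opcode * 128, which is exactly what the literal
(N*128) in each stub's final jmp encodes. A sketch of the address
arithmetic (illustrative only):

    #include <stddef.h>
    #include <stdint.h>

    enum { kHandlerSize = 128 };  /* matches the .balign 128 padding */

    /* Handler address = instruction base + opcode * 128, as in
     * jmp .L_op_nop+(N*128) above. */
    static const void *handler_for(const char *ibase, uint8_t opcode) {
        return ibase + (size_t)opcode * kHandlerSize;
    }
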
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(1*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(2*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(3*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(4*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(5*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(6*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(7*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(8*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(9*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(10*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(11*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(12*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(13*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(14*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(15*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(16*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(17*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(18*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(19*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(20*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(21*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(22*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(23*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(24*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(25*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(26*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(27*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(28*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(29*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(30*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(31*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(32*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(33*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(34*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(35*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(36*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(37*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(38*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(39*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(40*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(41*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(42*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(43*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(44*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(45*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(46*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(47*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(48*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(49*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(50*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(51*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(52*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(53*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(54*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(55*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(56*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(57*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(58*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(59*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(60*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(61*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(62*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(63*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(64*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(65*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(66*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(67*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(68*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(69*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(70*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(71*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(72*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(73*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(74*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(75*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(76*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(77*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(78*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(79*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(80*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(81*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(82*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(83*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(84*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(85*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(86*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(87*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(88*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(89*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(90*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(91*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(92*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(93*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(94*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(95*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(96*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(97*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(98*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(99*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(100*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(101*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(102*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(103*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(104*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(105*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(106*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(107*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(108*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(109*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(110*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(111*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(112*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(113*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(114*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(115*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(116*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(117*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(118*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(119*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(120*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(121*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(122*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(123*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(124*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(125*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(126*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(127*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(128*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(129*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(130*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(131*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(132*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(133*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(134*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(135*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(136*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(137*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(138*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(139*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(140*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(141*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(142*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(143*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(144*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(145*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(146*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(147*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(148*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(149*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(150*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(151*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(152*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(153*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(154*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(155*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(156*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(157*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(158*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(159*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(160*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(161*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(162*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(163*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(164*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(165*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(166*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(167*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(168*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(169*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(170*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(171*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(172*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(173*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(174*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(175*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(176*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(177*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(178*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(179*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(180*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(181*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(182*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(183*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(184*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(185*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(186*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(187*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(188*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(189*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(190*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(191*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(192*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(193*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(194*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(195*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(196*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(197*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(198*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(199*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(200*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(201*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(202*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(203*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(204*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(205*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(206*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(207*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(208*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(209*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(210*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(211*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(212*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(213*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(214*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(215*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(216*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(217*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(218*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(219*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(220*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(221*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(222*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(223*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(224*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(225*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(226*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(227*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(228*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(229*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(230*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(231*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(232*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(233*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(234*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(235*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(236*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(237*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(238*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(239*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(240*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(241*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(242*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_lambda: /* 0xf3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(243*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(244*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_capture_variable: /* 0xf5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(245*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_create_lambda: /* 0xf6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(246*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_liberate_variable: /* 0xf7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(247*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_box_lambda: /* 0xf8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(248*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unbox_lambda: /* 0xf9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(249*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(250*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(251*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(252*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(253*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(254*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(255*128)
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
+/* File: x86/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogDivideByZeroException
+#endif
+    jmp     MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogArrayIndexException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNegativeArraySizeException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNoSuchMethodException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNullObjectException
+#endif
+    jmp     MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogExceptionThrownException
+#endif
+    jmp     MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    THREAD_FLAGS_OFFSET(%eax), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    MterpLogSuspendFallback
+#endif
+    jmp     MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    movl    rSELF, %eax
+    testl   $-1, THREAD_EXCEPTION_OFFSET(%eax)
+    jz      MterpFallback
+    /* intentional fallthrough - handle pending exception. */
+
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ */
+MterpException:
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpHandleException
+    testl   %eax, %eax
+    jz      MterpExceptionReturn
+    REFRESH_IBASE
+    movl    OFF_FP_CODE_ITEM(rFP), %eax
+    movl    OFF_FP_DEX_PC(rFP), %ecx
+    lea     CODEITEM_INSNS_OFFSET(%eax), rPC
+    lea     (rPC, %ecx, 2), rPC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* resume execution at catch block */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for a suspend request.  Assumes rINST is already loaded and rPC has been
+ * advanced; we still need to fetch the opcode and branch to its handler.
+ */
+MterpCheckSuspendAndContinue:
+    movl    rSELF, %eax
+    EXPORT_PC
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+    REFRESH_IBASE
+1:
+    GOTO_NEXT
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogFallback
+#endif
+MterpCommonFallback:
+    xor     %eax, %eax
+    jmp     MterpDone
+
+/*
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    movl    $1, %eax
+    jmp     MterpDone
+MterpReturn:
+    movl    OFF_FP_RESULT_REGISTER(rFP), %edx
+    movl    %eax, (%edx)
+    movl    %ecx, 4(%edx)
+    mov     $1, %eax
+MterpDone:
+    /* Restore callee save registers */
+    movl    EBP_SPILL(%esp), %ebp
+    movl    EDI_SPILL(%esp), %edi
+    movl    ESI_SPILL(%esp), %esi
+    movl    EBX_SPILL(%esp), %ebx
+
+    /* Pop the frame */
+    addl    $FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset -FRAME_SIZE
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
diff --git a/runtime/interpreter/mterp/rebuild.sh b/runtime/interpreter/mterp/rebuild.sh
index a325fff..8b26976 100755
--- a/runtime/interpreter/mterp/rebuild.sh
+++ b/runtime/interpreter/mterp/rebuild.sh
@@ -21,4 +21,4 @@
 set -e
 
 # for arch in arm x86 mips arm64 x86_64 mips64; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
-for arch in arm; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
+for arch in arm x86; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
diff --git a/runtime/interpreter/mterp/x86/alt_stub.S b/runtime/interpreter/mterp/x86/alt_stub.S
new file mode 100644
index 0000000..6462fc5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/alt_stub.S
@@ -0,0 +1,20 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    MterpCheckBefore                # (self, shadow_frame)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(${opnum}*${handler_size_bytes})
diff --git a/runtime/interpreter/mterp/x86/bincmp.S b/runtime/interpreter/mterp/x86/bincmp.S
new file mode 100644
index 0000000..a9a8c3a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bincmp.S
@@ -0,0 +1,28 @@
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_VREG %eax %ecx                      # eax <- vA
+    sarl    $$4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $$2, %eax                       # assume not taken
+    j${revcmp}   1f
+    movswl  2(rPC),%eax                     # Get signed branch offset
+1:
+    addl    %eax, %eax                      # eax <- CCCC * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
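+    /*
+     * Note: only a taken backward branch (non-positive doubled offset) reaches
+     * the suspend-check path below; forward and untaken branches skip it, which
+     * keeps bytecode loops interruptible without taxing the fast path.
+     */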
+    jg      2f                              # CCCC * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/bindiv.S b/runtime/interpreter/mterp/x86/bindiv.S
new file mode 100644
index 0000000..742f758
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindiv.S
@@ -0,0 +1,48 @@
+%default {"result":"","special":"","rem":""}
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
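+/*
+ * Illustrative note: the min-int guard below exists because "idivl" raises a
+ * divide error (#DE) on INT_MIN / -1 (the quotient 2^31 is unrepresentable),
+ * while Java defines that case as INT_MIN for div and 0 for rem; the "special"
+ * template argument supplies the required constant.
+ */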
+    /* div/rem vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    mov     rIBASE, LOCAL0(%esp)
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    movl    %eax, %edx
+    orl     %ecx, %edx
+    test    $$0xFFFFFF00, %edx              # If both arguments are positive
+                                            #   and fit in 8 bits
+    jz      .L${opcode}_8                   # Do 8-bit divide
+    test    $$0xFFFF0000, %edx              # If both arguments are positive
+                                            #   and fit in 16 bits
+    jz      .L${opcode}_16                  # Do 16-bit divide
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_32
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_32
+    movl    $special, $result
+    jmp     .L${opcode}_finish
+.L${opcode}_32:
+    cltd
+    idivl   %ecx
+    jmp     .L${opcode}_finish
+.L${opcode}_8:
+    div     %cl                             # 8-bit divide otherwise.
+                                            # Remainder in %ah, quotient in %al
+    .if $rem
+    movl    %eax, %edx
+    shr     $$8, %edx
+    .else
+    andl    $$0x000000FF, %eax
+    .endif
+    jmp     .L${opcode}_finish
+.L${opcode}_16:
+    xorl    %edx, %edx                      # Clear %edx before divide
+    div     %cx
+.L${opcode}_finish:
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/bindiv2addr.S b/runtime/interpreter/mterp/x86/bindiv2addr.S
new file mode 100644
index 0000000..ee7c523
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindiv2addr.S
@@ -0,0 +1,29 @@
+%default {"result":"","special":""}
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    mov     rIBASE, LOCAL0(%esp)
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_continue_div2addr
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_continue_div2addr
+    movl    $special, $result
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.L${opcode}_continue_div2addr:
+    cltd
+    idivl   %ecx
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/bindivLit16.S b/runtime/interpreter/mterp/x86/bindivLit16.S
new file mode 100644
index 0000000..a2c4334
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindivLit16.S
@@ -0,0 +1,29 @@
+%default {"result":"","special":""}
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_continue_div
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_continue_div
+    movl    $special, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.L${opcode}_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/bindivLit8.S b/runtime/interpreter/mterp/x86/bindivLit8.S
new file mode 100644
index 0000000..61bee06
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindivLit8.S
@@ -0,0 +1,26 @@
+%default {"result":"","special":""}
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax %eax                     # eax <- vBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_continue_div
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_continue_div
+    movl    $special, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.L${opcode}_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG $result rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binop.S b/runtime/interpreter/mterp/x86/binop.S
new file mode 100644
index 0000000..5383f25
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binop.S
@@ -0,0 +1,17 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    $instr                                  # ex: addl    (rFP,%ecx,4),%eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binop1.S b/runtime/interpreter/mterp/x86/binop1.S
new file mode 100644
index 0000000..cd51d0c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binop1.S
@@ -0,0 +1,13 @@
+%default {"result":"%eax","tmp":"%ecx"}
+/*
+ * Generic 32-bit binary operation in which both operands loaded to
+ * registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    $instr                                  # ex: addl    %ecx,%eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binop2addr.S b/runtime/interpreter/mterp/x86/binop2addr.S
new file mode 100644
index 0000000..abee4db
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binop2addr.S
@@ -0,0 +1,19 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $$0xf, %cl                      # ecx <- A
+    $instr                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/binopLit16.S b/runtime/interpreter/mterp/x86/binopLit16.S
new file mode 100644
index 0000000..6c7fe61
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopLit16.S
@@ -0,0 +1,19 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    $instr                                  # for example: addl %ecx, %eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binopLit8.S b/runtime/interpreter/mterp/x86/binopLit8.S
new file mode 100644
index 0000000..924685d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopLit8.S
@@ -0,0 +1,18 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax %eax                      # eax <- vBB
+    $instr                                  # ex: addl %ecx,%eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binopWide.S b/runtime/interpreter/mterp/x86/binopWide.S
new file mode 100644
index 0000000..9f7106e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopWide.S
@@ -0,0 +1,15 @@
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    movl    rIBASE,LOCAL0(%esp)             # save rIBASE
+    GET_VREG rIBASE %eax                    # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1]
+    $instr1                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    $instr2                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE rINST                   # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp),rIBASE             # restore rIBASE
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binopWide2addr.S b/runtime/interpreter/mterp/x86/binopWide2addr.S
new file mode 100644
index 0000000..7560af4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopWide2addr.S
@@ -0,0 +1,13 @@
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx<- BA
+    sarl    $$4,%ecx                        # ecx<- B
+    GET_VREG %eax %ecx                      # eax<- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx<- v[B+1]
+    andb    $$0xF,rINSTbl                   # rINST<- A
+    $instr1                                 # ex: addl   %eax,(rFP,rINST,4)
+    $instr2                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/cvtfp_int.S b/runtime/interpreter/mterp/x86/cvtfp_int.S
new file mode 100644
index 0000000..a8bad63
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/cvtfp_int.S
@@ -0,0 +1,61 @@
+%default {"srcdouble":"1","tgtlong":"1"}
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
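+/*
+ * Illustrative examples of the required semantics: (int)Float.NaN == 0,
+ * (int)1e30f == Integer.MAX_VALUE, (int)-1e30f == Integer.MIN_VALUE, and
+ * (int)-2.7 == -2 (round toward zero).
+ */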
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    .if $srcdouble
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $$0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $$0xf, %cl                      # ecx <- A
+    .if $tgtlong
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if $tgtlong
+    movl    $$0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $$0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .L${opcode}_special_case # fix up result
+
+.L${opcode}_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if $tgtlong
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.L${opcode}_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .L${opcode}_isNaN
+    adcl    $$-1, VREG_ADDRESS(%ecx)
+    .if $tgtlong
+    adcl    $$-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .L${opcode}_finish
+.L${opcode}_isNaN:
+    movl    $$0, VREG_ADDRESS(%ecx)
+    .if $tgtlong
+    movl    $$0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .L${opcode}_finish
diff --git a/runtime/interpreter/mterp/x86/entry.S b/runtime/interpreter/mterp/x86/entry.S
new file mode 100644
index 0000000..a24ef70
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/entry.S
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  0  Thread* self
+ *  1  code_item
+ *  2  ShadowFrame
+ *  3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    /* Allocate frame */
+    subl    $$FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset FRAME_SIZE
+
+    /* Spill callee save regs */
+    movl    %ebp, EBP_SPILL(%esp)
+    movl    %edi, EDI_SPILL(%esp)
+    movl    %esi, ESI_SPILL(%esp)
+    movl    %ebx, EBX_SPILL(%esp)
+
+    /* Load ShadowFrame pointer */
+    movl    IN_ARG2(%esp), %edx
+
+    /* Remember the return register */
+    movl    IN_ARG3(%esp), %eax
+    movl    %eax, SHADOWFRAME_RESULT_REGISTER_OFFSET(%edx)
+
+    /* Remember the code_item */
+    movl    IN_ARG1(%esp), %ecx
+    movl    %ecx, SHADOWFRAME_CODE_ITEM_OFFSET(%edx)
+
+    /* set up "named" registers */
+    movl    SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(%edx), %eax
+    leal    SHADOWFRAME_VREGS_OFFSET(%edx), rFP
+    leal    (rFP, %eax, 4), rREFS
+    movl    SHADOWFRAME_DEX_PC_OFFSET(%edx), %eax
+    lea     CODEITEM_INSNS_OFFSET(%ecx), rPC
+    lea     (rPC, %eax, 2), rPC
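+    /* Note: dex code units are 16 bits wide, hence the *2 scale above. */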
+    EXPORT_PC
+
+    /* Starting ibase */
+    REFRESH_IBASE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
diff --git a/runtime/interpreter/mterp/x86/fallback.S b/runtime/interpreter/mterp/x86/fallback.S
new file mode 100644
index 0000000..8d61166
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/fallback.S
@@ -0,0 +1,3 @@
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
new file mode 100644
index 0000000..a2a36c4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -0,0 +1,186 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogDivideByZeroException
+#endif
+    jmp     MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogArrayIndexException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNegativeArraySizeException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNoSuchMethodException
+#endif
+    jmp     MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogNullObjectException
+#endif
+    jmp     MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogExceptionThrownException
+#endif
+    jmp     MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    THREAD_FLAGS_OFFSET(%eax), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    MterpLogSuspendFallback
+#endif
+    jmp     MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    movl    rSELF, %eax
+    testl   $$-1, THREAD_EXCEPTION_OFFSET(%eax)
+    jz      MterpFallback
+    /* intentional fallthrough - handle pending exception. */
+
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ */
+MterpException:
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpHandleException
+    testl   %eax, %eax
+    jz      MterpExceptionReturn
+    REFRESH_IBASE
+    movl    OFF_FP_CODE_ITEM(rFP), %eax
+    movl    OFF_FP_DEX_PC(rFP), %ecx
+    lea     CODEITEM_INSNS_OFFSET(%eax), rPC
+    lea     (rPC, %ecx, 2), rPC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* resume execution at catch block */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for a suspend request.  Assumes rINST is already loaded and rPC has been
+ * advanced; we still need to fetch the opcode and branch to its handler.
+ */
+MterpCheckSuspendAndContinue:
+    movl    rSELF, %eax
+    EXPORT_PC
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+    REFRESH_IBASE
+1:
+    GOTO_NEXT
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpLogFallback
+#endif
+MterpCommonFallback:
+    xor     %eax, %eax
+    jmp     MterpDone
+
+/*
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    movl    $$1, %eax
+    jmp     MterpDone
+MterpReturn:
+    movl    OFF_FP_RESULT_REGISTER(rFP), %edx
+    movl    %eax, (%edx)
+    movl    %ecx, 4(%edx)
+    mov     $$1, %eax
+MterpDone:
+    /* Restore callee save registers */
+    movl    EBP_SPILL(%esp), %ebp
+    movl    EDI_SPILL(%esp), %edi
+    movl    ESI_SPILL(%esp), %esi
+    movl    EBX_SPILL(%esp), %ebx
+
+    /* Pop the frame */
+    addl    $$FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset -FRAME_SIZE
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/x86/fpcmp.S b/runtime/interpreter/mterp/x86/fpcmp.S
new file mode 100644
index 0000000..2b98667
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/fpcmp.S
@@ -0,0 +1,35 @@
+%default {"suff":"d","nanval":"pos"}
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movs${suff} VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomis${suff} VREG_ADDRESS(%ecx), %xmm0
+    jp      .L${opcode}_nan_is_${nanval}
+    je      .L${opcode}_finish
+    jb      .L${opcode}_less
+.L${opcode}_nan_is_pos:
+    incl    %eax
+    jmp     .L${opcode}_finish
+.L${opcode}_nan_is_neg:
+.L${opcode}_less:
+    decl    %eax
+.L${opcode}_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/fpcvt.S b/runtime/interpreter/mterp/x86/fpcvt.S
new file mode 100644
index 0000000..7808285
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/fpcvt.S
@@ -0,0 +1,17 @@
+%default {"instr":"","load":"","store":"","wide":"0"}
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    $load   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $$0xf, %cl                      # ecx <- A
+    $instr
+    $store  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if $wide
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
new file mode 100644
index 0000000..2481785
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl()-style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+x86 ABI general notes:
+
+Caller save set:
+   eax, edx, ecx, st(0)-st(7)
+Callee save set:
+   ebx, esi, edi, ebp
+Return regs:
+   32-bit in eax
+   64-bit in edx:eax (low-order 32 in eax)
+   fp on top of fp stack st(0)
+
+Parameters passed on stack, pushed right-to-left.  On entry to target, first
+parm is at 4(%esp).  Traditional entry code is:
+
+functEntry:
+    push    %ebp             # save old frame pointer
+    mov     %esp,%ebp        # establish new frame pointer
+    sub     FrameSize,%esp   # Allocate storage for spill, locals & outs
+
+Once past the prologue, arguments are referenced at ((argno + 2)*4)(%ebp)
+
+Stack must be 16-byte aligned to support SSE in native code.
+
+If we're not doing variable stack allocation (alloca), the frame pointer can be
+eliminated and all arg references adjusted to be esp relative.
+*/
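+
+/*
+ * Illustrative example: after the prologue above, 0(%ebp) holds the saved
+ * frame pointer and 4(%ebp) the return address, so argno 0 sits at
+ * ((0 + 2) * 4)(%ebp) = 8(%ebp) and would be loaded with "movl 8(%ebp), %eax".
+ */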
+
+/*
+Mterp and x86 notes:
+
+Some key interpreter variables will be assigned to registers.
+
+  nick     reg   purpose
+  rPC      esi   interpreted program counter, used for fetching instructions
+  rFP      edi   interpreted frame pointer, used for accessing locals and args
+  rINSTw   bx    first 16-bit code unit of current instruction
+  rINSTbl  bl    opcode portion of instruction word
+  rINSTbh  bh    high byte of inst word, usually contains src/tgt reg names
+  rIBASE   edx   base of instruction handler table
+  rREFS    ebp   base of object references in shadow frame.
+
+Notes:
+   o High order 16 bits of ebx must be zero on entry to handler
+   o rPC, rFP, rINSTw/rINSTbl valid on handler entry and exit
+   o eax and ecx are scratch, rINSTw/ebx sometimes scratch
+
+Macros are provided for common operations.  Where possible, a macro emits a
+single instruction to make instruction-counting easier.  Macros MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* Frame size must be 16-byte aligned.
+ * Remember about 4 bytes for return address
+ */
+#define FRAME_SIZE     44
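+/* Illustrative check: the call pushes a 4-byte return address, and
+ * 44 + 4 = 48 is a multiple of 16, so the frame preserves the 16-byte
+ * alignment required for SSE. */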
+
+/* Frame diagram while executing ExecuteMterpImpl, high to low addresses */
+#define IN_ARG3        (FRAME_SIZE + 16)
+#define IN_ARG2        (FRAME_SIZE + 12)
+#define IN_ARG1        (FRAME_SIZE +  8)
+#define IN_ARG0        (FRAME_SIZE +  4)
+#define CALLER_RP      (FRAME_SIZE +  0)
+/* Spill offsets relative to %esp */
+#define EBP_SPILL      (FRAME_SIZE -  4)
+#define EDI_SPILL      (FRAME_SIZE -  8)
+#define ESI_SPILL      (FRAME_SIZE - 12)
+#define EBX_SPILL      (FRAME_SIZE - 16)
+#define LOCAL0         (FRAME_SIZE - 20)
+#define LOCAL1         (FRAME_SIZE - 24)
+#define LOCAL2         (FRAME_SIZE - 28)
+/* Out Arg offsets, relative to %esp */
+#define OUT_ARG3       ( 12)
+#define OUT_ARG2       (  8)
+#define OUT_ARG1       (  4)
+#define OUT_ARG0       (  0)  /* <- ExecuteMterpImpl esp + 0 */
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rSELF    IN_ARG0(%esp)
+#define rPC      %esi
+#define rFP      %edi
+#define rINST    %ebx
+#define rINSTw   %bx
+#define rINSTbh  %bh
+#define rINSTbl  %bl
+#define rIBASE   %edx
+#define rREFS    %ebp
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+/*
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
+
+/*
+ * Refresh handler table.
+ * rIBASE lives in a caller-save register (edx), so we must restore it after
+ * each call.  It is also clobbered by some 64-bit operations (imul leaves the
+ * high half of its result in edx), so we must restore it in those cases too.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro REFRESH_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * If rSELF is already loaded then we can use it from known reg.
+ */
+.macro REFRESH_IBASE_FROM_SELF _reg
+    movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
+.endm
+
+/*
+ * Refresh rINST.
+ * On entry to a handler, rINST no longer contains the opcode number.
+ * However, some utilities require the full instruction word, so this macro
+ * reconstructs it (operand byte in rINSTbh, opcode number in rINSTbl).
+ */
+.macro REFRESH_INST _opnum
+    movb    rINSTbl, rINSTbh
+    movb    $$\_opnum, rINSTbl
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINSTw.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    movzwl  (rPC), rINST
+.endm
+
+/*
+ * Remove opcode from rINST, compute the address of handler and jump to it.
+ */
+.macro GOTO_NEXT
+    movzx   rINSTbl,%eax
+    movzbl  rINSTbh,rINST
+    shll    $$${handler_size_bits}, %eax
+    addl    rIBASE, %eax
+    jmp     *%eax
+.endm
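+
+/*
+ * Illustrative note: handlers are laid out at fixed power-of-two intervals
+ * (128 bytes in the generated output above, i.e. a shift of 7), so for opcode
+ * N this computes rIBASE + N * 128 -- the same arithmetic as the
+ * ".L_op_nop+(N*128)" jumps emitted for the alt stubs.
+ */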
+
+/*
+ * Advance rPC by instruction count.
+ */
+.macro ADVANCE_PC _count
+    leal    2*\_count(rPC), rPC
+.endm
+
+/*
+ * Advance rPC by instruction count, fetch instruction and jump to handler.
+ */
+.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count
+    ADVANCE_PC \_count
+    FETCH_INST
+    GOTO_NEXT
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define VREG_ADDRESS(_vreg) (rFP,_vreg,4)
+#define VREG_HIGH_ADDRESS(_vreg) 4(rFP,_vreg,4)
+#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4)
+#define VREG_REF_HIGH_ADDRESS(_vreg) 4(rREFS,_vreg,4)
+
+.macro GET_VREG _reg _vreg
+    movl    (rFP,\_vreg,4), \_reg
+.endm
+
+/* Read wide value to xmm. */
+.macro GET_WIDE_FP_VREG _reg _vreg
+    movq    (rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    $$0, (rREFS,\_vreg,4)
+.endm
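+
+/*
+ * Illustrative note: vregs are double-stored -- (rFP) holds the raw value and
+ * (rREFS) a parallel reference slot that the GC scans.  A non-reference write
+ * must zero its rREFS slot (as SET_VREG does above) so a stale object pointer
+ * is not kept live; SET_VREG_OBJECT below stores to both arrays so the GC can
+ * find and update the reference.
+ */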
+
+/* Write wide value from xmm. xmm is clobbered. */
+.macro SET_WIDE_FP_VREG _reg _vreg
+    movq    \_reg, (rFP,\_vreg,4)
+    pxor    \_reg, \_reg
+    movq    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro SET_VREG_OBJECT _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro GET_VREG_HIGH _reg _vreg
+    movl    4(rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG_HIGH _reg _vreg
+    movl    \_reg, 4(rFP,\_vreg,4)
+    movl    $$0, 4(rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_REF _vreg
+    movl    $$0,  (rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_WIDE_REF _vreg
+    movl    $$0,  (rREFS,\_vreg,4)
+    movl    $$0, 4(rREFS,\_vreg,4)
+.endm
diff --git a/runtime/interpreter/mterp/x86/invoke.S b/runtime/interpreter/mterp/x86/invoke.S
new file mode 100644
index 0000000..80f7822
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/invoke.S
@@ -0,0 +1,20 @@
+%default { "helper":"UndefinedInvokeHandler" }
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG3(%esp)
+    call    $helper
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_add_double.S b/runtime/interpreter/mterp/x86/op_add_double.S
new file mode 100644
index 0000000..de2708f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"adds","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_add_double_2addr.S b/runtime/interpreter/mterp/x86/op_add_double_2addr.S
new file mode 100644
index 0000000..538c9ab
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"adds","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_add_float.S b/runtime/interpreter/mterp/x86/op_add_float.S
new file mode 100644
index 0000000..80b1736
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"adds","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_add_float_2addr.S b/runtime/interpreter/mterp/x86/op_add_float_2addr.S
new file mode 100644
index 0000000..6649253
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"adds","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int.S b/runtime/interpreter/mterp/x86/op_add_int.S
new file mode 100644
index 0000000..f71a56b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"addl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int_2addr.S b/runtime/interpreter/mterp/x86/op_add_int_2addr.S
new file mode 100644
index 0000000..5d43b65
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"addl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int_lit16.S b/runtime/interpreter/mterp/x86/op_add_int_lit16.S
new file mode 100644
index 0000000..4f34d17
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"addl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int_lit8.S b/runtime/interpreter/mterp/x86/op_add_int_lit8.S
new file mode 100644
index 0000000..3f14744
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"addl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_long.S b/runtime/interpreter/mterp/x86/op_add_long.S
new file mode 100644
index 0000000..dce0c26
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"addl    (rFP,%ecx,4), rIBASE", "instr2":"adcl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_long_2addr.S b/runtime/interpreter/mterp/x86/op_add_long_2addr.S
new file mode 100644
index 0000000..7847640
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"addl    %eax, (rFP,rINST,4)","instr2":"adcl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_aget.S b/runtime/interpreter/mterp/x86/op_aget.S
new file mode 100644
index 0000000..52b5236
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget.S
@@ -0,0 +1,19 @@
+%default { "load":"movl", "shift":"4", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    $load   $data_offset(%eax,%ecx,$shift), %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
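
In C++ terms the handler is a null check, one unsigned bounds check, and a scaled load; a minimal sketch of the semantics:

    #include <cstdint>

    // Model of op_aget: the single jae rejects both index >= length and
    // negative indices at once, since a negative int is huge unsigned.
    template <typename T>
    bool AGet(const T* array_data, int32_t length, int32_t index, T* out) {
        if (array_data == nullptr)
            return false;                               // common_errNullObject
        if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(length))
            return false;                               // common_errArrayIndex
        *out = array_data[index];                       // $load with $shift scaling
        return true;
    }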
diff --git a/runtime/interpreter/mterp/x86/op_aget_boolean.S b/runtime/interpreter/mterp/x86/op_aget_boolean.S
new file mode 100644
index 0000000..d910c94
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movzbl", "shift":"1", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_byte.S b/runtime/interpreter/mterp/x86/op_aget_byte.S
new file mode 100644
index 0000000..aba9ffc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_byte.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movsbl", "shift":"1", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_char.S b/runtime/interpreter/mterp/x86/op_aget_char.S
new file mode 100644
index 0000000..748e410
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_char.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movzwl", "shift":"2", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_object.S b/runtime/interpreter/mterp/x86/op_aget_object.S
new file mode 100644
index 0000000..61f3e91
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_object.S
@@ -0,0 +1,20 @@
+/*
+ * Array object get.  vAA <- vBB[vCC].
+ *
+ * for: aget-object
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    artAGetObjectFromMterp          # (array, index)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG_OBJECT %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aget_short.S b/runtime/interpreter/mterp/x86/op_aget_short.S
new file mode 100644
index 0000000..6eaf5d9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_short.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movswl", "shift":"2", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_wide.S b/runtime/interpreter/mterp/x86/op_aget_wide.S
new file mode 100644
index 0000000..663adc6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_wide.S
@@ -0,0 +1,16 @@
+/*
+ * Array get, 64 bits.  vAA <- vBB[vCC].
+ */
+    /* aget-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    movq    (%eax), %xmm0                   # xmm0 <- vBB[vCC]
+    SET_WIDE_FP_VREG %xmm0 rINST            # vAA <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_and_int.S b/runtime/interpreter/mterp/x86/op_and_int.S
new file mode 100644
index 0000000..6272c4e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"andl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_int_2addr.S b/runtime/interpreter/mterp/x86/op_and_int_2addr.S
new file mode 100644
index 0000000..95df873
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"andl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_and_int_lit16.S b/runtime/interpreter/mterp/x86/op_and_int_lit16.S
new file mode 100644
index 0000000..b062064
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"andl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_int_lit8.S b/runtime/interpreter/mterp/x86/op_and_int_lit8.S
new file mode 100644
index 0000000..99915df
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"andl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_long.S b/runtime/interpreter/mterp/x86/op_and_long.S
new file mode 100644
index 0000000..f8514ea
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"andl    (rFP,%ecx,4), rIBASE", "instr2":"andl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_long_2addr.S b/runtime/interpreter/mterp/x86/op_and_long_2addr.S
new file mode 100644
index 0000000..37249b8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"andl    %eax, (rFP,rINST,4)","instr2":"andl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_aput.S b/runtime/interpreter/mterp/x86/op_aput.S
new file mode 100644
index 0000000..2ea465d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput.S
@@ -0,0 +1,20 @@
+%default { "reg":"rINST", "store":"movl", "shift":"4", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    $data_offset(%eax,%ecx,$shift), %eax
+    GET_VREG rINST rINST
+    $store  $reg, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aput_boolean.S b/runtime/interpreter/mterp/x86/op_aput_boolean.S
new file mode 100644
index 0000000..e7fdd53
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTbl", "store":"movb", "shift":"1", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_byte.S b/runtime/interpreter/mterp/x86/op_aput_byte.S
new file mode 100644
index 0000000..491d03c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_byte.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTbl", "store":"movb", "shift":"1", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_char.S b/runtime/interpreter/mterp/x86/op_aput_char.S
new file mode 100644
index 0000000..ca42cf0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_char.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTw", "store":"movw", "shift":"2", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_object.S b/runtime/interpreter/mterp/x86/op_aput_object.S
new file mode 100644
index 0000000..2af5acb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_object.S
@@ -0,0 +1,15 @@
+/*
+ * Store an object into an array.  vBB[vCC] <- vAA.
+ */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpAputObject                 # (shadow_frame, pc, inst_data)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aput_short.S b/runtime/interpreter/mterp/x86/op_aput_short.S
new file mode 100644
index 0000000..5e63482
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_short.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTw", "store":"movw", "shift":"2", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_wide.S b/runtime/interpreter/mterp/x86/op_aput_wide.S
new file mode 100644
index 0000000..7a33371
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_wide.S
@@ -0,0 +1,17 @@
+/*
+ * Array put, 64 bits.  vBB[vCC] <- vAA.
+ *
+ */
+    /* aput-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB (array object)
+    GET_VREG %ecx %ecx                      # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0 <- vAA
+    movq    %xmm0, (%eax)                   # vBB[vCC] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_array_length.S b/runtime/interpreter/mterp/x86/op_array_length.S
new file mode 100644
index 0000000..3e42a7c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_array_length.S
@@ -0,0 +1,12 @@
+/*
+ * Return the length of an array.
+ */
+    mov     rINST, %eax                     # eax <- BA
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %ecx rINST                     # ecx <- vB (object ref)
+    testl   %ecx, %ecx                      # is null?
+    je      common_errNullObject
+    andb    $$0xf, %al                      # eax <- A
+    movl    MIRROR_ARRAY_LENGTH_OFFSET(%ecx), rINST
+    SET_VREG rINST %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_check_cast.S b/runtime/interpreter/mterp/x86/op_check_cast.S
new file mode 100644
index 0000000..018432a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_check_cast.S
@@ -0,0 +1,18 @@
+/*
+ * Check to see if a cast from one class to another is allowed.
+ */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    leal    VREG_ADDRESS(rINST), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpCheckCast                  # (index, &obj, method, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_cmp_long.S b/runtime/interpreter/mterp/x86/op_cmp_long.S
new file mode 100644
index 0000000..bd86738
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmp_long.S
@@ -0,0 +1,27 @@
+/*
+ * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+ * register based on the results of the comparison.
+ */
+    /* cmp-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG_HIGH %eax %eax                 # eax <- v[BB+1], BB is clobbered
+    cmpl    VREG_HIGH_ADDRESS(%ecx), %eax
+    jl      .L${opcode}_smaller
+    jg      .L${opcode}_bigger
+    movzbl  2(rPC), %eax                    # eax <- BB, restore BB
+    GET_VREG %eax %eax                      # eax <- v[BB]
+    sub     VREG_ADDRESS(%ecx), %eax
+    ja      .L${opcode}_bigger
+    jb      .L${opcode}_smaller
+.L${opcode}_finish:
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.L${opcode}_bigger:
+    movl    $$1, %eax
+    jmp     .L${opcode}_finish
+
+.L${opcode}_smaller:
+    movl    $$-1, %eax
+    jmp     .L${opcode}_finish
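
The two-stage flow above is the standard 64-bit compare on a 32-bit machine: a signed compare of the high words decides first, and only on a tie are the low words compared unsigned (hence the sub followed by ja/jb). Equivalently:

    #include <cstdint>

    // What op_cmp_long computes: -1, 0, or 1.
    int32_t CmpLong(int64_t a, int64_t b) {
        int32_t a_hi = static_cast<int32_t>(a >> 32);
        int32_t b_hi = static_cast<int32_t>(b >> 32);
        if (a_hi != b_hi)
            return a_hi < b_hi ? -1 : 1;   // signed high-word compare (jl/jg)
        uint32_t a_lo = static_cast<uint32_t>(a);
        uint32_t b_lo = static_cast<uint32_t>(b);
        if (a_lo != b_lo)
            return a_lo < b_lo ? -1 : 1;   // unsigned low-word compare (ja/jb)
        return 0;
    }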
diff --git a/runtime/interpreter/mterp/x86/op_cmpg_double.S b/runtime/interpreter/mterp/x86/op_cmpg_double.S
new file mode 100644
index 0000000..a73ba55
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpg_double.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"d","nanval":"pos"}
diff --git a/runtime/interpreter/mterp/x86/op_cmpg_float.S b/runtime/interpreter/mterp/x86/op_cmpg_float.S
new file mode 100644
index 0000000..648051b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpg_float.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"s","nanval":"pos"}
diff --git a/runtime/interpreter/mterp/x86/op_cmpl_double.S b/runtime/interpreter/mterp/x86/op_cmpl_double.S
new file mode 100644
index 0000000..058163e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpl_double.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"d","nanval":"neg"}
diff --git a/runtime/interpreter/mterp/x86/op_cmpl_float.S b/runtime/interpreter/mterp/x86/op_cmpl_float.S
new file mode 100644
index 0000000..302f078
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpl_float.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"s","nanval":"neg"}
diff --git a/runtime/interpreter/mterp/x86/op_const.S b/runtime/interpreter/mterp/x86/op_const.S
new file mode 100644
index 0000000..dc69530
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const.S
@@ -0,0 +1,4 @@
+    /* const vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # grab all 32 bits at once
+    SET_VREG %eax rINST                     # vAA<- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_const_16.S b/runtime/interpreter/mterp/x86/op_const_16.S
new file mode 100644
index 0000000..f5707cf
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_16.S
@@ -0,0 +1,4 @@
+    /* const/16 vAA, #+BBBB */
+    movswl  2(rPC), %ecx                    # ecx <- ssssBBBB
+    SET_VREG %ecx rINST                     # vAA <- ssssBBBB
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_4.S b/runtime/interpreter/mterp/x86/op_const_4.S
new file mode 100644
index 0000000..c336411
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_4.S
@@ -0,0 +1,7 @@
+    /* const/4 vA, #+B */
+    movsx   rINSTbl, %eax                   # eax <- ssssssBA
+    movl    $$0xf, rINST
+    andl    %eax, rINST                     # rINST <- A
+    sarl    $$4, %eax
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
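
The const/4 byte packs the destination register in its low nibble and a signed 4-bit literal in its high nibble; the movsx/andl/sarl sequence unpacks both. Roughly:

    #include <cstdint>

    // Decode of const/4: A = low nibble (register), B = high nibble
    // (sign-extended literal).
    void DecodeConst4(int8_t inst_byte, uint8_t* reg_a, int32_t* lit_b) {
        int32_t sext = inst_byte;     // movsx rINSTbl, %eax
        *reg_a = sext & 0xf;          // andl $0xf
        *lit_b = sext >> 4;           // sarl $4 keeps the sign
    }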
diff --git a/runtime/interpreter/mterp/x86/op_const_class.S b/runtime/interpreter/mterp/x86/op_const_class.S
new file mode 100644
index 0000000..eceb8bc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_class.S
@@ -0,0 +1,14 @@
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstClass                 # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_high16.S b/runtime/interpreter/mterp/x86/op_const_high16.S
new file mode 100644
index 0000000..da78d1b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_high16.S
@@ -0,0 +1,5 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $$16, %eax                      # eax <- BBBB0000
+    SET_VREG %eax rINST                     # vAA <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string.S b/runtime/interpreter/mterp/x86/op_const_string.S
new file mode 100644
index 0000000..9acd6fe
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_string.S
@@ -0,0 +1,14 @@
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstString                # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string_jumbo.S b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
new file mode 100644
index 0000000..5c728b2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
@@ -0,0 +1,14 @@
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %eax                    # eax <- BBBBBBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpConstString                # (index, tgt_reg, shadow_frame, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_const_wide.S b/runtime/interpreter/mterp/x86/op_const_wide.S
new file mode 100644
index 0000000..745490e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide.S
@@ -0,0 +1,7 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- lsw
+    movzbl  rINSTbl, %ecx                   # ecx <- AA
+    movl    6(rPC), rINST                   # rINST <- msw
+    SET_VREG %eax %ecx
+    SET_VREG_HIGH  rINST %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 5
diff --git a/runtime/interpreter/mterp/x86/op_const_wide_16.S b/runtime/interpreter/mterp/x86/op_const_wide_16.S
new file mode 100644
index 0000000..8029cfe
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide_16.S
@@ -0,0 +1,8 @@
+    /* const-wide/16 vAA, #+BBBB */
+    movswl  2(rPC), %eax                    # eax <- ssssBBBB
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssssssBBBB
+    SET_VREG_HIGH rIBASE rINST              # store msw
+    SET_VREG %eax rINST                     # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
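
cltd sign-extends eax into edx, and edx is rIBASE, which is why rIBASE has to be saved and restored around it. The net effect in C++ terms:

    #include <cstdint>

    // const-wide/16: widen the 16-bit literal to 64 bits, then split it
    // into the two 32-bit vreg halves.
    void ConstWide16(int16_t bbbb, uint32_t* lo, uint32_t* hi) {
        int64_t wide = bbbb;                      // cltd: edx:eax <- sext(eax)
        *lo = static_cast<uint32_t>(wide);        // SET_VREG
        *hi = static_cast<uint32_t>(wide >> 32);  // SET_VREG_HIGH
    }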
diff --git a/runtime/interpreter/mterp/x86/op_const_wide_32.S b/runtime/interpreter/mterp/x86/op_const_wide_32.S
new file mode 100644
index 0000000..3e23d3a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide_32.S
@@ -0,0 +1,8 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- BBBBbbbb
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssBBBBbbbb
+    SET_VREG_HIGH rIBASE rINST              # store msw
+    SET_VREG %eax rINST                     # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_const_wide_high16.S b/runtime/interpreter/mterp/x86/op_const_wide_high16.S
new file mode 100644
index 0000000..d2a1119
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide_high16.S
@@ -0,0 +1,7 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $$16, %eax                      # eax <- BBBB0000
+    SET_VREG_HIGH %eax rINST                # v[AA+1] <- eax
+    xorl    %eax, %eax
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_div_double.S b/runtime/interpreter/mterp/x86/op_div_double.S
new file mode 100644
index 0000000..575716d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"divs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_div_double_2addr.S b/runtime/interpreter/mterp/x86/op_div_double_2addr.S
new file mode 100644
index 0000000..8229a31
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"divs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_div_float.S b/runtime/interpreter/mterp/x86/op_div_float.S
new file mode 100644
index 0000000..250f1dc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"divs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_div_float_2addr.S b/runtime/interpreter/mterp/x86/op_div_float_2addr.S
new file mode 100644
index 0000000..c30d148
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"divs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int.S b/runtime/interpreter/mterp/x86/op_div_int.S
new file mode 100644
index 0000000..5fc8fa5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int.S
@@ -0,0 +1 @@
+%include "x86/bindiv.S" {"result":"%eax","special":"$0x80000000","rem":"0"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int_2addr.S b/runtime/interpreter/mterp/x86/op_div_int_2addr.S
new file mode 100644
index 0000000..04cf1ba
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/bindiv2addr.S" {"result":"%eax","special":"$0x80000000"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int_lit16.S b/runtime/interpreter/mterp/x86/op_div_int_lit16.S
new file mode 100644
index 0000000..dd396bb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/bindivLit16.S" {"result":"%eax","special":"$0x80000000"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int_lit8.S b/runtime/interpreter/mterp/x86/op_div_int_lit8.S
new file mode 100644
index 0000000..3cbd9d0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/bindivLit8.S" {"result":"%eax","special":"$0x80000000"}
diff --git a/runtime/interpreter/mterp/x86/op_div_long.S b/runtime/interpreter/mterp/x86/op_div_long.S
new file mode 100644
index 0000000..5772826
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_long.S
@@ -0,0 +1,23 @@
+%default {"routine":"art_quick_ldiv"}
+/* art_quick_* methods have the quick ABI,
+ *   so args are passed in eax, ecx, edx, ebx
+ */
+    /* div vAA, vBB, vCC */
+    .extern $routine
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movzbl  3(rPC), %eax                    # eax <- CC
+    GET_VREG %ecx %eax
+    GET_VREG_HIGH %ebx %eax
+    movl    %ecx, %edx
+    orl     %ebx, %ecx
+    jz      common_errDivideByZero
+    movzbl  2(rPC), %eax                    # eax <- BB
+    GET_VREG_HIGH %ecx %eax
+    GET_VREG %eax %eax
+    call    $routine
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
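
The orl before the call is the usual 32-bit idiom for a 64-bit zero test: a long divisor is zero exactly when the OR of its halves is zero, so a single flag test replaces two compares. Sketch:

    #include <cstdint>

    // Zero test performed before art_quick_ldiv is called.
    bool LongDivisorIsZero(uint32_t lo, uint32_t hi) {
        return (lo | hi) == 0;   // orl ... ; jz common_errDivideByZero
    }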
diff --git a/runtime/interpreter/mterp/x86/op_div_long_2addr.S b/runtime/interpreter/mterp/x86/op_div_long_2addr.S
new file mode 100644
index 0000000..2696042
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_long_2addr.S
@@ -0,0 +1,25 @@
+%default {"routine":"art_quick_ldiv"}
+/* art_quick_* methods have the quick ABI,
+ *   so args are passed in eax, ecx, edx, ebx
+ */
+    /* div/2addr vA, vB */
+    .extern   $routine
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    movzbl  rINSTbl, %eax
+    shrl    $$4, %eax                       # eax <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movl    %ebx, %ecx
+    GET_VREG %edx %eax
+    GET_VREG_HIGH %ebx %eax
+    movl    %edx, %eax
+    orl     %ebx, %eax
+    jz      common_errDivideByZero
+    GET_VREG %eax %ecx
+    GET_VREG_HIGH %ecx %ecx
+    call    $routine
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE rINST
+    SET_VREG %eax rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_double_to_float.S b/runtime/interpreter/mterp/x86/op_double_to_float.S
new file mode 100644
index 0000000..5135d60
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_double_to_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fldl","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_double_to_int.S b/runtime/interpreter/mterp/x86/op_double_to_int.S
new file mode 100644
index 0000000..9c4e11c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_double_to_int.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"1","tgtlong":"0"}
diff --git a/runtime/interpreter/mterp/x86/op_double_to_long.S b/runtime/interpreter/mterp/x86/op_double_to_long.S
new file mode 100644
index 0000000..fe0eee2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_double_to_long.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"1","tgtlong":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_fill_array_data.S b/runtime/interpreter/mterp/x86/op_fill_array_data.S
new file mode 100644
index 0000000..0cb05f6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_fill_array_data.S
@@ -0,0 +1,12 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    GET_VREG %eax rINST                     # eax <- vAA (array object)
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    MterpFillArrayData              # (obj, payload)
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_filled_new_array.S b/runtime/interpreter/mterp/x86/op_filled_new_array.S
new file mode 100644
index 0000000..c08b09f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_filled_new_array.S
@@ -0,0 +1,20 @@
+%default { "helper":"MterpFilledNewArray" }
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern $helper
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)
+    call    $helper
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_filled_new_array_range.S b/runtime/interpreter/mterp/x86/op_filled_new_array_range.S
new file mode 100644
index 0000000..841059e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "x86/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/x86/op_float_to_double.S b/runtime/interpreter/mterp/x86/op_float_to_double.S
new file mode 100644
index 0000000..12a3e14
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_float_to_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"flds","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_float_to_int.S b/runtime/interpreter/mterp/x86/op_float_to_int.S
new file mode 100644
index 0000000..ac57388
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_float_to_int.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"0","tgtlong":"0"}
diff --git a/runtime/interpreter/mterp/x86/op_float_to_long.S b/runtime/interpreter/mterp/x86/op_float_to_long.S
new file mode 100644
index 0000000..be1d982
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_float_to_long.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"0","tgtlong":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_goto.S b/runtime/interpreter/mterp/x86/op_goto.S
new file mode 100644
index 0000000..411399d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_goto.S
@@ -0,0 +1,19 @@
+/*
+ * Unconditional branch, 8-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto +AA */
+    movsbl  rINSTbl, %eax                   # eax <- ssssssAA
+    addl    %eax, %eax                      # eax <- AA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
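
The jg still tests the flags set by the addl (neither leal nor the fetch touches them), so the suspend-check path is taken exactly on non-positive, i.e. backward or self, branches; since any loop must contain one such branch, this bounds how long a thread can run unchecked. In effect:

    #include <cstdint>

    // Suspend-check policy shared by the goto family.
    bool NeedsSuspendCheck(int32_t byte_offset) {
        return byte_offset <= 0;   // jg 1f skips the check when positive
    }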
diff --git a/runtime/interpreter/mterp/x86/op_goto_16.S b/runtime/interpreter/mterp/x86/op_goto_16.S
new file mode 100644
index 0000000..4f04f9e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_goto_16.S
@@ -0,0 +1,19 @@
+/*
+ * Unconditional branch, 16-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto/16 +AAAA */
+    movswl  2(rPC), %eax                    # eax <- ssssAAAA
+    addl    %eax, %eax                      # eax <- AAAA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AAAA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/op_goto_32.S b/runtime/interpreter/mterp/x86/op_goto_32.S
new file mode 100644
index 0000000..48f6e5a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_goto_32.S
@@ -0,0 +1,24 @@
+/*
+ * Unconditional branch, 32-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ *
+ * Unlike most opcodes, this one is allowed to branch to itself, so
+ * our "backward branch" test must be "<=0" instead of "<0".  The
+ * addl that doubles the offset sets the flags that the jg below
+ * tests.
+ */
+    /* goto/32 +AAAAAAAA */
+    movl    2(rPC), %eax                    # eax <- AAAAAAAA
+    addl    %eax, %eax                      # eax <- AAAAAAAA * 2
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      1f                              # AAAAAAAA * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/op_if_eq.S b/runtime/interpreter/mterp/x86/op_if_eq.S
new file mode 100644
index 0000000..5413d98
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_eq.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/x86/op_if_eqz.S b/runtime/interpreter/mterp/x86/op_if_eqz.S
new file mode 100644
index 0000000..53dc99e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_eqz.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/x86/op_if_ge.S b/runtime/interpreter/mterp/x86/op_if_ge.S
new file mode 100644
index 0000000..c2ba3c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_ge.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"l" }
diff --git a/runtime/interpreter/mterp/x86/op_if_gez.S b/runtime/interpreter/mterp/x86/op_if_gez.S
new file mode 100644
index 0000000..cd2c772
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_gez.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"l" }
diff --git a/runtime/interpreter/mterp/x86/op_if_gt.S b/runtime/interpreter/mterp/x86/op_if_gt.S
new file mode 100644
index 0000000..9fe84bb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_gt.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/x86/op_if_gtz.S b/runtime/interpreter/mterp/x86/op_if_gtz.S
new file mode 100644
index 0000000..b454ffd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_gtz.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/x86/op_if_le.S b/runtime/interpreter/mterp/x86/op_if_le.S
new file mode 100644
index 0000000..93571a7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_le.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"g" }
diff --git a/runtime/interpreter/mterp/x86/op_if_lez.S b/runtime/interpreter/mterp/x86/op_if_lez.S
new file mode 100644
index 0000000..779c77f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_lez.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"g" }
diff --git a/runtime/interpreter/mterp/x86/op_if_lt.S b/runtime/interpreter/mterp/x86/op_if_lt.S
new file mode 100644
index 0000000..1fb1521
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_lt.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/x86/op_if_ltz.S b/runtime/interpreter/mterp/x86/op_if_ltz.S
new file mode 100644
index 0000000..155c356
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_ltz.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/x86/op_if_ne.S b/runtime/interpreter/mterp/x86/op_if_ne.S
new file mode 100644
index 0000000..7e1b065
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_ne.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"e" }
diff --git a/runtime/interpreter/mterp/x86/op_if_nez.S b/runtime/interpreter/mterp/x86/op_if_nez.S
new file mode 100644
index 0000000..8951f5b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_nez.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"e" }
diff --git a/runtime/interpreter/mterp/x86/op_iget.S b/runtime/interpreter/mterp/x86/op_iget.S
new file mode 100644
index 0000000..868ffd0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget.S
@@ -0,0 +1,29 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    $helper
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    .if $is_object
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
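
All the iget variants share this body and differ only in the helper and the is_object flag. The helpers' assumed shape, inferred from the argument setup above rather than the runtime headers (stubbed-type sketch):

    #include <cstdint>

    struct Object;     // opaque runtime types, stubbed for the sketch
    struct ArtMethod;
    struct Thread;

    // Resolves field CCCC against the referrer, reads it from obj, and
    // records any failure as a pending exception on self, which the asm
    // then tests via THREAD_EXCEPTION_OFFSET.
    extern "C" int32_t artGet32InstanceFromCode(uint32_t field_idx,
                                                Object* obj,
                                                ArtMethod* referrer,
                                                Thread* self);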
diff --git a/runtime/interpreter/mterp/x86/op_iget_boolean.S b/runtime/interpreter/mterp/x86/op_iget_boolean.S
new file mode 100644
index 0000000..9ddad04
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_boolean_quick.S b/runtime/interpreter/mterp/x86/op_iget_boolean_quick.S
new file mode 100644
index 0000000..02b0c16
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movsbl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_byte.S b/runtime/interpreter/mterp/x86/op_iget_byte.S
new file mode 100644
index 0000000..8250788
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_byte.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_byte_quick.S b/runtime/interpreter/mterp/x86/op_iget_byte_quick.S
new file mode 100644
index 0000000..02b0c16
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movsbl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_char.S b/runtime/interpreter/mterp/x86/op_iget_char.S
new file mode 100644
index 0000000..e9d2156
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_char.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_char_quick.S b/runtime/interpreter/mterp/x86/op_iget_char_quick.S
new file mode 100644
index 0000000..a5d9712
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movzwl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_object.S b/runtime/interpreter/mterp/x86/op_iget_object.S
new file mode 100644
index 0000000..3abeefc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_object.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_object_quick.S b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
new file mode 100644
index 0000000..b09772f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
@@ -0,0 +1,17 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    %eax, OUT_ARG1(%esp)
+    EXPORT_PC
+    call    artIGetObjectFromMterp          # (obj, offset)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    SET_VREG_OBJECT %eax rINST              # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iget_quick.S b/runtime/interpreter/mterp/x86/op_iget_quick.S
new file mode 100644
index 0000000..372071c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_quick.S
@@ -0,0 +1,13 @@
+%default { "load":"movl"}
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    ${load} (%ecx,%eax,1), %eax
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax rINST                     # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
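
The quick variants skip resolution entirely: the instruction has been rewritten to carry a raw byte offset, so the handler reduces to a null check plus one load. A C++ model:

    #include <cstdint>
    #include <cstring>

    // iget-quick semantics: load sizeof(T) bytes at obj + offset.
    template <typename T>
    bool IGetQuick(const unsigned char* obj, uint16_t byte_offset, T* out) {
        if (obj == nullptr)
            return false;                                // common_errNullObject
        std::memcpy(out, obj + byte_offset, sizeof(T));  // ${load} (%ecx,%eax,1)
        return true;
    }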
diff --git a/runtime/interpreter/mterp/x86/op_iget_short.S b/runtime/interpreter/mterp/x86/op_iget_short.S
new file mode 100644
index 0000000..c8fad89
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_short.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_short_quick.S b/runtime/interpreter/mterp/x86/op_iget_short_quick.S
new file mode 100644
index 0000000..2c3aeb6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movswl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide.S b/runtime/interpreter/mterp/x86/op_iget_wide.S
new file mode 100644
index 0000000..58e5a65
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_wide.S
@@ -0,0 +1,25 @@
+/*
+ * 64-bit instance field get.
+ *
+ * for: iget-wide
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artGet64InstanceFromCode
+    mov     rSELF, %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    SET_VREG %eax rINST
+    SET_VREG_HIGH %edx rINST
+    REFRESH_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide_quick.S b/runtime/interpreter/mterp/x86/op_iget_wide_quick.S
new file mode 100644
index 0000000..8be336b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_wide_quick.S
@@ -0,0 +1,11 @@
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movq    (%ecx,%eax,1), %xmm0
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    SET_WIDE_FP_VREG %xmm0 rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_instance_of.S b/runtime/interpreter/mterp/x86/op_instance_of.S
new file mode 100644
index 0000000..c9bfba5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_instance_of.S
@@ -0,0 +1,26 @@
+/*
+ * Check to see if an object reference is an instance of a class.
+ *
+ * Most common situation is a non-null object, being compared against
+ * an already-resolved class.
+ */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- CCCC
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $$4, %eax                       # eax <- B
+    leal    VREG_ADDRESS(%eax), %ecx        # Get object address
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpInstanceOf                 # (index, &obj, method, self)
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    andb    $$0xf, rINSTbl                  # rINSTbl <- A
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_int_to_byte.S b/runtime/interpreter/mterp/x86/op_int_to_byte.S
new file mode 100644
index 0000000..b4e8d22
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"movsbl  %al, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_char.S b/runtime/interpreter/mterp/x86/op_int_to_char.S
new file mode 100644
index 0000000..4608971
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_char.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"movzwl  %ax,%eax"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_double.S b/runtime/interpreter/mterp/x86/op_int_to_double.S
new file mode 100644
index 0000000..3e9921e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildl","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_float.S b/runtime/interpreter/mterp/x86/op_int_to_float.S
new file mode 100644
index 0000000..849540d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildl","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_long.S b/runtime/interpreter/mterp/x86/op_int_to_long.S
new file mode 100644
index 0000000..736ea69
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_long.S
@@ -0,0 +1,12 @@
+    /* int to long vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    movl    rIBASE, %ecx                    # cltd trashes rIBASE/edx
+    cltd                                    # rIBASE:eax <- ssssssssBBBBBBBB
+    SET_VREG_HIGH rIBASE rINST              # v[A+1] <- rIBASE
+    SET_VREG %eax rINST                     # v[A+0] <- %eax
+    movl    %ecx, rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
diff --git a/runtime/interpreter/mterp/x86/op_int_to_short.S b/runtime/interpreter/mterp/x86/op_int_to_short.S
new file mode 100644
index 0000000..90d0ae6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_short.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"movswl %ax, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_invoke_direct.S b/runtime/interpreter/mterp/x86/op_invoke_direct.S
new file mode 100644
index 0000000..76fb9a6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_direct_range.S b/runtime/interpreter/mterp/x86/op_invoke_direct_range.S
new file mode 100644
index 0000000..a6ab604
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_interface.S b/runtime/interpreter/mterp/x86/op_invoke_interface.S
new file mode 100644
index 0000000..91c24f5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeInterface" }
+/*
+ * Handle an interface method call.
+ *
+ * for: invoke-interface, invoke-interface/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86/op_invoke_interface_range.S b/runtime/interpreter/mterp/x86/op_invoke_interface_range.S
new file mode 100644
index 0000000..e478beb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_static.S b/runtime/interpreter/mterp/x86/op_invoke_static.S
new file mode 100644
index 0000000..b4c1236
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_static.S
@@ -0,0 +1,2 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeStatic" }
+
diff --git a/runtime/interpreter/mterp/x86/op_invoke_static_range.S b/runtime/interpreter/mterp/x86/op_invoke_static_range.S
new file mode 100644
index 0000000..3dc8a26
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_super.S b/runtime/interpreter/mterp/x86/op_invoke_super.S
new file mode 100644
index 0000000..be20edd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeSuper" }
+/*
+ * Handle a "super" method call.
+ *
+ * for: invoke-super, invoke-super/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86/op_invoke_super_range.S b/runtime/interpreter/mterp/x86/op_invoke_super_range.S
new file mode 100644
index 0000000..f36bf72
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual.S b/runtime/interpreter/mterp/x86/op_invoke_virtual.S
new file mode 100644
index 0000000..7e9c456
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtual" }
+/*
+ * Handle a virtual method call.
+ *
+ * for: invoke-virtual, invoke-virtual/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/x86/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..2dc9ab6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual_range.S b/runtime/interpreter/mterp/x86/op_invoke_virtual_range.S
new file mode 100644
index 0000000..d1d20d2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/x86/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..21bfc55
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/x86/op_iput.S b/runtime/interpreter/mterp/x86/op_iput.S
new file mode 100644
index 0000000..f8a6549
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput.S
@@ -0,0 +1,25 @@
+%default { "handler":"artSet32InstanceFromMterp" }
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern $handler
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $$4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $$0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    $handler
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_boolean.S b/runtime/interpreter/mterp/x86/op_iput_boolean.S
new file mode 100644
index 0000000..11cab88
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_boolean_quick.S b/runtime/interpreter/mterp/x86/op_iput_boolean_quick.S
new file mode 100644
index 0000000..93865de
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTbl", "store":"movb" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_byte.S b/runtime/interpreter/mterp/x86/op_iput_byte.S
new file mode 100644
index 0000000..11cab88
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_byte.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_byte_quick.S b/runtime/interpreter/mterp/x86/op_iput_byte_quick.S
new file mode 100644
index 0000000..93865de
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTbl", "store":"movb" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_char.S b/runtime/interpreter/mterp/x86/op_iput_char.S
new file mode 100644
index 0000000..abbf2bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_char.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_char_quick.S b/runtime/interpreter/mterp/x86/op_iput_char_quick.S
new file mode 100644
index 0000000..4ec8029
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTw", "store":"movw" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_object.S b/runtime/interpreter/mterp/x86/op_iput_object.S
new file mode 100644
index 0000000..20d57aa
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_object.S
@@ -0,0 +1,13 @@
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    MterpIputObject
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_object_quick.S b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
new file mode 100644
index 0000000..4c7f4bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
@@ -0,0 +1,11 @@
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpIputObjectQuick
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_quick.S b/runtime/interpreter/mterp/x86/op_iput_quick.S
new file mode 100644
index 0000000..e2f7caf
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_quick.S
@@ -0,0 +1,13 @@
+%default { "reg":"rINST", "store":"movl" }
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST rINST                    # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    ${store}    ${reg}, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
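
The quickened iput forms skip field resolution entirely: the quickening pass has
already rewritten the instruction so that CCCC holds the resolved field's byte
offset, leaving only a null check and a raw store. A minimal C sketch of that
semantics (names and types here are illustrative, not ART's):

    #include <stdint.h>
    #include <stddef.h>

    /* Sketch: quickened instance-field store. 'offset' is the CCCC
     * literal from the instruction; no lookup happens at runtime. */
    static int iput_quick(uint8_t* obj, uint16_t offset, uint32_t value) {
        if (obj == NULL) {
            return -1;  /* real handler jumps to common_errNullObject */
        }
        *(uint32_t*)(obj + offset) = value;  /* ${store} ${reg}, (%ecx,%eax,1) */
        return 0;
    }
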
diff --git a/runtime/interpreter/mterp/x86/op_iput_short.S b/runtime/interpreter/mterp/x86/op_iput_short.S
new file mode 100644
index 0000000..abbf2bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_short.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_short_quick.S b/runtime/interpreter/mterp/x86/op_iput_short_quick.S
new file mode 100644
index 0000000..4ec8029
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTw", "store":"movw" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_wide.S b/runtime/interpreter/mterp/x86/op_iput_wide.S
new file mode 100644
index 0000000..92cb770
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_wide.S
@@ -0,0 +1,19 @@
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl,%ecx                    # ecx <- BA
+    sarl    $$4,%ecx                        # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    artSet64InstanceFromMterp
+    testl   %eax, %eax
+    jnz     MterpPossibleException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_wide_quick.S b/runtime/interpreter/mterp/x86/op_iput_wide_quick.S
new file mode 100644
index 0000000..72285c5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_wide_quick.S
@@ -0,0 +1,12 @@
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    movzbl    rINSTbl, %ecx                 # ecx<- BA
+    sarl      $$4, %ecx                     # ecx<- B
+    GET_VREG  %ecx %ecx                     # vB (object we're operating on)
+    testl     %ecx, %ecx                    # is object null?
+    je        common_errNullObject
+    movzwl    2(rPC), %eax                  # eax<- field byte offset
+    leal      (%ecx,%eax,1), %ecx           # ecx<- Address of 64-bit target
+    andb      $$0xf, rINSTbl                # rINST<- A
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0<- fp[A]/fp[A+1]
+    movq      %xmm0, (%ecx)                 # obj.field <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_long_to_double.S b/runtime/interpreter/mterp/x86/op_long_to_double.S
new file mode 100644
index 0000000..2c7f905
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_long_to_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildll","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_long_to_float.S b/runtime/interpreter/mterp/x86/op_long_to_float.S
new file mode 100644
index 0000000..e500e39
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_long_to_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildll","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_long_to_int.S b/runtime/interpreter/mterp/x86/op_long_to_int.S
new file mode 100644
index 0000000..1c39b96
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "x86/op_move.S"
diff --git a/runtime/interpreter/mterp/x86/op_monitor_enter.S b/runtime/interpreter/mterp/x86/op_monitor_enter.S
new file mode 100644
index 0000000..8236fb3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_monitor_enter.S
@@ -0,0 +1,14 @@
+/*
+ * Synchronize on an object.
+ */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    artLockObjectFromCode           # (object, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_monitor_exit.S b/runtime/interpreter/mterp/x86/op_monitor_exit.S
new file mode 100644
index 0000000..56d4eb3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_monitor_exit.S
@@ -0,0 +1,18 @@
+/*
+ * Unlock an object.
+ *
+ * Exceptions that occur when unlocking a monitor need to appear as
+ * if they happened at the following instruction.  See the Dalvik
+ * instruction spec.
+ */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    GET_VREG %ecx rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    artUnlockObjectFromCode         # (object, self)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move.S b/runtime/interpreter/mterp/x86/op_move.S
new file mode 100644
index 0000000..0a531be
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move.S
@@ -0,0 +1,13 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $$0xf, %al                      # eax <- A
+    shrl    $$4, rINST                      # rINST <- B
+    GET_VREG rINST rINST
+    .if $is_object
+    SET_VREG_OBJECT rINST %eax              # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST %eax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
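
The BA-nibble decode at the top of this handler is the standard Dalvik 12x
pattern that recurs throughout these files. A sketch of the same decode in C,
assuming rINST holds the zero-extended 16-bit code unit:

    #include <stdint.h>

    /* Sketch: decode "op vA, vB" (Dalvik format 12x). The high byte of
     * the code unit packs B in its high nibble over A in its low nibble. */
    static void decode_12x(uint16_t inst, uint32_t* a, uint32_t* b) {
        uint8_t ba = (uint8_t)(inst >> 8);  /* movzbl rINSTbl, %eax */
        *a = ba & 0xfu;                     /* andb   $0xf, %al     */
        *b = ba >> 4;                       /* shrl   $4, rINST     */
    }
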
diff --git a/runtime/interpreter/mterp/x86/op_move_16.S b/runtime/interpreter/mterp/x86/op_move_16.S
new file mode 100644
index 0000000..0773f41
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_16.S
@@ -0,0 +1,12 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwl  4(rPC), %ecx                    # ecx <- BBBB
+    movzwl  2(rPC), %eax                    # eax <- AAAA
+    GET_VREG rINST %ecx
+    .if $is_object
+    SET_VREG_OBJECT rINST %eax              # fp[AAAA] <- fp[BBBB]
+    .else
+    SET_VREG rINST %eax                     # fp[AAAA] <- fp[BBBB]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_move_exception.S b/runtime/interpreter/mterp/x86/op_move_exception.S
new file mode 100644
index 0000000..e37cdfa
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_exception.S
@@ -0,0 +1,6 @@
+    /* move-exception vAA */
+    movl    rSELF, %ecx
+    movl    THREAD_EXCEPTION_OFFSET(%ecx), %eax
+    SET_VREG_OBJECT %eax rINST              # fp[AA] <- exception object
+    movl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_from16.S b/runtime/interpreter/mterp/x86/op_move_from16.S
new file mode 100644
index 0000000..623a4d3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_from16.S
@@ -0,0 +1,12 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzx   rINSTbl, %eax                   # eax <- AA
+    movw    2(rPC), rINSTw                  # rINSTw <- BBBB
+    GET_VREG rINST rINST                    # rINST <- fp[BBBB]
+    .if $is_object
+    SET_VREG_OBJECT rINST %eax              # fp[AA] <- fp[BBBB]
+    .else
+    SET_VREG rINST %eax                     # fp[AA] <- fp[BBBB]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_move_object.S b/runtime/interpreter/mterp/x86/op_move_object.S
new file mode 100644
index 0000000..a6a7c90
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_object.S
@@ -0,0 +1 @@
+%include "x86/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_object_16.S b/runtime/interpreter/mterp/x86/op_move_object_16.S
new file mode 100644
index 0000000..e0c8527
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_object_16.S
@@ -0,0 +1 @@
+%include "x86/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_object_from16.S b/runtime/interpreter/mterp/x86/op_move_object_from16.S
new file mode 100644
index 0000000..e623820
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "x86/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_result.S b/runtime/interpreter/mterp/x86/op_move_result.S
new file mode 100644
index 0000000..414f2cb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_result.S
@@ -0,0 +1,11 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JValue
+    movl    (%eax), %eax                    # eax <- result.i
+    .if $is_object
+    SET_VREG_OBJECT %eax rINST              # fp[AA] <- result
+    .else
+    SET_VREG %eax rINST                     # fp[AA] <- result
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
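
move-result does not read a vreg: the preceding invoke leaves its return value
in a JValue whose address the shadow frame exposes at OFF_FP_RESULT_REGISTER.
A sketch of the indirection (the JValue layout below is assumed for
illustration only):

    #include <stdint.h>

    typedef union { int32_t i; int64_t j; void* l; } JValue;  /* assumed */

    /* Sketch: fp[AA] <- low 32 bits of the pending invoke result. */
    static void move_result(uint32_t* fp, uint32_t aa, const JValue* result) {
        fp[aa] = (uint32_t)result->i;  /* movl (%eax), %eax ; SET_VREG */
    }
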
diff --git a/runtime/interpreter/mterp/x86/op_move_result_object.S b/runtime/interpreter/mterp/x86/op_move_result_object.S
new file mode 100644
index 0000000..cbf5e1d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_result_object.S
@@ -0,0 +1 @@
+%include "x86/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_result_wide.S b/runtime/interpreter/mterp/x86/op_move_result_wide.S
new file mode 100644
index 0000000..0c1683b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_result_wide.S
@@ -0,0 +1,7 @@
+    /* move-result-wide vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JValue
+    movl    4(%eax), %ecx                   # Get high
+    movl    (%eax), %eax                    # Get low
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[AA+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_wide.S b/runtime/interpreter/mterp/x86/op_move_wide.S
new file mode 100644
index 0000000..9c0e985
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_wide.S
@@ -0,0 +1,8 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_WIDE_FP_VREG %xmm0 rINST            # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %ecx             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_wide_16.S b/runtime/interpreter/mterp/x86/op_move_wide_16.S
new file mode 100644
index 0000000..7522c27
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_wide_16.S
@@ -0,0 +1,7 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  4(rPC), %ecx                    # ecx<- BBBB
+    movzwl  2(rPC), %eax                    # eax<- AAAA
+    GET_WIDE_FP_VREG %xmm0 %ecx             # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %eax             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_move_wide_from16.S b/runtime/interpreter/mterp/x86/op_move_wide_from16.S
new file mode 100644
index 0000000..5ad2cb4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_wide_from16.S
@@ -0,0 +1,7 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  2(rPC), %ecx                    # ecx <- BBBB
+    movzbl  rINSTbl, %eax                   # eax <- AAAA
+    GET_WIDE_FP_VREG %xmm0 %ecx             # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0 %eax             # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_double.S b/runtime/interpreter/mterp/x86/op_mul_double.S
new file mode 100644
index 0000000..7cef4c0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"muls","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_double_2addr.S b/runtime/interpreter/mterp/x86/op_mul_double_2addr.S
new file mode 100644
index 0000000..bb722b6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"muls","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_float.S b/runtime/interpreter/mterp/x86/op_mul_float.S
new file mode 100644
index 0000000..1156230
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"muls","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_float_2addr.S b/runtime/interpreter/mterp/x86/op_mul_float_2addr.S
new file mode 100644
index 0000000..e9316df
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"muls","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_int.S b/runtime/interpreter/mterp/x86/op_mul_int.S
new file mode 100644
index 0000000..a367ab7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int.S
@@ -0,0 +1,12 @@
+    /*
+     * 32-bit binary multiplication.
+     */
+    /* mul vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax %eax                      # eax <- vBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_2addr.S b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
new file mode 100644
index 0000000..6005075
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
@@ -0,0 +1,10 @@
+    /* mul vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $$0xf, %cl                      # ecx <- A
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit16.S b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
new file mode 100644
index 0000000..1c0fde3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
@@ -0,0 +1,12 @@
+    /* mul/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax %eax                      # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    mov     rIBASE, LOCAL0(%esp)
+    imull   %ecx, %eax                      # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit8.S b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
new file mode 100644
index 0000000..4d7a22d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
@@ -0,0 +1,9 @@
+    /* mul/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax  %eax                    # eax <- rBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   %ecx, %eax                      # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_long.S b/runtime/interpreter/mterp/x86/op_mul_long.S
new file mode 100644
index 0000000..3746e41
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_long.S
@@ -0,0 +1,33 @@
+/*
+ * Signed 64-bit integer multiply.
+ *
+ * We could definitely use more free registers for
+ * this code.  We spill rINSTw (ebx),
+ * giving us eax, ebx, ecx and edx as computational
+ * temps.  On top of that, we'll spill edi (rFP)
+ * for use as the vB pointer and esi (rPC) for use
+ * as the vC pointer.  Yuck.
+ *
+ */
+    /* mul-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[B]
+    leal    (rFP,%ecx,4), rFP               # rFP <- &v[C]
+    movl    4(%esi), %ecx                   # ecx <- Bmsw
+    imull   (rFP), %ecx                     # ecx <- (Bmsw*Clsw)
+    movl    4(rFP), %eax                    # eax <- Cmsw
+    imull   (%esi), %eax                    # eax <- (Cmsw*Blsw)
+    addl    %eax, %ecx                      # ecx <- (Bmsw*Clsw)+(Cmsw*Blsw)
+    movl    (rFP), %eax                     # eax <- Clsw
+    mull    (%esi)                          # edx:eax <- (Clsw*Blsw)
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL1(%esp), rFP               # restore FP
+    leal    (%ecx,rIBASE), rIBASE           # full result now in rIBASE:%eax
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
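
All the spilling above serves a plain 32x32 cross-product decomposition: only
the low 64 bits of the product are kept, so the msw*msw term never needs
computing. The identity, as a C sketch:

    #include <stdint.h>

    /* Sketch: mul-long from 32-bit halves. (b_msw * c_msw) << 64 cannot
     * reach the low 64 bits, so it is dropped. */
    static uint64_t mul_long(uint64_t b, uint64_t c) {
        uint32_t b_lsw = (uint32_t)b, b_msw = (uint32_t)(b >> 32);
        uint32_t c_lsw = (uint32_t)c, c_msw = (uint32_t)(c >> 32);
        uint32_t cross = b_msw * c_lsw + c_msw * b_lsw;  /* two imull + addl */
        uint64_t low   = (uint64_t)b_lsw * c_lsw;        /* widening mull    */
        return low + ((uint64_t)cross << 32);            /* leal into rIBASE */
    }
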
diff --git a/runtime/interpreter/mterp/x86/op_mul_long_2addr.S b/runtime/interpreter/mterp/x86/op_mul_long_2addr.S
new file mode 100644
index 0000000..565a57c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_long_2addr.S
@@ -0,0 +1,35 @@
+/*
+ * Signed 64-bit integer multiply, 2-addr version
+ *
+ * We could definitely use more free registers for
+ * this code.  We must spill %edx (rIBASE) because it
+ * is used by imul.  We'll also spill rINST (ebx),
+ * giving us eax, ebx, ecx and rIBASE as computational
+ * temps.  On top of that, we'll spill rPC (esi)
+ * for use as the vA pointer and rFP (edi) for use
+ * as the vB pointer.  Yuck.
+ */
+    /* mul-long/2addr vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $$0xf, %al                      # eax <- A
+    CLEAR_WIDE_REF %eax                     # clear refs in advance
+    sarl    $$4, rINST                      # rINST <- B
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[A]
+    leal    (rFP,rINST,4), rFP              # rFP <- &v[B]
+    movl    4(%esi), %ecx                   # ecx <- Amsw
+    imull   (rFP), %ecx                     # ecx <- (Amsw*Blsw)
+    movl    4(rFP), %eax                    # eax <- Bmsw
+    imull   (%esi), %eax                    # eax <- (Bmsw*Alsw)
+    addl    %eax, %ecx                      # ecx <- (Amsw*Blsw)+(Bmsw*Alsw)
+    movl    (rFP), %eax                     # eax <- Blsw
+    mull    (%esi)                          # eax <- (Blsw*Alsw)
+    leal    (%ecx,rIBASE), rIBASE           # full result now in %edx:%eax
+    movl    rIBASE, 4(%esi)                 # v[A+1] <- rIBASE
+    movl    %eax, (%esi)                    # v[A] <- %eax
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    mov     LOCAL1(%esp), rFP               # restore FP
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_neg_double.S b/runtime/interpreter/mterp/x86/op_neg_double.S
new file mode 100644
index 0000000..fac4322
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"instr":"fchs","load":"fldl","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_neg_float.S b/runtime/interpreter/mterp/x86/op_neg_float.S
new file mode 100644
index 0000000..30f071b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"instr":"fchs","load":"flds","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_neg_int.S b/runtime/interpreter/mterp/x86/op_neg_int.S
new file mode 100644
index 0000000..67d4d18
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_int.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"negl    %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_neg_long.S b/runtime/interpreter/mterp/x86/op_neg_long.S
new file mode 100644
index 0000000..7cc17f0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_long.S
@@ -0,0 +1,13 @@
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax %ecx                      # eax <- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- v[B+1]
+    negl    %eax
+    adcl    $$0, %ecx
+    negl    %ecx
+    SET_VREG %eax rINST                     # v[A+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
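
The negl/adcl/negl sequence above is the usual two-word two's-complement
negation, carried across the 32-bit boundary. What it computes, restated in C:

    #include <stdint.h>

    /* Sketch: negate a 64-bit value held as two 32-bit words. */
    static void neg_long(uint32_t* lo, uint32_t* hi) {
        uint32_t borrow = (*lo != 0);   /* negl sets CF iff eax != 0 */
        *lo = 0u - *lo;                 /* negl %eax                 */
        *hi = 0u - (*hi + borrow);      /* adcl $0, %ecx ; negl %ecx */
    }
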
diff --git a/runtime/interpreter/mterp/x86/op_new_array.S b/runtime/interpreter/mterp/x86/op_new_array.S
new file mode 100644
index 0000000..6852183
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_new_array.S
@@ -0,0 +1,21 @@
+/*
+ * Allocate an array of objects, specified with the array class
+ * and a count.
+ *
+ * The verifier guarantees that this is an array class, so we don't
+ * check for it here.
+ */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpNewArray
+    REFRESH_IBASE
+    testl   %eax, %eax                      # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_new_instance.S b/runtime/interpreter/mterp/x86/op_new_instance.S
new file mode 100644
index 0000000..a3632e8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_new_instance.S
@@ -0,0 +1,16 @@
+/*
+ * Create a new instance of a class.
+ */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    call    MterpNewInstance
+    REFRESH_IBASE
+    testl   %eax, %eax                 # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_nop.S b/runtime/interpreter/mterp/x86/op_nop.S
new file mode 100644
index 0000000..4cb68e3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_nop.S
@@ -0,0 +1 @@
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_not_int.S b/runtime/interpreter/mterp/x86/op_not_int.S
new file mode 100644
index 0000000..335ab09
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_not_int.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"notl %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_not_long.S b/runtime/interpreter/mterp/x86/op_not_long.S
new file mode 100644
index 0000000..55666a1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_not_long.S
@@ -0,0 +1,11 @@
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax %ecx                      # eax <- v[B+0]
+    GET_VREG_HIGH %ecx %ecx                 # ecx <- v[B+1]
+    notl    %eax
+    notl    %ecx
+    SET_VREG %eax rINST                     # v[A+0] <- eax
+    SET_VREG_HIGH %ecx rINST                # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_or_int.S b/runtime/interpreter/mterp/x86/op_or_int.S
new file mode 100644
index 0000000..ebe2ec2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"orl     (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_int_2addr.S b/runtime/interpreter/mterp/x86/op_or_int_2addr.S
new file mode 100644
index 0000000..36c17db
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"orl     %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_or_int_lit16.S b/runtime/interpreter/mterp/x86/op_or_int_lit16.S
new file mode 100644
index 0000000..0a88ff59
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"orl     %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_int_lit8.S b/runtime/interpreter/mterp/x86/op_or_int_lit8.S
new file mode 100644
index 0000000..0670b67
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"orl     %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_long.S b/runtime/interpreter/mterp/x86/op_or_long.S
new file mode 100644
index 0000000..09ca539
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"orl     (rFP,%ecx,4), rIBASE", "instr2":"orl     4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_long_2addr.S b/runtime/interpreter/mterp/x86/op_or_long_2addr.S
new file mode 100644
index 0000000..2062e81
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"orl     %eax, (rFP,rINST,4)","instr2":"orl     %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_packed_switch.S b/runtime/interpreter/mterp/x86/op_packed_switch.S
new file mode 100644
index 0000000..4e39a48
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_packed_switch.S
@@ -0,0 +1,29 @@
+%default { "func":"MterpDoPackedSwitch" }
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    GET_VREG %eax rINST                     # eax <- vAA
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
+    movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
+    call    $func
+    addl    %eax, %eax                      # convert to byte offset; sets SF for jg below
+    leal    (rPC, %eax), rPC                # rPC <- branch target
+    FETCH_INST
+    REFRESH_IBASE                           # mov-only macros; flags from addl survive
+    jg      1f                              # forward branch: skip the suspend check
+#if MTERP_SUSPEND
+    #     REFRESH_IBASE - we did it above.
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+1:
+    GOTO_NEXT
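
The helper returns a signed offset in 16-bit code units relative to the switch
instruction; the handler doubles it into bytes, and jg uses the sign left over
from addl to skip the suspend check on forward branches. A sketch of the
packed-switch lookup itself, with the payload layout taken from the Dalvik
bytecode spec (the fall-through value of 3 is this instruction's own width):

    #include <stdint.h>
    #include <string.h>

    /* Sketch of the MterpDoPackedSwitch contract: payload is
     * uint16 ident, uint16 size, int32 first_key, int32 targets[size]. */
    static int32_t do_packed_switch(const uint16_t* data, int32_t test_val) {
        uint16_t size = data[1];
        int32_t first_key, target;
        memcpy(&first_key, data + 2, sizeof(first_key));
        uint32_t index = (uint32_t)(test_val - first_key);
        if (index >= size) {
            return 3;  /* no match: fall through past the 3-unit instruction */
        }
        memcpy(&target, data + 4 + 2 * index, sizeof(target));
        return target;  /* code-unit offset; may be negative */
    }
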
diff --git a/runtime/interpreter/mterp/x86/op_rem_double.S b/runtime/interpreter/mterp/x86/op_rem_double.S
new file mode 100644
index 0000000..4b52a06
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_double.S
@@ -0,0 +1,14 @@
+    /* rem_double vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    fldl    VREG_ADDRESS(%ecx)              # %st1 <- fp[vCC]
+    fldl    VREG_ADDRESS(%eax)              # %st0 <- fp[vBB]
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(rINST)             # fp[vAA] <- %st
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
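
fprem produces only a partial remainder and raises C2 in the FPU status word
while reduction is incomplete; fstsw/sahf maps C2 onto PF, so jp loops until
the value converges. The converged result is the truncated-division remainder,
i.e. C's fmod, with the sign of the dividend:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        /* Illustrative operands: rem_double keeps the dividend's sign. */
        printf("%f\n", fmod(7.5, -2.0));   /* prints 1.500000  */
        printf("%f\n", fmod(-7.5, 2.0));   /* prints -1.500000 */
        return 0;
    }
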
diff --git a/runtime/interpreter/mterp/x86/op_rem_double_2addr.S b/runtime/interpreter/mterp/x86/op_rem_double_2addr.S
new file mode 100644
index 0000000..5a0e669
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_double_2addr.S
@@ -0,0 +1,15 @@
+    /* rem_double/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    fldl    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $$0xf, %cl                      # ecx <- A
+    fldl    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_WIDE_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_rem_float.S b/runtime/interpreter/mterp/x86/op_rem_float.S
new file mode 100644
index 0000000..05e0bf1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_float.S
@@ -0,0 +1,14 @@
+    /* rem_float vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    flds    VREG_ADDRESS(%ecx)              # vCC to fp stack
+    flds    VREG_ADDRESS(%eax)              # vBB to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(rINST)             # %st to vAA
+    CLEAR_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_rem_float_2addr.S b/runtime/interpreter/mterp/x86/op_rem_float_2addr.S
new file mode 100644
index 0000000..29f84e6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_float_2addr.S
@@ -0,0 +1,15 @@
+    /* rem_float/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    flds    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $$0xf, %cl                      # ecx <- A
+    flds    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_rem_int.S b/runtime/interpreter/mterp/x86/op_rem_int.S
new file mode 100644
index 0000000..d25b93c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int.S
@@ -0,0 +1 @@
+%include "x86/bindiv.S" {"result":"rIBASE","special":"$0","rem":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_int_2addr.S b/runtime/interpreter/mterp/x86/op_rem_int_2addr.S
new file mode 100644
index 0000000..c788e0e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/bindiv2addr.S" {"result":"rIBASE","special":"$0"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_int_lit16.S b/runtime/interpreter/mterp/x86/op_rem_int_lit16.S
new file mode 100644
index 0000000..3df9d39
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/bindivLit16.S" {"result":"rIBASE","special":"$0"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_int_lit8.S b/runtime/interpreter/mterp/x86/op_rem_int_lit8.S
new file mode 100644
index 0000000..56e19c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/bindivLit8.S" {"result":"rIBASE","special":"$0"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_long.S b/runtime/interpreter/mterp/x86/op_rem_long.S
new file mode 100644
index 0000000..0ffe1f6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_long.S
@@ -0,0 +1 @@
+%include "x86/op_div_long.S" {"routine":"art_quick_lmod"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_long_2addr.S b/runtime/interpreter/mterp/x86/op_rem_long_2addr.S
new file mode 100644
index 0000000..4b97735
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/op_div_long_2addr.S" {"routine":"art_quick_lmod"}
diff --git a/runtime/interpreter/mterp/x86/op_return.S b/runtime/interpreter/mterp/x86/op_return.S
new file mode 100644
index 0000000..183b3bf
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return.S
@@ -0,0 +1,17 @@
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    GET_VREG %eax rINST                     # eax <- vAA
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
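
Every return variant polls the thread's flag word so that pending suspend or
checkpoint requests are honored at method exit. The pattern, sketched in C
with illustrative flag values (the real constants are provided to the
assembler, not these):

    #include <stdint.h>

    #define THREAD_SUSPEND_REQUEST    1u   /* illustrative values */
    #define THREAD_CHECKPOINT_REQUEST 2u

    struct Thread { uint32_t flags; /* ... */ };
    void MterpSuspendCheck(struct Thread* self);  /* runtime-provided */

    /* Sketch of the testl/jz/call sequence shared by op_return*. */
    static void return_suspend_poll(struct Thread* self) {
        if (self->flags & (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)) {
            MterpSuspendCheck(self);
        }
    }
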
diff --git a/runtime/interpreter/mterp/x86/op_return_object.S b/runtime/interpreter/mterp/x86/op_return_object.S
new file mode 100644
index 0000000..12c84b3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_object.S
@@ -0,0 +1 @@
+%include "x86/op_return.S"
diff --git a/runtime/interpreter/mterp/x86/op_return_void.S b/runtime/interpreter/mterp/x86/op_return_void.S
new file mode 100644
index 0000000..f3e24c7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_void.S
@@ -0,0 +1,11 @@
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
new file mode 100644
index 0000000..add4e20
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
@@ -0,0 +1,9 @@
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_return_wide.S b/runtime/interpreter/mterp/x86/op_return_wide.S
new file mode 100644
index 0000000..34a3380
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_wide.S
@@ -0,0 +1,15 @@
+/*
+ * Return a 64-bit value.
+ */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    call    MterpThreadFenceForConstructor
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    MterpSuspendCheck
+1:
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    GET_VREG_HIGH %ecx rINST                # ecx <- v[AA+1]
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_rsub_int.S b/runtime/interpreter/mterp/x86/op_rsub_int.S
new file mode 100644
index 0000000..d6449c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "x86/binopLit16.S" {"instr":"subl    %eax, %ecx","result":"%ecx"}
diff --git a/runtime/interpreter/mterp/x86/op_rsub_int_lit8.S b/runtime/interpreter/mterp/x86/op_rsub_int_lit8.S
new file mode 100644
index 0000000..15d0e35
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"subl    %eax, %ecx" , "result":"%ecx"}
diff --git a/runtime/interpreter/mterp/x86/op_sget.S b/runtime/interpreter/mterp/x86/op_sget.S
new file mode 100644
index 0000000..ed5aedf
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget.S
@@ -0,0 +1,26 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    $helper
+    movl    rSELF, %ecx
+    REFRESH_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if $is_object
+    SET_VREG_OBJECT %eax rINST              # fp[AA] <- value
+    .else
+    SET_VREG %eax rINST                     # fp[AA] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
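
The six narrow sget flavors share this wrapper and differ only in the helper.
Errors are reported through the thread's pending-exception slot rather than
the return value, which is why the handler tests THREAD_EXCEPTION_OFFSET
instead of %eax. The assumed helper shape, sketched as a C declaration:

    #include <stdint.h>

    struct ArtMethod;
    struct Thread;

    /* Sketch of the artGetXXStaticFromCode family: resolve the static
     * field field_idx relative to referrer's dex file (initializing the
     * class if needed) and return its value; on failure, leave an
     * exception pending on self, making the return value meaningless. */
    int32_t artGet32StaticFromCode(uint32_t field_idx,
                                   struct ArtMethod* referrer,
                                   struct Thread* self);
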
diff --git a/runtime/interpreter/mterp/x86/op_sget_boolean.S b/runtime/interpreter/mterp/x86/op_sget_boolean.S
new file mode 100644
index 0000000..f058dd8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_byte.S b/runtime/interpreter/mterp/x86/op_sget_byte.S
new file mode 100644
index 0000000..c952f40
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_byte.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetByteStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_char.S b/runtime/interpreter/mterp/x86/op_sget_char.S
new file mode 100644
index 0000000..d7bd410
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_char.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetCharStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_object.S b/runtime/interpreter/mterp/x86/op_sget_object.S
new file mode 100644
index 0000000..1c95f9a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_object.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_short.S b/runtime/interpreter/mterp/x86/op_sget_short.S
new file mode 100644
index 0000000..6475306
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_short.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetShortStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_wide.S b/runtime/interpreter/mterp/x86/op_sget_wide.S
new file mode 100644
index 0000000..76b993b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_wide.S
@@ -0,0 +1,21 @@
+/*
+ * SGET_WIDE handler wrapper.
+ *
+ */
+    /* sget-wide vAA, field@BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    artGet64StaticFromCode
+    movl    rSELF, %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG %eax rINST                     # fp[AA] <- low part
+    SET_VREG_HIGH %edx rINST                # fp[AA+1] <- high part
+    REFRESH_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_shl_int.S b/runtime/interpreter/mterp/x86/op_shl_int.S
new file mode 100644
index 0000000..6a41d1c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_int.S
@@ -0,0 +1 @@
+%include "x86/binop1.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shl_int_2addr.S b/runtime/interpreter/mterp/x86/op_shl_int_2addr.S
new file mode 100644
index 0000000..72abb8e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/shop2addr.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shl_int_lit8.S b/runtime/interpreter/mterp/x86/op_shl_int_lit8.S
new file mode 100644
index 0000000..b8d6069
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shl_long.S b/runtime/interpreter/mterp/x86/op_shl_long.S
new file mode 100644
index 0000000..56d13e3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_long.S
@@ -0,0 +1,29 @@
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shl-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE <- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shldl   %eax,rIBASE
+    sall    %cl, %eax
+    testb   $$32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
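
Because %cl masks shift counts to 5 bits, shldl/sall alone cover counts 0-31;
the testb $32 path rebuilds counts 32-63 by promoting the shifted low word and
zeroing it. The same structure appears below in shr-long and ushr-long with
sign or zero fill instead. The composition as a C sketch:

    #include <stdint.h>

    /* Sketch: shl-long via the 32-bit split used above. 'count' is the
     * Dalvik shift distance already masked to 6 bits. */
    static uint64_t shl_long(uint32_t lo, uint32_t hi, uint32_t count) {
        uint32_t c = count & 31u;  /* what %cl actually applies */
        uint32_t new_hi = (hi << c) | (c ? lo >> (32 - c) : 0u);  /* shldl */
        uint32_t new_lo = lo << c;                                /* sall  */
        if (count & 32u) {       /* testb $32, %cl: counts 32..63 */
            new_hi = new_lo;     /* movl %eax, rIBASE */
            new_lo = 0;          /* xorl %eax, %eax   */
        }
        return ((uint64_t)new_hi << 32) | new_lo;
    }
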
diff --git a/runtime/interpreter/mterp/x86/op_shl_long_2addr.S b/runtime/interpreter/mterp/x86/op_shl_long_2addr.S
new file mode 100644
index 0000000..5da873f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_long_2addr.S
@@ -0,0 +1,26 @@
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shl-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $$4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    shldl   %eax, rIBASE
+    sall    %cl, %eax
+    testb   $$32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_shr_int.S b/runtime/interpreter/mterp/x86/op_shr_int.S
new file mode 100644
index 0000000..687b2c3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_int.S
@@ -0,0 +1 @@
+%include "x86/binop1.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shr_int_2addr.S b/runtime/interpreter/mterp/x86/op_shr_int_2addr.S
new file mode 100644
index 0000000..533b0e9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/shop2addr.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shr_int_lit8.S b/runtime/interpreter/mterp/x86/op_shr_int_lit8.S
new file mode 100644
index 0000000..ebd1bea
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shr_long.S b/runtime/interpreter/mterp/x86/op_shr_long.S
new file mode 100644
index 0000000..4490a9a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_long.S
@@ -0,0 +1,29 @@
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE<- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $$31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_shr_long_2addr.S b/runtime/interpreter/mterp/x86/op_shr_long_2addr.S
new file mode 100644
index 0000000..57494f9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_long_2addr.S
@@ -0,0 +1,26 @@
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[AA+0]
+    sarl    $$4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[AA+1]
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $$31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_sparse_switch.S b/runtime/interpreter/mterp/x86/op_sparse_switch.S
new file mode 100644
index 0000000..fdaec47
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "x86/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/x86/op_sput.S b/runtime/interpreter/mterp/x86/op_sput.S
new file mode 100644
index 0000000..04a8f23
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput.S
@@ -0,0 +1,22 @@
+%default { "helper":"artSet32StaticFromCode"}
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    $helper
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_boolean.S b/runtime/interpreter/mterp/x86/op_sput_boolean.S
new file mode 100644
index 0000000..63601bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_byte.S b/runtime/interpreter/mterp/x86/op_sput_byte.S
new file mode 100644
index 0000000..63601bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_byte.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_char.S b/runtime/interpreter/mterp/x86/op_sput_char.S
new file mode 100644
index 0000000..1749f7c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_char.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_object.S b/runtime/interpreter/mterp/x86/op_sput_object.S
new file mode 100644
index 0000000..0480e00
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_object.S
@@ -0,0 +1,13 @@
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    MterpSputObject
+    testl   %eax, %eax
+    jz      MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_short.S b/runtime/interpreter/mterp/x86/op_sput_short.S
new file mode 100644
index 0000000..1749f7c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_short.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_wide.S b/runtime/interpreter/mterp/x86/op_sput_wide.S
new file mode 100644
index 0000000..d58d5af
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_wide.S
@@ -0,0 +1,20 @@
+/*
+ * SPUT_WIDE handler wrapper.
+ *
+ */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    artSet64IndirectStaticFromMterp
+    testl   %eax, %eax
+    jnz     MterpException
+    REFRESH_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sub_double.S b/runtime/interpreter/mterp/x86/op_sub_double.S
new file mode 100644
index 0000000..e83afeb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"subs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_double_2addr.S b/runtime/interpreter/mterp/x86/op_sub_double_2addr.S
new file mode 100644
index 0000000..af9a2ab
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"subs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_float.S b/runtime/interpreter/mterp/x86/op_sub_float.S
new file mode 100644
index 0000000..423d834
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"subs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_float_2addr.S b/runtime/interpreter/mterp/x86/op_sub_float_2addr.S
new file mode 100644
index 0000000..18de000
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"subs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_int.S b/runtime/interpreter/mterp/x86/op_sub_int.S
new file mode 100644
index 0000000..7fe03fb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"subl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_int_2addr.S b/runtime/interpreter/mterp/x86/op_sub_int_2addr.S
new file mode 100644
index 0000000..cc9bf60
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"subl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_long.S b/runtime/interpreter/mterp/x86/op_sub_long.S
new file mode 100644
index 0000000..014591e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"subl    (rFP,%ecx,4), rIBASE", "instr2":"sbbl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_long_2addr.S b/runtime/interpreter/mterp/x86/op_sub_long_2addr.S
new file mode 100644
index 0000000..7498029
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"subl    %eax, (rFP,rINST,4)","instr2":"sbbl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_throw.S b/runtime/interpreter/mterp/x86/op_throw.S
new file mode 100644
index 0000000..15b20b5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_throw.S
@@ -0,0 +1,11 @@
+/*
+ * Throw an exception object in the current thread.
+ */
+    /* throw vAA */
+    EXPORT_PC
+    GET_VREG %eax rINST                     # eax<- vAA (exception object)
+    testl   %eax, %eax
+    jz      common_errNullObject
+    movl    rSELF,%ecx
+    movl    %eax, THREAD_EXCEPTION_OFFSET(%ecx)
+    jmp     MterpException
diff --git a/runtime/interpreter/mterp/x86/op_unused_3e.S b/runtime/interpreter/mterp/x86/op_unused_3e.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_3e.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_3f.S b/runtime/interpreter/mterp/x86/op_unused_3f.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_3f.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_40.S b/runtime/interpreter/mterp/x86/op_unused_40.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_40.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_41.S b/runtime/interpreter/mterp/x86/op_unused_41.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_41.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_42.S b/runtime/interpreter/mterp/x86/op_unused_42.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_42.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_43.S b/runtime/interpreter/mterp/x86/op_unused_43.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_43.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_79.S b/runtime/interpreter/mterp/x86/op_unused_79.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_79.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_7a.S b/runtime/interpreter/mterp/x86/op_unused_7a.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_7a.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f4.S b/runtime/interpreter/mterp/x86/op_unused_f4.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f4.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fa.S b/runtime/interpreter/mterp/x86/op_unused_fa.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fa.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fb.S b/runtime/interpreter/mterp/x86/op_unused_fb.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fb.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fc.S b/runtime/interpreter/mterp/x86/op_unused_fc.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fc.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fd.S b/runtime/interpreter/mterp/x86/op_unused_fd.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fd.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fe.S b/runtime/interpreter/mterp/x86/op_unused_fe.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fe.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_ff.S b/runtime/interpreter/mterp/x86/op_unused_ff.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_ff.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_ushr_int.S b/runtime/interpreter/mterp/x86/op_ushr_int.S
new file mode 100644
index 0000000..dfe25ff
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_int.S
@@ -0,0 +1 @@
+%include "x86/binop1.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_ushr_int_2addr.S b/runtime/interpreter/mterp/x86/op_ushr_int_2addr.S
new file mode 100644
index 0000000..c14bc98
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/shop2addr.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_ushr_int_lit8.S b/runtime/interpreter/mterp/x86/op_ushr_int_lit8.S
new file mode 100644
index 0000000..e129f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_ushr_long.S b/runtime/interpreter/mterp/x86/op_ushr_long.S
new file mode 100644
index 0000000..287946e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_long.S
@@ -0,0 +1,29 @@
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* ushr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE %eax               # rIBASE <- v[BB+1]
+    GET_VREG %ecx %ecx                      # ecx <- vCC
+    GET_VREG %eax %eax                      # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
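
For reference, the 64-bit logical shift the shrd/shr sequence above computes, as a plain C++ sketch on 32-bit halves (UshrLong is an illustrative name, not part of the tree):

    #include <cstdint>

    uint64_t UshrLong(uint64_t value, uint32_t shift) {
      uint32_t lo = static_cast<uint32_t>(value);
      uint32_t hi = static_cast<uint32_t>(value >> 32);
      shift &= 63;                    // Dalvik masks the count to 6 bits.
      if (shift & 32) {               // Counts 32-63: x86 masks %cl to 5 bits,
        lo = hi >> (shift & 31);      // so the assembly patches this case up
        hi = 0;                       // after the shrd/shr pair.
      } else if (shift != 0) {
        lo = (lo >> shift) | (hi << (32 - shift));  // shrd-style double shift.
        hi >>= shift;
      }
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }
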
diff --git a/runtime/interpreter/mterp/x86/op_ushr_long_2addr.S b/runtime/interpreter/mterp/x86/op_ushr_long_2addr.S
new file mode 100644
index 0000000..39c2724
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_long_2addr.S
@@ -0,0 +1,26 @@
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* ushr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets A */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- v[A+0]
+    sarl    $$4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE rINST              # rIBASE <- v[A+1]
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE rINST              # v[A+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax rINST                     # v[A+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_xor_int.S b/runtime/interpreter/mterp/x86/op_xor_int.S
new file mode 100644
index 0000000..35aca6a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"xorl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_int_2addr.S b/runtime/interpreter/mterp/x86/op_xor_int_2addr.S
new file mode 100644
index 0000000..d7b70e2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"xorl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_int_lit16.S b/runtime/interpreter/mterp/x86/op_xor_int_lit16.S
new file mode 100644
index 0000000..115f0a0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"xorl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_int_lit8.S b/runtime/interpreter/mterp/x86/op_xor_int_lit8.S
new file mode 100644
index 0000000..243971c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"xorl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_long.S b/runtime/interpreter/mterp/x86/op_xor_long.S
new file mode 100644
index 0000000..0d3c0f5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"xorl    (rFP,%ecx,4), rIBASE", "instr2":"xorl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_long_2addr.S b/runtime/interpreter/mterp/x86/op_xor_long_2addr.S
new file mode 100644
index 0000000..b5000e4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"xorl    %eax, (rFP,rINST,4)","instr2":"xorl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/shop2addr.S b/runtime/interpreter/mterp/x86/shop2addr.S
new file mode 100644
index 0000000..94d3545
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/shop2addr.S
@@ -0,0 +1,13 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx %ecx                      # ecx <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax rINST                     # eax <- vA
+    $instr                                  # ex: sarl %cl, %eax
+    SET_VREG $result rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/sseBinop.S b/runtime/interpreter/mterp/x86/sseBinop.S
new file mode 100644
index 0000000..63a1e21
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/sseBinop.S
@@ -0,0 +1,9 @@
+%default {"instr":"","suff":""}
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movs${suff}   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    ${instr}${suff} VREG_ADDRESS(%eax), %xmm0
+    movs${suff}   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movs${suff}   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/sseBinop2Addr.S b/runtime/interpreter/mterp/x86/sseBinop2Addr.S
new file mode 100644
index 0000000..d157e67
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/sseBinop2Addr.S
@@ -0,0 +1,10 @@
+%default {"instr":"","suff":""}
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $$0xf, %ecx                     # ecx <- A
+    movs${suff} VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $$4, rINST                      # rINST <- B
+    ${instr}${suff} VREG_ADDRESS(rINST), %xmm0
+    movs${suff} %xmm0, VREG_ADDRESS(%ecx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movs${suff} %xmm0, VREG_REF_ADDRESS(%ecx)  # clear ref of the destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/unop.S b/runtime/interpreter/mterp/x86/unop.S
new file mode 100644
index 0000000..00d3e15
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/unop.S
@@ -0,0 +1,13 @@
+%default {"instr":""}
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $$4,rINST                       # rINST <- B
+    GET_VREG %eax rINST                     # eax <- vB
+    andb    $$0xf,%cl                       # ecx <- A
+    $instr
+    SET_VREG %eax %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/unused.S b/runtime/interpreter/mterp/x86/unused.S
new file mode 100644
index 0000000..c95ef94
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/unused.S
@@ -0,0 +1,4 @@
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
diff --git a/runtime/interpreter/mterp/x86/zcmp.S b/runtime/interpreter/mterp/x86/zcmp.S
new file mode 100644
index 0000000..5ce4f0f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/zcmp.S
@@ -0,0 +1,24 @@
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $$0, VREG_ADDRESS(rINST)        # compare (vAA, 0)
+    movl    $$2, %eax                       # assume branch not taken
+    j${revcmp}   1f
+    movswl  2(rPC),%eax                     # fetch signed displacement
+1:
+    addl    %eax, %eax                      # eax <- disp * 2 (byte offset)
+    leal    (rPC, %eax), rPC
+    FETCH_INST
+    jg      2f                              # disp * 2 > 0 => no suspend check
+#if MTERP_SUSPEND
+    REFRESH_IBASE
+#else
+    jmp     MterpCheckSuspendAndContinue
+#endif
+2:
+    GOTO_NEXT
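
The same control flow as a C++ sketch; TakesBranch, ReadSigned16 and CheckSuspend are illustrative stand-ins, not mterp names:

    #include <cstdint>

    bool TakesBranch(int32_t vreg_value);      // true iff the reversed test failed.
    int16_t ReadSigned16(const uint16_t* pc);  // fetch the +BBBB operand.
    void CheckSuspend();                       // poll for a pending suspension.

    const uint16_t* ZeroCompareBranch(const uint16_t* pc, int32_t vreg_value) {
      int32_t disp = 2;                 // assume not taken: skip over the if-cmp.
      if (TakesBranch(vreg_value)) {
        disp = ReadSigned16(pc + 1);    // displacement, in 16-bit code units.
      }
      pc += disp;
      if (disp <= 0) {                  // backward (or self) branches only:
        CheckSuspend();
      }
      return pc;                        // caller fetches and dispatches.
    }
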
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index 4fb6df1..2507fe9 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -30,13 +30,13 @@
 #include "base/stringprintf.h"
 #include "jdwp/jdwp_priv.h"
 
-#define kBasePort           8000
-#define kMaxPort            8040
-
 namespace art {
 
 namespace JDWP {
 
+static constexpr uint16_t kBasePort = 8000;
+static constexpr uint16_t kMaxPort = 8040;
+
 /*
  * JDWP network state.
  *
@@ -275,11 +275,33 @@
    * Start by resolving the host name.
    */
 #if defined(__linux__)
+  // Initial size of the work buffer used in gethostbyname_r.
+  //
+  // The call to gethostbyname_r below requires a user-allocated buffer,
+  // the size of which depends on the system. The initial implementation
+  // used a 128-byte buffer, but that was not enough on some systems
+  // (maybe because of IPv6), causing failures in JDWP host testing; it
+  // was therefore increased to 256.
+  //
+  // However, we should not use a fixed size: gethostbyname_r's
+  // documentation states that if the work buffer is too small (i.e. if
+  // gethostbyname_r returns `ERANGE`), the function should be called
+  // again with a bigger buffer. We now do exactly that, starting with
+  // an initial 256-byte buffer and doubling it until gethostbyname_r
+  // accepts the size.
+  static constexpr size_t kInitialAuxBufSize = 256;
+
+  std::vector<char> auxBuf(kInitialAuxBufSize);
   hostent he;
-  char auxBuf[128];
   int error;
-  int cc = gethostbyname_r(options->host.c_str(), &he, auxBuf, sizeof(auxBuf), &pEntry, &error);
-  if (cc != 0) {
+  int cc;
+  while ((cc = gethostbyname_r(
+             options->host.c_str(), &he, auxBuf.data(), auxBuf.size(), &pEntry, &error))
+         == ERANGE) {
+    // The work buffer `auxBuf` is too small; enlarge it.
+    auxBuf.resize(auxBuf.size() * 2);
+  }
+  if (cc != 0 || pEntry == nullptr) {
     LOG(WARNING) << "gethostbyname_r('" << options->host << "') failed: " << hstrerror(error);
     return false;
   }
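
The retry protocol in isolation, as a sketch against glibc's gethostbyname_r (ResolveHost is an illustrative name):

    #include <cerrno>
    #include <netdb.h>
    #include <string>
    #include <vector>

    bool ResolveHost(const std::string& host, hostent* he, hostent** entry) {
      std::vector<char> buf(256);     // initial work buffer, as above.
      int h_err = 0;
      int rc;
      while ((rc = gethostbyname_r(host.c_str(), he, buf.data(), buf.size(),
                                   entry, &h_err)) == ERANGE) {
        buf.resize(buf.size() * 2);   // buffer too small: double and retry.
      }
      return rc == 0 && *entry != nullptr;
    }
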
@@ -298,7 +320,8 @@
 
   addr.addrInet.sin_port = htons(options->port);
 
-  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+            << ntohs(addr.addrInet.sin_port);
 
   /*
    * Create a socket.
@@ -313,13 +336,15 @@
    * Try to connect.
    */
   if (connect(clientSock, &addr.addrPlain, sizeof(addr)) != 0) {
-    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+                << ntohs(addr.addrInet.sin_port);
     close(clientSock);
     clientSock = -1;
     return false;
   }
 
-  LOG(INFO) << "Connection established to " << options->host << " (" << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
+  LOG(INFO) << "Connection established to " << options->host << " ("
+            << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
   SetAwaitingHandshake(true);
   input_count_ = 0;
 
@@ -438,7 +463,8 @@
         }
       }
       if (clientSock >= 0 && FD_ISSET(clientSock, &readfds)) {
-        readCount = read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
+        readCount =
+            read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
         if (readCount < 0) {
           /* read failed */
           if (errno != EINTR) {
@@ -479,7 +505,8 @@
     errno = 0;
     int cc = TEMP_FAILURE_RETRY(write(clientSock, input_buffer_, kMagicHandshakeLen));
     if (cc != kMagicHandshakeLen) {
-      PLOG(ERROR) << "Failed writing handshake bytes (" << cc << " of " << kMagicHandshakeLen << ")";
+      PLOG(ERROR) << "Failed writing handshake bytes ("
+                  << cc << " of " << kMagicHandshakeLen << ")";
       goto fail;
     }
 
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 8f4d24f..4e0146c 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -167,6 +167,7 @@
   // Don't compile the method if we are supposed to be deoptimized.
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
   if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
+    VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to deoptimization";
     return false;
   }
 
@@ -202,6 +203,13 @@
   }
 }
 
+bool Jit::JitAtFirstUse() {
+  if (instrumentation_cache_ != nullptr) {
+    return instrumentation_cache_->HotMethodThreshold() == 0;
+  }
+  return false;
+}
+
 Jit::~Jit() {
   DCHECK(!save_profiling_info_ || !ProfileSaver::IsStarted());
   if (dump_info_on_shutdown_) {
@@ -217,7 +225,6 @@
 }
 
 void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
-  CHECK_GT(compile_threshold, 0U);
   instrumentation_cache_.reset(
       new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
 }
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 429edf6..a80f51f 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -86,6 +86,8 @@
   // into the specified class linker to the jit debug interface.
   void DumpTypeInfoForLoadedTypes(ClassLinker* linker);
 
+  bool JitAtFirstUse();
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
@@ -142,6 +144,10 @@
   void SetSaveProfilingInfo(bool b) {
     save_profiling_info_ = b;
   }
+  void SetJitAtFirstUse() {
+    use_jit_ = true;
+    compile_threshold_ = 0;
+  }
 
  private:
   bool use_jit_;
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 64b2c89..c6e7fef 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -719,7 +719,7 @@
   }
 }
 
-void JitCodeCache::GetCompiledArtMethods(const std::set<const std::string>& dex_base_locations,
+void JitCodeCache::GetCompiledArtMethods(const std::set<std::string>& dex_base_locations,
                                          std::vector<ArtMethod*>& methods) {
   MutexLock mu(Thread::Current(), lock_);
   for (auto it : method_code_map_) {
@@ -737,8 +737,16 @@
   if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
     return false;
   }
-  MutexLock mu(self, lock_);
+
+  // Compiling requires a ProfilingInfo object in which to record the
+  // compilation state. Create one if the method does not have one yet.
   ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info == nullptr) {
+    ProfilingInfo::Create(self, method, /* retry_allocation */ true);
+  }
+
+  MutexLock mu(self, lock_);
+  info = method->GetProfilingInfo(sizeof(void*));
   if (info == nullptr || info->IsMethodBeingCompiled()) {
     return false;
   }
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 67fa928..69fc553 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -153,7 +153,7 @@
   void* MoreCore(const void* mspace, intptr_t increment);
 
   // Adds to `methods` all the compiled ArtMethods which are part of any of the given dex locations.
-  void GetCompiledArtMethods(const std::set<const std::string>& dex_base_locations,
+  void GetCompiledArtMethods(const std::set<std::string>& dex_base_locations,
                              std::vector<ArtMethod*>& methods)
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 4cbaf2c..d597b36 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -162,14 +162,25 @@
                                                mirror::Object* /*this_object*/,
                                                ArtMethod* method,
                                                uint32_t /*dex_pc*/) {
+  if (UNLIKELY(Runtime::Current()->GetJit()->JitAtFirstUse())) {
+    // The compiler requires a ProfilingInfo object.
+    ProfilingInfo::Create(thread, method, /* retry_allocation */ true);
+    JitCompileTask compile_task(method, JitCompileTask::kCompile);
+    compile_task.Run(thread);
+    return;
+  }
+
   instrumentation_cache_->AddSamples(thread, method, 1);
 }
 
-void JitInstrumentationListener::BackwardBranch(Thread* thread,
-                                                ArtMethod* method,
-                                                int32_t dex_pc_offset) {
-  CHECK_LE(dex_pc_offset, 0);
-  instrumentation_cache_->AddSamples(thread, method, 1);
+void JitInstrumentationListener::Branch(Thread* thread,
+                                        ArtMethod* method,
+                                        uint32_t dex_pc ATTRIBUTE_UNUSED,
+                                        int32_t dex_pc_offset) {
+  if (dex_pc_offset < 0) {
+    // Increment method hotness if it is a backward branch.
+    instrumentation_cache_->AddSamples(thread, method, 1);
+  }
 }
 
 void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
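
The two entry paths above in miniature; CompileNow stands in for constructing and running a synchronous JitCompileTask, and the ART types are assumed to be declared:

    void CompileNow(Thread* thread, ArtMethod* method);  // illustrative stand-in.

    void OnMethodEntered(Thread* thread, ArtMethod* method, Jit* jit,
                         JitInstrumentationCache* cache) {
      if (jit->JitAtFirstUse()) {              // HotMethodThreshold() == 0.
        CompileNow(thread, method);            // compile synchronously on entry.
      } else {
        cache->AddSamples(thread, method, 1);  // defer until the method is hot.
      }
    }
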
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 15969e4..06559ad 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -70,7 +70,7 @@
   void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
                   ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
 
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
   void InvokeVirtualOrInterface(Thread* thread,
@@ -84,7 +84,7 @@
 
   static constexpr uint32_t kJitEvents =
       instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch |
+      instrumentation::Instrumentation::kBranch |
       instrumentation::Instrumentation::kInvokeVirtualOrInterface;
 
  private:
@@ -101,6 +101,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
   void DeleteThreadPool(Thread* self);
+
+  size_t HotMethodThreshold() const {
+    return hot_method_threshold_;
+  }
+
   // Wait until there are no more pending compilation tasks.
   void WaitForCompilationToFinish(Thread* self);
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index a132701..b4b872f 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -24,8 +24,11 @@
 
 #include "art_method-inl.h"
 #include "base/mutex.h"
+#include "base/scoped_flock.h"
 #include "base/stl_util.h"
+#include "base/unix_file/fd_file.h"
 #include "jit/profiling_info.h"
+#include "os.h"
 #include "safe_map.h"
 
 namespace art {
@@ -37,8 +40,17 @@
     return true;
   }
 
+  ScopedFlock flock;
+  std::string error;
+  if (!flock.Init(filename.c_str(), O_RDWR | O_NOFOLLOW | O_CLOEXEC, /* block */ false, &error)) {
+    LOG(WARNING) << "Couldn't lock the profile file " << filename << ": " << error;
+    return false;
+  }
+
+  int fd = flock.GetFile()->Fd();
+
   ProfileCompilationInfo info;
-  if (!info.Load(filename)) {
+  if (!info.Load(fd)) {
     LOG(WARNING) << "Could not load previous profile data from file " << filename;
     return false;
   }
@@ -54,9 +66,14 @@
     }
   }
 
+  if (!flock.GetFile()->ClearContent()) {
+    PLOG(WARNING) << "Could not clear profile file: " << filename;
+    return false;
+  }
+
   // This doesn't need locking because we are trying to lock the file for exclusive
   // access and fail immediately if we can't.
-  bool result = info.Save(filename);
+  bool result = info.Save(fd);
   if (result) {
     VLOG(profiler) << "Successfully saved profile info to " << filename
         << " Size: " << GetFileSizeBytes(filename);
@@ -66,64 +83,20 @@
   return result;
 }
 
-enum OpenMode {
-  READ,
-  READ_WRITE
-};
-
-static int OpenFile(const std::string& filename, OpenMode open_mode) {
-  int fd = -1;
-  switch (open_mode) {
-    case READ:
-      fd = open(filename.c_str(), O_RDONLY);
-      break;
-    case READ_WRITE:
-      // TODO(calin) allow the shared uid of the app to access the file.
-      fd = open(filename.c_str(), O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC);
-      break;
-  }
-
-  if (fd < 0) {
-    PLOG(WARNING) << "Failed to open profile file " << filename;
-    return -1;
-  }
-
-  // Lock the file for exclusive access but don't wait if we can't lock it.
-  int err = flock(fd, LOCK_EX | LOCK_NB);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to lock profile file " << filename;
-    return -1;
-  }
-  return fd;
-}
-
-static bool CloseDescriptorForFile(int fd, const std::string& filename) {
-  // Now unlock the file, allowing another process in.
-  int err = flock(fd, LOCK_UN);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to unlock profile file " << filename;
-    return false;
-  }
-
-  // Done, close the file.
-  err = ::close(fd);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to close descriptor for profile file" << filename;
-    return false;
-  }
-
-  return true;
-}
-
-static void WriteToFile(int fd, const std::ostringstream& os) {
+static bool WriteToFile(int fd, const std::ostringstream& os) {
   std::string data(os.str());
   const char *p = data.c_str();
   size_t length = data.length();
   do {
-    int n = ::write(fd, p, length);
+    int n = TEMP_FAILURE_RETRY(write(fd, p, length));
+    if (n < 0) {
+      PLOG(WARNING) << "Failed to write to descriptor: " << fd;
+      return false;
+    }
     p += n;
     length -= n;
   } while (length > 0);
+  return true;
 }
 
 static constexpr const char kFieldSeparator = ',';
@@ -137,13 +110,8 @@
  *    /system/priv-app/app/app.apk,131232145,11,23,454,54
  *    /system/priv-app/app/app.apk:classes5.dex,218490184,39,13,49,1
  **/
-bool ProfileCompilationInfo::Save(const std::string& filename) {
-  int fd = OpenFile(filename, READ_WRITE);
-  if (fd == -1) {
-    return false;
-  }
-
-  // TODO(calin): Merge with a previous existing profile.
+bool ProfileCompilationInfo::Save(uint32_t fd) {
+  DCHECK_GE(fd, 0u);
   // TODO(calin): Profile this and see how much memory it takes. If too much,
   // write to file directly.
   std::ostringstream os;
@@ -158,9 +126,7 @@
     os << kLineSeparator;
   }
 
-  WriteToFile(fd, os);
-
-  return CloseDescriptorForFile(fd, filename);
+  return WriteToFile(fd, os);
 }
 
 // TODO(calin): This is a duplicate of Utils::Split fixing the case where the first character
@@ -222,7 +188,9 @@
       LOG(WARNING) << "Cannot parse method_idx " << parts[i];
       return false;
     }
-    AddData(dex_location, checksum, method_idx);
+    if (!AddData(dex_location, checksum, method_idx)) {
+      return false;
+    }
   }
   return true;
 }
@@ -249,23 +217,18 @@
   return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
 }
 
-bool ProfileCompilationInfo::Load(const std::string& filename) {
-  int fd = OpenFile(filename, READ);
-  if (fd == -1) {
-    return false;
-  }
+bool ProfileCompilationInfo::Load(uint32_t fd) {
+  DCHECK_GE(fd, 0u);
 
   std::string current_line;
   const int kBufferSize = 1024;
   char buffer[kBufferSize];
-  bool success = true;
 
-  while (success) {
-    int n = read(fd, buffer, kBufferSize);
+  while (true) {
+    int n = TEMP_FAILURE_RETRY(read(fd, buffer, kBufferSize));
     if (n < 0) {
-      PLOG(WARNING) << "Error when reading profile file " << filename;
-      success = false;
-      break;
+      PLOG(WARNING) << "Error when reading profile file";
+      return false;
     } else if (n == 0) {
       break;
     }
@@ -278,17 +241,13 @@
         break;
       }
       if (!ProcessLine(current_line)) {
-        success = false;
-        break;
+        return false;
       }
       // Reset the current line (we just processed it).
       current_line.clear();
     }
   }
-  if (!success) {
-    info_.clear();
-  }
-  return CloseDescriptorForFile(fd, filename) && success;
+  return true;
 }
 
 bool ProfileCompilationInfo::Load(const ProfileCompilationInfo& other) {
@@ -369,4 +328,8 @@
   return os.str();
 }
 
+bool ProfileCompilationInfo::Equals(ProfileCompilationInfo& other) {
+  return info_.Equals(other.info_);
+}
+
 }  // namespace art
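
The text format the Save/Load pair reads and writes (one line per dex file: "dex_location,checksum,method_idx1,method_idx2,...") as a stand-alone sketch; SerializeLine is an illustrative helper, not part of the class:

    #include <cstdint>
    #include <set>
    #include <sstream>
    #include <string>

    std::string SerializeLine(const std::string& dex_location, uint32_t checksum,
                              const std::set<uint16_t>& methods) {
      std::ostringstream os;
      os << dex_location << ',' << checksum;    // kFieldSeparator is ','.
      for (uint16_t method_idx : methods) {
        os << ',' << method_idx;
      }
      os << '\n';                               // kLineSeparator is '\n'.
      return os.str();  // e.g. "/system/priv-app/app/app.apk,131232145,11,23\n"
    }
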
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 26e1ac3..ffd1433 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -39,15 +39,18 @@
  */
 class ProfileCompilationInfo {
  public:
+  // Saves profile information about the given methods in the given file.
+  // Note that the saving proceeds only if the file can be locked for exclusive access.
+  // If it cannot (the lock attempt does not block), the function does not save and returns false.
   static bool SaveProfilingInfo(const std::string& filename,
                                 const std::vector<ArtMethod*>& methods);
 
-  // Loads profile information from the given file.
-  bool Load(const std::string& profile_filename);
+  // Loads profile information from the given file descriptor.
+  bool Load(uint32_t fd);
   // Loads the data from another ProfileCompilationInfo object.
   bool Load(const ProfileCompilationInfo& info);
-  // Saves the profile data to the given file.
-  bool Save(const std::string& profile_filename);
+  // Saves the profile data to the given file descriptor.
+  bool Save(uint32_t fd);
   // Returns the number of methods that were profiled.
   uint32_t GetNumberOfMethods() const;
 
@@ -61,6 +64,9 @@
   std::string DumpInfo(const std::vector<const DexFile*>* dex_files,
                        bool print_full_dex_location = true) const;
 
+  // For testing purposes.
+  bool Equals(ProfileCompilationInfo& other);
+
  private:
   bool AddData(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
   bool ProcessLine(const std::string& line);
@@ -69,10 +75,18 @@
     explicit DexFileData(uint32_t location_checksum) : checksum(location_checksum) {}
     uint32_t checksum;
     std::set<uint16_t> method_set;
+
+    bool operator==(const DexFileData& other) const {
+      return checksum == other.checksum && method_set == other.method_set;
+    }
   };
 
   using DexFileToProfileInfoMap = SafeMap<const std::string, DexFileData>;
 
+  friend class ProfileCompilationInfoTest;
+  friend class CompilerDriverProfileTest;
+  friend class ProfileAssistantTest;
+
   DexFileToProfileInfoMap info_;
 };
 
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
new file mode 100644
index 0000000..482ea06
--- /dev/null
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "art_method-inl.h"
+#include "class_linker-inl.h"
+#include "common_runtime_test.h"
+#include "dex_file.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+class ProfileCompilationInfoTest : public CommonRuntimeTest {
+ protected:
+  std::vector<ArtMethod*> GetVirtualMethods(jobject class_loader,
+                                            const std::string& clazz) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    std::vector<ArtMethod*> methods;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      methods.push_back(&m);
+    }
+    return methods;
+  }
+
+  bool AddData(const std::string& dex_location,
+               uint32_t checksum,
+               uint16_t method_index,
+               ProfileCompilationInfo* info) {
+    return info->AddData(dex_location, checksum, method_index);
+  }
+
+  uint32_t GetFd(const ScratchFile& file) {
+    return static_cast<uint32_t>(file.GetFd());
+  }
+};
+
+TEST_F(ProfileCompilationInfoTest, SaveArtMethods) {
+  ScratchFile profile;
+
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Save virtual methods from Main.
+  std::vector<ArtMethod*> main_methods = GetVirtualMethods(class_loader, "LMain;");
+  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(), main_methods));
+
+  // Check that what we saved is in the profile.
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(info1.Load(GetFd(profile)));
+  ASSERT_EQ(info1.GetNumberOfMethods(), main_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info1.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+
+  // Save virtual methods from Second.
+  std::vector<ArtMethod*> second_methods = GetVirtualMethods(class_loader, "LSecond;");
+  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(), second_methods));
+
+  // Check that what we saved is in the profile (methods from Main and Second).
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(info2.Load(GetFd(profile)));
+  ASSERT_EQ(info2.GetNumberOfMethods(), main_methods.size() + second_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+    for (ArtMethod* m : second_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveFd) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+
+  // Save more methods.
+  for (uint16_t i = 0; i < 100; i++) {
+    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location3", /* checksum */ 3, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back everything we saved.
+  ProfileCompilationInfo loaded_info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info2.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info2.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, AddDataFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info));
+  // Trying to add info for an existing file but with a different checksum.
+  ASSERT_FALSE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info));
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info1));
+  // Use the same file, change the checksum.
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info2));
+
+  ASSERT_FALSE(info1.Load(info2));
+}
+
+}  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index ec289ea..b1a5a4b 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -22,16 +22,16 @@
 
 namespace art {
 
-// An arbitrary value to throttle save requests. Set to 500ms for now.
+// An arbitrary value to throttle save requests. Set to 2s for now.
 static constexpr const uint64_t kMilisecondsToNano = 1000000;
-static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 500 * kMilisecondsToNano;
+static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 2000 * kMilisecondsToNano;
 
 // TODO: read the constants from ProfileOptions,
 // Add a random delay each time we go to sleep so that we don't hammer the CPU
 // with all profile savers running at the same time.
-static constexpr const uint64_t kRandomDelayMaxMs = 10 * 1000;  // 10 seconds
-static constexpr const uint64_t kMaxBackoffMs = 4 * 60 * 1000;  // 4 minutes
-static constexpr const uint64_t kSavePeriodMs = 4 * 1000;  // 4 seconds
+static constexpr const uint64_t kRandomDelayMaxMs = 20 * 1000;  // 20 seconds
+static constexpr const uint64_t kMaxBackoffMs = 5 * 60 * 1000;  // 5 minutes
+static constexpr const uint64_t kSavePeriodMs = 10 * 1000;  // 10 seconds
 static constexpr const double kBackoffCoef = 1.5;
 
 static constexpr const uint32_t kMinimumNrOrMethodsToSave = 10;
@@ -42,13 +42,12 @@
 ProfileSaver::ProfileSaver(const std::string& output_filename,
                            jit::JitCodeCache* jit_code_cache,
                            const std::vector<std::string>& code_paths)
-    : output_filename_(output_filename),
-      jit_code_cache_(jit_code_cache),
-      tracked_dex_base_locations_(code_paths.begin(), code_paths.end()),
+    : jit_code_cache_(jit_code_cache),
       code_cache_last_update_time_ns_(0),
       shutting_down_(false),
       wait_lock_("ProfileSaver wait lock"),
       period_condition_("ProfileSaver period condition", wait_lock_) {
+  AddTrackedLocations(output_filename, code_paths);
 }
 
 void ProfileSaver::Run() {
@@ -86,29 +85,47 @@
 }
 
 bool ProfileSaver::ProcessProfilingInfo() {
-  VLOG(profiler) << "Initiating save profiling information to: " << output_filename_;
-
   uint64_t last_update_time_ns = jit_code_cache_->GetLastUpdateTimeNs();
   if (last_update_time_ns - code_cache_last_update_time_ns_
-      > kMinimumTimeBetweenCodeCacheUpdatesNs) {
-    VLOG(profiler) << "Not enough time has passed since the last code cache update.";
+      < kMinimumTimeBetweenCodeCacheUpdatesNs) {
+    VLOG(profiler) << "Not enough time has passed since the last code cache update."
+        << "Last update: " << last_update_time_ns
+        << " Last save: " << code_cache_last_update_time_ns_;
     return false;
   }
 
   uint64_t start = NanoTime();
   code_cache_last_update_time_ns_ = last_update_time_ns;
-  std::vector<ArtMethod*> methods;
+  SafeMap<std::string, std::set<std::string>> tracked_locations;
   {
-    ScopedObjectAccess soa(Thread::Current());
-    jit_code_cache_->GetCompiledArtMethods(tracked_dex_base_locations_, methods);
+    // Make a copy so that we don't hold the lock while doing I/O.
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    tracked_locations = tracked_dex_base_locations_;
   }
-  if (methods.size() < kMinimumNrOrMethodsToSave) {
-    VLOG(profiler) << "Not enough information to save. Nr of methods: " << methods.size();
-    return false;
-  }
+  for (const auto& it : tracked_locations) {
+    if (ShuttingDown(Thread::Current())) {
+      return true;
+    }
+    const std::string& filename = it.first;
+    const std::set<std::string>& locations = it.second;
+    std::vector<ArtMethod*> methods;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      jit_code_cache_->GetCompiledArtMethods(locations, methods);
+    }
+    if (methods.size() < kMinimumNrOrMethodsToSave) {
+      VLOG(profiler) << "Not enough information to save to: " << filename
+          << " Nr of methods: " << methods.size();
+      return false;
+    }
 
-  ProfileCompilationInfo::SaveProfilingInfo(output_filename_, methods);
-  VLOG(profiler) << "Profile process time: " << PrettyDuration(NanoTime() - start);
+    if (!ProfileCompilationInfo::SaveProfilingInfo(filename, methods)) {
+      LOG(WARNING) << "Could not save profiling info to " << filename;
+      return false;
+    }
+
+    VLOG(profiler) << "Profile process time: " << PrettyDuration(NanoTime() - start);
+  }
   return true;
 }
 
@@ -135,9 +152,13 @@
   DCHECK(jit_code_cache != nullptr);
 
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-  // Don't start two profile saver threads.
   if (instance_ != nullptr) {
-    DCHECK(false) << "Tried to start two profile savers";
+    // If we already have an instance, make sure it uses the same jit_code_cache.
+    // This may be called multiple times via Runtime::registerAppInfo (e.g. for
+    // apps which share the same runtime).
+    DCHECK_EQ(instance_->jit_code_cache_, jit_code_cache);
+    // Add the code_paths to the tracked locations.
+    instance_->AddTrackedLocations(output_filename, code_paths);
     return;
   }
 
@@ -159,7 +180,7 @@
 
   {
     MutexLock profiler_mutex(Thread::Current(), *Locks::profiler_lock_);
-    VLOG(profiler) << "Stopping profile saver thread for file: " << instance_->output_filename_;
+    VLOG(profiler) << "Stopping profile saver thread";
     profile_saver = instance_;
     profiler_pthread = profiler_pthread_;
     if (instance_ == nullptr) {
@@ -200,4 +221,15 @@
   return instance_ != nullptr;
 }
 
+void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
+                                       const std::vector<std::string>& code_paths) {
+  auto it = tracked_dex_base_locations_.find(output_filename);
+  if (it == tracked_dex_base_locations_.end()) {
+    tracked_dex_base_locations_.Put(output_filename,
+                                    std::set<std::string>(code_paths.begin(), code_paths.end()));
+  } else {
+    it->second.insert(code_paths.begin(), code_paths.end());
+  }
+}
+
 }   // namespace art
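
A usage sketch of the new merge behavior; the paths and the cache pointer are illustrative:

    // A second Start() for the same output file no longer trips a DCHECK;
    // its code paths are merged into the tracked set for that file.
    ProfileSaver::Start("/data/misc/profiles/app.prof", jit_code_cache,
                        {"/data/app/base.apk"});
    ProfileSaver::Start("/data/misc/profiles/app.prof", jit_code_cache,
                        {"/data/app/split.apk"});
    // Tracked now: "/data/misc/profiles/app.prof" -> {base.apk, split.apk}.
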
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index d60142b..3342790 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -20,12 +20,14 @@
 #include "base/mutex.h"
 #include "jit_code_cache.h"
 #include "offline_profiling_info.h"
+#include "safe_map.h"
 
 namespace art {
 
 class ProfileSaver {
  public:
-  // Starts the profile saver thread.
+  // Starts the profile saver thread if not already started.
+  // If the saver is already running, it adds (output_filename, code_paths) to its tracked locations.
   static void Start(const std::string& output_filename,
                     jit::JitCodeCache* jit_code_cache,
                     const std::vector<std::string>& code_paths)
@@ -58,14 +60,18 @@
   // Returns true if the saver is shutting down (ProfileSaver::Stop() has been called).
   bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
 
+  void AddTrackedLocations(const std::string& output_filename,
+                           const std::vector<std::string>& code_paths)
+      REQUIRES(Locks::profiler_lock_);
+
   // The only instance of the saver.
   static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_);
   // Profile saver thread.
   static pthread_t profiler_pthread_ GUARDED_BY(Locks::profiler_lock_);
 
-  const std::string output_filename_;
   jit::JitCodeCache* jit_code_cache_;
-  const std::set<const std::string> tracked_dex_base_locations_;
+  SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_
+      GUARDED_BY(Locks::profiler_lock_);
   uint64_t code_cache_last_update_time_ns_;
   bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
 
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index ddaf02f..ab72373 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -68,8 +68,13 @@
     return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
   }
 
- private:
+  mirror::Class* GetTypeAt(size_t i) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return classes_[i].Read();
+  }
+
   static constexpr uint16_t kIndividualCacheSize = 5;
+
+ private:
   uint32_t dex_pc_;
   GcRoot<mirror::Class> classes_[kIndividualCacheSize];
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 3571edb..18c52e4 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -583,6 +583,10 @@
   }
 }
 
+bool MemMap::Sync() {
+  return msync(BaseBegin(), BaseSize(), MS_SYNC) == 0;
+}
+
 bool MemMap::Protect(int prot) {
   if (base_begin_ == nullptr && base_size_ == 0) {
     prot_ = prot;
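
What the new Sync() wraps, shown on a raw shared mapping (WriteAndFlush is an illustrative helper; fd is assumed to refer to a file at least `size` bytes long):

    #include <cstddef>
    #include <sys/mman.h>

    bool WriteAndFlush(int fd, size_t size) {
      void* base = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      if (base == MAP_FAILED) {
        return false;
      }
      static_cast<char*>(base)[0] = 42;           // dirty a page.
      bool ok = msync(base, size, MS_SYNC) == 0;  // block until written back.
      munmap(base, size);
      return ok;
    }
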
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index ed21365..ebd550a 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -126,6 +126,8 @@
     return name_;
   }
 
+  bool Sync();
+
   bool Protect(int prot);
 
   void MadviseDontNeedAndZero();
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index edcbcf2..81c855e 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -251,6 +251,10 @@
 #endif
 
 TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
+  // Some MIPS32 hardware (namely the Creator Ci20 development board)
+  // cannot allocate in the 2GB-4GB region.
+  TEST_DISABLED_FOR_MIPS();
+
   CommonInit();
   // This test may not work under valgrind.
   if (RUNNING_ON_MEMORY_TOOL == 0) {
@@ -271,8 +275,8 @@
         break;
       }
     }
-    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(map.get() != nullptr) << error_msg;
+    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(error_msg.empty());
     ASSERT_EQ(BaseBegin(map.get()), reinterpret_cast<void*>(start_addr));
   }
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index b6f424b..b3439f7 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -381,19 +381,26 @@
   return (T)static_cast<uintptr_t>(AsIntArray()->GetWithoutChecks(idx));
 }
 
-template<bool kTransactionActive, bool kUnchecked, typename T>
-inline void PointerArray::SetElementPtrSize(uint32_t idx, T element, size_t ptr_size) {
+template<bool kTransactionActive, bool kUnchecked>
+inline void PointerArray::SetElementPtrSize(uint32_t idx, uint64_t element, size_t ptr_size) {
   if (ptr_size == 8) {
     (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(this)) : AsLongArray())->
-        SetWithoutChecks<kTransactionActive>(idx, (uint64_t)(element));
+        SetWithoutChecks<kTransactionActive>(idx, element);
   } else {
     DCHECK_EQ(ptr_size, 4u);
-    DCHECK_LE((uintptr_t)element, 0xFFFFFFFFu);
+    DCHECK_LE(element, static_cast<uint64_t>(0xFFFFFFFFu));
     (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(this)) : AsIntArray())
-        ->SetWithoutChecks<kTransactionActive>(idx, static_cast<uint32_t>((uintptr_t)element));
+        ->SetWithoutChecks<kTransactionActive>(idx, static_cast<uint32_t>(element));
   }
 }
 
+template<bool kTransactionActive, bool kUnchecked, typename T>
+inline void PointerArray::SetElementPtrSize(uint32_t idx, T* element, size_t ptr_size) {
+  SetElementPtrSize<kTransactionActive, kUnchecked>(idx,
+                                                    reinterpret_cast<uintptr_t>(element),
+                                                    ptr_size);
+}
+
 template <typename Visitor>
 inline void PointerArray::Fixup(mirror::PointerArray* dest,
                                 size_t pointer_size,
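
The width dispatch behind the two overloads above, reduced to plain arrays (StorePtrElement is an illustrative name):

    #include <cstddef>
    #include <cstdint>

    void StorePtrElement(uint64_t* wide, uint32_t* narrow, uint32_t idx,
                         const void* element, size_t ptr_size) {
      uint64_t value = reinterpret_cast<uintptr_t>(element);
      if (ptr_size == 8) {
        wide[idx] = value;          // 64-bit image: store the full value.
      } else {
        // 32-bit image: the pointer must fit in 4 bytes (the DCHECK_LE above).
        narrow[idx] = static_cast<uint32_t>(value);
      }
    }
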
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 50d77eb..2bd6c5b 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -187,8 +187,11 @@
   T GetElementPtrSize(uint32_t idx, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<bool kTransactionActive = false, bool kUnchecked = false>
+  void SetElementPtrSize(uint32_t idx, uint64_t element, size_t ptr_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   template<bool kTransactionActive = false, bool kUnchecked = false, typename T>
-  void SetElementPtrSize(uint32_t idx, T element, size_t ptr_size)
+  void SetElementPtrSize(uint32_t idx, T* element, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Fixup the pointers in the dest arrays by passing our pointers through the visitor. Only copies
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 53118e0..d5783c0 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -47,11 +47,15 @@
   return GetField32(ObjectSizeOffset());
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Class* Class::GetSuperClass() {
   // Can only get super class for loaded classes (hack for when runtime is
   // initializing)
-  DCHECK(IsLoaded() || IsErroneous() || !Runtime::Current()->IsStarted()) << IsLoaded();
-  return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
+  DCHECK(IsLoaded<kVerifyFlags>() ||
+         IsErroneous<kVerifyFlags>() ||
+         !Runtime::Current()->IsStarted()) << IsLoaded();
+  return GetFieldObject<Class, kVerifyFlags, kReadBarrierOption>(
+      OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
 }
 
 inline ClassLoader* Class::GetClassLoader() {
@@ -153,7 +157,7 @@
 
 inline LengthPrefixedArray<ArtMethod>* Class::GetMethodsPtr() {
   return reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-      GetField64(OFFSET_OF_OBJECT_MEMBER(Class, methods_)));
+      static_cast<uintptr_t>(GetField64(OFFSET_OF_OBJECT_MEMBER(Class, methods_))));
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -204,7 +208,7 @@
 
 inline void Class::SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods) {
   SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, methods_),
-                    reinterpret_cast<uint64_t>(new_methods));
+                    static_cast<uint64_t>(reinterpret_cast<uintptr_t>(new_methods)));
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -226,9 +230,12 @@
   return &GetVirtualMethodsSliceUnchecked(pointer_size).At(i);
 }
 
+template<VerifyObjectFlags kVerifyFlags,
+         ReadBarrierOption kReadBarrierOption>
 inline PointerArray* Class::GetVTable() {
-  DCHECK(IsResolved() || IsErroneous());
-  return GetFieldObject<PointerArray>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
+  DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
+  return GetFieldObject<PointerArray, kVerifyFlags, kReadBarrierOption>(
+      OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
 }
 
 inline PointerArray* Class::GetVTableDuringLinking() {
@@ -499,8 +506,11 @@
   return FindVirtualMethodForVirtual(method, pointer_size);
 }
 
+template<VerifyObjectFlags kVerifyFlags,
+         ReadBarrierOption kReadBarrierOption>
 inline IfTable* Class::GetIfTable() {
-  return GetFieldObject<IfTable>(OFFSET_OF_OBJECT_MEMBER(Class, iftable_));
+  return GetFieldObject<IfTable, kVerifyFlags, kReadBarrierOption>(
+      OFFSET_OF_OBJECT_MEMBER(Class, iftable_));
 }
 
 inline int32_t Class::GetIfTableCount() {
@@ -516,7 +526,7 @@
 }
 
 inline LengthPrefixedArray<ArtField>* Class::GetIFieldsPtr() {
-  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(IsLoaded() || IsErroneous()) << GetStatus();
   return GetFieldPtr<LengthPrefixedArray<ArtField>*>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
 }
 
@@ -747,9 +757,12 @@
   return size;
 }
 
-template <typename Visitor>
+template <bool kVisitNativeRoots,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor>
 inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
-  VisitInstanceFieldsReferences(klass, visitor);
+  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
   // Right after a class is allocated, but not yet loaded
   // (kStatusNotReady, see ClassLinker::LoadClass()), GC may find it
   // and scan it. IsTemp() may call Class::GetAccessFlags() but may
@@ -757,14 +770,16 @@
   // status is kStatusNotReady. To avoid it, rely on IsResolved()
   // only. This is fine because a temp class never goes into the
   // kStatusResolved state.
-  if (IsResolved()) {
+  if (IsResolved<kVerifyFlags>()) {
     // Temp classes don't ever populate imt/vtable or static fields and they are not even
     // allocated with the right size for those. Also, unresolved classes don't have fields
     // linked yet.
-    VisitStaticFieldsReferences(this, visitor);
+    VisitStaticFieldsReferences<kVerifyFlags, kReadBarrierOption>(this, visitor);
   }
-  // Since this class is reachable, we must also visit the associated roots when we scan it.
-  VisitNativeRoots(visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  if (kVisitNativeRoots) {
+    // Since this class is reachable, we must also visit the associated roots when we scan it.
+    VisitNativeRoots(visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  }
 }
 
 template<ReadBarrierOption kReadBarrierOption>
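
The cast pattern the GetMethodsPtr/SetMethodsPtrInternal hunks switch to, in isolation: widening a pointer by first converting it to uintptr_t keeps the round trip through the 64-bit field well-defined on 32-bit targets, instead of relying on a direct pointer-to-uint64_t reinterpret_cast.

    #include <cstdint>

    uint64_t EncodePtr(const void* p) {
      return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(p));
    }

    const void* DecodePtr(uint64_t v) {
      return reinterpret_cast<const void*>(static_cast<uintptr_t>(v));
    }
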
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index b49fc74..b97d994 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -457,6 +457,16 @@
   return nullptr;
 }
 
+ArtMethod* Class::FindDeclaredDirectMethodByName(const StringPiece& name, size_t pointer_size) {
+  for (auto& method : GetDirectMethods(pointer_size)) {
+    ArtMethod* const np_method = method.GetInterfaceMethodIfProxy(pointer_size);
+    if (name == np_method->GetName()) {
+      return &method;
+    }
+  }
+  return nullptr;
+}
+
 // TODO These should maybe be changed to be named FindOwnedVirtualMethod or something similar
 // because they do not only find 'declared' methods and will return copied methods. This behavior is
 // desired and correct but the naming can lead to confusion because in the java language declared
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 3a06b82..ea614fd 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -494,10 +494,11 @@
         (IsAbstract() && IsArrayClass());
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsObjectArrayClass() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetComponentType<kVerifyFlags>() != nullptr &&
-        !GetComponentType<kVerifyFlags>()->IsPrimitive();
+    mirror::Class* const component_type = GetComponentType<kVerifyFlags, kReadBarrierOption>();
+    return component_type != nullptr && !component_type->IsPrimitive();
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -560,7 +561,7 @@
   // The size of java.lang.Class.class.
   static uint32_t ClassClassSize(size_t pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 69;
+    uint32_t vtable_entries = Object::kVTableLength + 72;
     return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
 
@@ -656,6 +657,8 @@
   // to themselves. Classes for primitive types may not assign to each other.
   ALWAYS_INLINE bool IsAssignableFrom(Class* src) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE Class* GetSuperClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get first common super class. It will never return null.
@@ -706,6 +709,10 @@
   ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetMethodsPtr()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static MemberOffset MethodsOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(Class, methods_));
+  }
+
   ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetMethods(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -787,6 +794,8 @@
   ArtMethod* GetVirtualMethodDuringLinking(size_t i, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE PointerArray* GetVTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE PointerArray* GetVTableDuringLinking() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -913,6 +922,9 @@
   ArtMethod* FindDeclaredVirtualMethodByName(const StringPiece& name, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ArtMethod* FindDeclaredDirectMethodByName(const StringPiece& name, size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& signature,
                                size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -937,6 +949,8 @@
 
   ALWAYS_INLINE int32_t GetIfTableCount() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE IfTable* GetIfTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE void SetIfTable(IfTable* new_iftable) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1222,7 +1236,8 @@
 
   // Fix up all of the native pointers in the class by running them through the visitor. Only sets
   // the corresponding entry in dest if visitor(obj) != obj to prevent dirty memory. Dest should be
-  // initialized to a copy of *this to prevent issues.
+  // initialized to a copy of *this to prevent issues. Does not visit the ArtMethod and ArtField
+  // roots.
   template <typename Visitor>
   void FixupNativePointers(mirror::Class* dest, size_t pointer_size, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1273,7 +1288,10 @@
   static MemberOffset EmbeddedImTableOffset(size_t pointer_size);
   static MemberOffset EmbeddedVTableOffset(size_t pointer_size);
 
-  template <typename Visitor>
+  template <bool kVisitNativeRoots,
+            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1357,6 +1375,8 @@
   // The slice methods_ [copied_methods_offset_, |methods_|) are the methods that are copied from
   // interfaces such as miranda or default methods. These are copied for resolution purposes as this
   // class is where they are (logically) declared as far as the virtual dispatch is concerned.
+  //
+  // Note that this field is used by the native debugger as the unique identifier for the type.
   uint64_t methods_;
 
   // Static fields length-prefixed array.
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index e22ddd7..84fa80f 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -25,15 +25,20 @@
 namespace art {
 namespace mirror {
 
-template <VerifyObjectFlags kVerifyFlags, typename Visitor>
+template <bool kVisitClasses,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor>
 inline void ClassLoader::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
   // Visit instance fields first.
-  VisitInstanceFieldsReferences(klass, visitor);
-  // Visit classes loaded after.
-  ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  ClassTable* const class_table = GetClassTable();
-  if (class_table != nullptr) {
-    class_table->VisitRoots(visitor);
+  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
+  if (kVisitClasses) {
+    // Visit classes loaded after.
+    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    ClassTable* const class_table = GetClassTable();
+    if (class_table != nullptr) {
+      class_table->VisitRoots(visitor);
+    }
   }
 }
 
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index c2a65d6..1957e13 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -63,7 +63,10 @@
  private:
   // Visit instance fields of the class loader as well as its associated classes.
   // Null class loader is handled by ClassLinker::VisitClassRoots.
-  template <VerifyObjectFlags kVerifyFlags, typename Visitor>
+  template <bool kVisitClasses,
+            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 975af61..2ecc9fb 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -122,18 +122,23 @@
   }
 }
 
-template <VerifyObjectFlags kVerifyFlags, typename Visitor>
+template <bool kVisitNativeRoots,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor>
 inline void DexCache::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
   // Visit instance fields first.
-  VisitInstanceFieldsReferences(klass, visitor);
+  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
   // Visit arrays after.
-  GcRoot<mirror::String>* strings = GetStrings();
-  for (size_t i = 0, num_strings = NumStrings(); i != num_strings; ++i) {
-    visitor.VisitRootIfNonNull(strings[i].AddressWithoutBarrier());
-  }
-  GcRoot<mirror::Class>* resolved_types = GetResolvedTypes();
-  for (size_t i = 0, num_types = NumResolvedTypes(); i != num_types; ++i) {
-    visitor.VisitRootIfNonNull(resolved_types[i].AddressWithoutBarrier());
+  if (kVisitNativeRoots) {
+    GcRoot<mirror::String>* strings = GetStrings();
+    for (size_t i = 0, num_strings = NumStrings(); i != num_strings; ++i) {
+      visitor.VisitRootIfNonNull(strings[i].AddressWithoutBarrier());
+    }
+    GcRoot<mirror::Class>* resolved_types = GetResolvedTypes();
+    for (size_t i = 0, num_types = NumResolvedTypes(); i != num_types; ++i) {
+      visitor.VisitRootIfNonNull(resolved_types[i].AddressWithoutBarrier());
+    }
   }
 }
 
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 349a319..692c6cb 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -50,11 +50,11 @@
   CHECK_EQ(num_resolved_fields != 0u, resolved_fields != nullptr);
 
   SetDexFile(dex_file);
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location);
-  SetField64<false>(StringsOffset(), reinterpret_cast<uintptr_t>(strings));
-  SetField64<false>(ResolvedTypesOffset(), reinterpret_cast<uintptr_t>(resolved_types));
-  SetField64<false>(ResolvedMethodsOffset(), reinterpret_cast<uintptr_t>(resolved_methods));
-  SetField64<false>(ResolvedFieldsOffset(), reinterpret_cast<uintptr_t>(resolved_fields));
+  SetLocation(location);
+  SetStrings(strings);
+  SetResolvedTypes(resolved_types);
+  SetResolvedMethods(resolved_methods);
+  SetResolvedFields(resolved_fields);
   SetField32<false>(NumStringsOffset(), num_strings);
   SetField32<false>(NumResolvedTypesOffset(), num_resolved_types);
   SetField32<false>(NumResolvedMethodsOffset(), num_resolved_methods);
@@ -79,5 +79,9 @@
   }
 }
 
+void DexCache::SetLocation(mirror::String* location) {
+  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location);
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 32eb595..0002076 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -137,18 +137,40 @@
     return GetFieldPtr<GcRoot<String>*>(StringsOffset());
   }
 
+  void SetStrings(GcRoot<String>* strings) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(StringsOffset(), strings);
+  }
+
   GcRoot<Class>* GetResolvedTypes() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldPtr<GcRoot<Class>*>(ResolvedTypesOffset());
   }
 
+  void SetResolvedTypes(GcRoot<Class>* resolved_types)
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(ResolvedTypesOffset(), resolved_types);
+  }
+
   ArtMethod** GetResolvedMethods() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldPtr<ArtMethod**>(ResolvedMethodsOffset());
   }
 
+  void SetResolvedMethods(ArtMethod** resolved_methods)
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(ResolvedMethodsOffset(), resolved_methods);
+  }
+
   ArtField** GetResolvedFields() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldPtr<ArtField**>(ResolvedFieldsOffset());
   }
 
+  void SetResolvedFields(ArtField** resolved_fields)
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(ResolvedFieldsOffset(), resolved_fields);
+  }
+
   size_t NumStrings() SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetField32(NumStringsOffset());
   }
@@ -169,11 +191,12 @@
     return GetFieldPtr<const DexFile*>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_));
   }
 
-  void SetDexFile(const DexFile* dex_file) SHARED_REQUIRES(Locks::mutator_lock_)
-      ALWAYS_INLINE {
-    return SetFieldPtr<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), dex_file);
+  void SetDexFile(const DexFile* dex_file) SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), dex_file);
   }
 
+  void SetLocation(mirror::String* location) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // NOTE: Get/SetElementPtrSize() are intended for working with ArtMethod** and ArtField**
   // provided by GetResolvedMethods/Fields() and ArtMethod::GetDexCacheResolvedMethods(),
   // so they need to be public.
@@ -186,7 +209,10 @@
 
  private:
   // Visit instance fields of the dex cache as well as its associated arrays.
-  template <VerifyObjectFlags kVerifyFlags, typename Visitor>
+  template <bool kVisitNativeRoots,
+            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
 
diff --git a/runtime/mirror/iftable.h b/runtime/mirror/iftable.h
index b21ecdf..605deac 100644
--- a/runtime/mirror/iftable.h
+++ b/runtime/mirror/iftable.h
@@ -34,8 +34,11 @@
   ALWAYS_INLINE void SetInterface(int32_t i, Class* interface)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   PointerArray* GetMethodArray(int32_t i) SHARED_REQUIRES(Locks::mutator_lock_) {
-    auto* method_array = down_cast<PointerArray*>(Get((i * kMax) + kMethodArray));
+    auto* method_array = down_cast<PointerArray*>(Get<kVerifyFlags, kReadBarrierOption>(
+        (i * kMax) + kMethodArray));
     DCHECK(method_array != nullptr);
     return method_array;
   }
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 4603428..760de9a 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -1031,7 +1031,10 @@
   return success;
 }
 
-template<bool kIsStatic, typename Visitor>
+template<bool kIsStatic,
+         VerifyObjectFlags kVerifyFlags,
+         ReadBarrierOption kReadBarrierOption,
+         typename Visitor>
 inline void Object::VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor) {
   if (!kIsStatic && (ref_offsets != mirror::Class::kClassWalkSuper)) {
     // Instance fields and not the slow-path.
@@ -1047,9 +1050,12 @@
     // There is no reference offset bitmap. In the non-static case, walk up the class
     // inheritance hierarchy and find reference offsets the hard way. In the static case, just
     // consider this class.
-    for (mirror::Class* klass = kIsStatic ? AsClass() : GetClass(); klass != nullptr;
-        klass = kIsStatic ? nullptr : klass->GetSuperClass()) {
-      size_t num_reference_fields =
+    for (mirror::Class* klass = kIsStatic
+            ? AsClass<kVerifyFlags, kReadBarrierOption>()
+            : GetClass<kVerifyFlags, kReadBarrierOption>();
+        klass != nullptr;
+        klass = kIsStatic ? nullptr : klass->GetSuperClass<kVerifyFlags, kReadBarrierOption>()) {
+      const size_t num_reference_fields =
           kIsStatic ? klass->NumReferenceStaticFields() : klass->NumReferenceInstanceFields();
       if (num_reference_fields == 0u) {
         continue;
@@ -1072,49 +1078,54 @@
   }
 }
 
-template<typename Visitor>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void Object::VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
-  VisitFieldsReferences<false>(klass->GetReferenceInstanceOffsets<kVerifyNone>(), visitor);
+  VisitFieldsReferences<false, kVerifyFlags, kReadBarrierOption>(
+      klass->GetReferenceInstanceOffsets<kVerifyFlags>(), visitor);
 }
 
-template<typename Visitor>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void Object::VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
   DCHECK(!klass->IsTemp());
-  klass->VisitFieldsReferences<true>(0, visitor);
+  klass->VisitFieldsReferences<true, kVerifyFlags, kReadBarrierOption>(0, visitor);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsClassLoader() {
-  return GetClass<kVerifyFlags>()->IsClassLoaderClass();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsClassLoaderClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline mirror::ClassLoader* Object::AsClassLoader() {
-  DCHECK(IsClassLoader<kVerifyFlags>());
+  DCHECK((IsClassLoader<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<mirror::ClassLoader*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsDexCache() {
-  return GetClass<kVerifyFlags>()->IsDexCacheClass();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsDexCacheClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline mirror::DexCache* Object::AsDexCache() {
-  DCHECK(IsDexCache<kVerifyFlags>());
+  DCHECK((IsDexCache<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<mirror::DexCache*>(this);
 }
 
-template <VerifyObjectFlags kVerifyFlags, typename Visitor, typename JavaLangRefVisitor>
+template <bool kVisitNativeRoots,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor,
+          typename JavaLangRefVisitor>
 inline void Object::VisitReferences(const Visitor& visitor,
                                     const JavaLangRefVisitor& ref_visitor) {
-  mirror::Class* klass = GetClass<kVerifyFlags>();
+  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
   visitor(this, ClassOffset(), false);
   const uint32_t class_flags = klass->GetClassFlags<kVerifyNone>();
   if (LIKELY(class_flags == kClassFlagNormal)) {
     DCHECK(!klass->IsVariableSize());
     VisitInstanceFieldsReferences(klass, visitor);
-    DCHECK(!klass->IsClassClass());
+    DCHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
     DCHECK(!klass->IsStringClass());
     DCHECK(!klass->IsClassLoaderClass());
     DCHECK(!klass->IsArrayClass());
@@ -1123,23 +1134,29 @@
       DCHECK(!klass->IsStringClass());
       if (class_flags == kClassFlagClass) {
         DCHECK(klass->IsClassClass());
-        AsClass<kVerifyNone>()->VisitReferences(klass, visitor);
+        AsClass<kVerifyNone>()->VisitReferences<kVisitNativeRoots,
+                                                kVerifyFlags,
+                                                kReadBarrierOption>(klass, visitor);
       } else if (class_flags == kClassFlagObjectArray) {
-        DCHECK(klass->IsObjectArrayClass());
+        DCHECK((klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
         AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences(visitor);
       } else if ((class_flags & kClassFlagReference) != 0) {
         VisitInstanceFieldsReferences(klass, visitor);
         ref_visitor(klass, AsReference());
       } else if (class_flags == kClassFlagDexCache) {
-        mirror::DexCache* const dex_cache = AsDexCache<kVerifyFlags>();
-        dex_cache->VisitReferences<kVerifyFlags>(klass, visitor);
+        mirror::DexCache* const dex_cache = AsDexCache<kVerifyFlags, kReadBarrierOption>();
+        dex_cache->VisitReferences<kVisitNativeRoots,
+                                   kVerifyFlags,
+                                   kReadBarrierOption>(klass, visitor);
       } else {
-        mirror::ClassLoader* const class_loader = AsClassLoader<kVerifyFlags>();
-        class_loader->VisitReferences<kVerifyFlags>(klass, visitor);
+        mirror::ClassLoader* const class_loader = AsClassLoader<kVerifyFlags, kReadBarrierOption>();
+        class_loader->VisitReferences<kVisitNativeRoots,
+                                      kVerifyFlags,
+                                      kReadBarrierOption>(klass, visitor);
       }
     } else if (kIsDebugBuild) {
-      CHECK(!klass->IsClassClass());
-      CHECK(!klass->IsObjectArrayClass());
+      CHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
+      CHECK((!klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
       // String still has instance fields for reflection purposes but these don't exist in
       // actual string instances.
       if (!klass->IsStringClass()) {
@@ -1147,7 +1164,7 @@
         mirror::Class* super_class = klass;
         do {
           total_reference_instance_fields += super_class->NumReferenceInstanceFields();
-          super_class = super_class->GetSuperClass();
+          super_class = super_class->GetSuperClass<kVerifyFlags, kReadBarrierOption>();
         } while (super_class != nullptr);
         // The only reference field should be the object's class. This field is handled at the
         // beginning of the function.
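
The hunks above thread two extra template parameters through every visitor and accessor. A minimal standalone sketch of the idiom follows, with stand-in types rather than ART's real classes: because the barrier choice is a compile-time constant, callers that already run under the collector's own barrier can instantiate with kWithoutReadBarrier and the barrier code compiles away.

```cpp
// Minimal sketch of the compile-time read-barrier switch (stand-in types; the
// real ART accessors take these as defaulted template parameters).
enum ReadBarrierOption { kWithReadBarrier, kWithoutReadBarrier };

struct Field { int value; };

template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
int LoadField(const Field& f) {
  if (kReadBarrierOption == kWithReadBarrier) {
    // A real implementation would run the read-barrier slow path here.
  }
  return f.value;
}

int Demo(const Field& f) {
  int with = LoadField(f);                          // Default: barrier enabled.
  int without = LoadField<kWithoutReadBarrier>(f);  // Explicitly disabled.
  return with + without;
}
```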
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 71e704e..d635002 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -164,14 +164,18 @@
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ObjectArray<T>* AsObjectArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsClassLoader() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ClassLoader* AsClassLoader() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsDexCache() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   DexCache* AsDexCache() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -456,6 +460,13 @@
     SetFieldPtrWithSize<kTransactionActive, kCheckTransaction, kVerifyFlags>(
         field_offset, new_value, sizeof(void*));
   }
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, typename T>
+  void SetFieldPtr64(MemberOffset field_offset, T new_value)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtrWithSize<kTransactionActive, kCheckTransaction, kVerifyFlags>(
+        field_offset, new_value, 8u);
+  }
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, typename T>
@@ -475,7 +486,9 @@
   }
   // TODO: Fix the thread safety analysis, which is broken by the use of templates. This should be
   // SHARED_REQUIRES(Locks::mutator_lock_).
-  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+  template <bool kVisitNativeRoots = true,
+            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             typename Visitor,
             typename JavaLangRefVisitor = VoidFunctor>
   void VisitReferences(const Visitor& visitor, const JavaLangRefVisitor& ref_visitor)
@@ -495,6 +508,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset, sizeof(void*));
   }
+  template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
+  T GetFieldPtr64(MemberOffset field_offset)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset, 8u);
+  }
 
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
   ALWAYS_INLINE T GetFieldPtrWithSize(MemberOffset field_offset, size_t pointer_size)
@@ -511,13 +529,20 @@
   }
 
   // TODO: Fix this when annotalysis works with visitors.
-  template<bool kIsStatic, typename Visitor>
+  template<bool kIsStatic,
+          VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+          ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+          typename Visitor>
   void VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor) HOT_ATTR
       NO_THREAD_SAFETY_ANALYSIS;
-  template<typename Visitor>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+           typename Visitor>
   void VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor) HOT_ATTR
       SHARED_REQUIRES(Locks::mutator_lock_);
-  template<typename Visitor>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+           typename Visitor>
   void VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) HOT_ATTR
       SHARED_REQUIRES(Locks::mutator_lock_);
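
The new GetFieldPtr64/SetFieldPtr64 pair above reads and writes native pointers through a fixed 8-byte slot so that 32-bit and 64-bit runtimes agree on the object layout. A hedged sketch of the widening and narrowing involved, using hypothetical helpers rather than the ART API:

```cpp
#include <cstdint>
#include <cstring>

// Hypothetical helpers illustrating the fixed-width slot: the field is always
// 8 bytes, the pointer is zero-extended on store and narrowed back to
// uintptr_t on load, which is lossless on both 32- and 64-bit targets.
void StorePtr64(uint8_t* field, void* ptr) {
  uint64_t v = reinterpret_cast<uintptr_t>(ptr);
  std::memcpy(field, &v, sizeof(v));
}

void* LoadPtr64(const uint8_t* field) {
  uint64_t v;
  std::memcpy(&v, field, sizeof(v));
  return reinterpret_cast<void*>(static_cast<uintptr_t>(v));
}
```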
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 5337760..6f9d642 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -55,13 +55,13 @@
                Runtime::Current()->GetHeap()->GetCurrentAllocator());
 }
 
-template<class T>
+template<class T> template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline T* ObjectArray<T>::Get(int32_t i) {
   if (!CheckIsValidIndex(i)) {
     DCHECK(Thread::Current()->IsExceptionPending());
     return nullptr;
   }
-  return GetFieldObject<T>(OffsetOfElement(i));
+  return GetFieldObject<T, kVerifyFlags, kReadBarrierOption>(OffsetOfElement(i));
 }
 
 template<class T> template<VerifyObjectFlags kVerifyFlags>
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index b45cafd..1b1295c 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -37,7 +37,9 @@
   static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
-  T* Get(int32_t i) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_);
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  ALWAYS_INLINE T* Get(int32_t i) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns true if the object can be stored into the array. If not, throws
   // an ArrayStoreException and returns false.
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index da6cf1f..6643ac2 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -154,10 +154,8 @@
                                          jstring javaSourceName,
                                          jstring javaOutputName,
                                          jint flags ATTRIBUTE_UNUSED,
-                                         // class_loader will be used for app images.
-                                         jobject class_loader ATTRIBUTE_UNUSED,
-                                         // dex_elements will be used for app images.
-                                         jobject dex_elements ATTRIBUTE_UNUSED) {
+                                         jobject class_loader,
+                                         jobjectArray dex_elements) {
   ScopedUtfChars sourceName(env, javaSourceName);
   if (sourceName.c_str() == nullptr) {
     return 0;
@@ -174,6 +172,8 @@
 
   dex_files = runtime->GetOatFileManager().OpenDexFilesFromOat(sourceName.c_str(),
                                                                outputName.c_str(),
+                                                               class_loader,
+                                                               dex_elements,
                                                                /*out*/ &oat_file,
                                                                /*out*/ &error_msgs);
 
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 8febb62..8f108fa 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -314,32 +314,33 @@
   size_t largeObjectsSize = 0;
   size_t largeObjectsUsed = 0;
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
-    if (space->IsImageSpace()) {
-      // Currently don't include the image space.
-    } else if (space->IsZygoteSpace()) {
-      gc::space::ZygoteSpace* zygote_space = space->AsZygoteSpace();
-      zygoteSize += zygote_space->Size();
-      zygoteUsed += zygote_space->GetBytesAllocated();
-    } else if (space->IsMallocSpace()) {
-      // This is a malloc space.
-      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
-      allocSize += malloc_space->GetFootprint();
-      allocUsed += malloc_space->GetBytesAllocated();
-    } else if (space->IsBumpPointerSpace()) {
-      ScopedObjectAccess soa(env);
-      gc::space::BumpPointerSpace* bump_pointer_space = space->AsBumpPointerSpace();
-      allocSize += bump_pointer_space->Size();
-      allocUsed += bump_pointer_space->GetBytesAllocated();
+  {
+    ScopedObjectAccess soa(env);
+    for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
+      if (space->IsImageSpace()) {
+        // Currently don't include the image space.
+      } else if (space->IsZygoteSpace()) {
+        gc::space::ZygoteSpace* zygote_space = space->AsZygoteSpace();
+        zygoteSize += zygote_space->Size();
+        zygoteUsed += zygote_space->GetBytesAllocated();
+      } else if (space->IsMallocSpace()) {
+        // This is a malloc space.
+        gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+        allocSize += malloc_space->GetFootprint();
+        allocUsed += malloc_space->GetBytesAllocated();
+      } else if (space->IsBumpPointerSpace()) {
+        gc::space::BumpPointerSpace* bump_pointer_space = space->AsBumpPointerSpace();
+        allocSize += bump_pointer_space->Size();
+        allocUsed += bump_pointer_space->GetBytesAllocated();
+      }
+    }
+    for (gc::space::DiscontinuousSpace* space : heap->GetDiscontinuousSpaces()) {
+      if (space->IsLargeObjectSpace()) {
+        largeObjectsSize += space->AsLargeObjectSpace()->GetBytesAllocated();
+        largeObjectsUsed += largeObjectsSize;
+      }
     }
   }
-  for (gc::space::DiscontinuousSpace* space : heap->GetDiscontinuousSpaces()) {
-    if (space->IsLargeObjectSpace()) {
-      largeObjectsSize += space->AsLargeObjectSpace()->GetBytesAllocated();
-      largeObjectsUsed += largeObjectsSize;
-    }
-  }
-
   size_t allocFree = allocSize - allocUsed;
   size_t zygoteFree = zygoteSize - zygoteUsed;
   size_t largeObjectsFree = largeObjectsSize - largeObjectsUsed;
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 67d825e..a7881ac 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -65,6 +65,7 @@
     DEBUG_ENABLE_SAFEMODE           = 1 << 3,
     DEBUG_ENABLE_JNI_LOGGING        = 1 << 4,
     DEBUG_GENERATE_DEBUG_INFO       = 1 << 5,
+    DEBUG_ALWAYS_JIT                = 1 << 6,
   };
 
   Runtime* const runtime = Runtime::Current();
@@ -109,6 +110,13 @@
   // This is for backwards compatibility with Dalvik.
   debug_flags &= ~DEBUG_ENABLE_ASSERT;
 
+  if ((debug_flags & DEBUG_ALWAYS_JIT) != 0) {
+    jit::JitOptions* jit_options = runtime->GetJITOptions();
+    CHECK(jit_options != nullptr);
+    jit_options->SetJitAtFirstUse();
+    debug_flags &= ~DEBUG_ALWAYS_JIT;
+  }
+
   if (debug_flags != 0) {
     LOG(ERROR) << StringPrintf("Unknown bits set in debug_flags: %#x", debug_flags);
   }
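
The new DEBUG_ALWAYS_JIT handling follows the file's consume-and-clear idiom: each recognized flag is acted on and masked out, so whatever survives to the end is reported as unknown. A self-contained sketch of that idiom, with the flag value taken from the enum above and the logging simplified:

```cpp
#include <cstdint>
#include <cstdio>

enum : uint32_t { DEBUG_ALWAYS_JIT = 1u << 6 };  // Value from the diff above.

// Consume-and-clear: act on each known flag, strip it, and complain about
// any bits still set at the end.
void HandleDebugFlags(uint32_t debug_flags) {
  if ((debug_flags & DEBUG_ALWAYS_JIT) != 0) {
    // The real code calls jit_options->SetJitAtFirstUse() here.
    debug_flags &= ~DEBUG_ALWAYS_JIT;
  }
  if (debug_flags != 0) {
    std::fprintf(stderr, "Unknown bits set in debug_flags: %#x\n", debug_flags);
  }
}
```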
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 0ddd4a2..a80585a 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -469,14 +469,21 @@
   return soa.AddLocalReference<jobjectArray>(ret.Get());
 }
 
-static jobject Class_getDeclaredAnnotation(JNIEnv* env, jobject javaThis, jclass annotationType) {
+static jobject Class_getDeclaredAnnotation(JNIEnv* env, jobject javaThis, jclass annotationClass) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+
+  // Per the public contract, throw a NullPointerException if "annotationClass" is null.
+  if (UNLIKELY(annotationClass == nullptr)) {
+    ThrowNullPointerException("annotationClass");
+    return nullptr;
+  }
+
   if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
-  Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
+  Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationClass)));
   return soa.AddLocalReference<jobject>(
       klass->GetDexFile().GetAnnotationForClass(klass, annotation_class));
 }
diff --git a/runtime/oat.h b/runtime/oat.h
index 13fd6a4..989e3f9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -31,7 +31,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '4', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '5', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index d2f563b..8f321a0 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -46,6 +46,7 @@
 #include "oat_file_manager.h"
 #include "os.h"
 #include "runtime.h"
+#include "type_lookup_table.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "vmap_table.h"
@@ -266,16 +267,15 @@
                                 i);
       return false;
     }
-
-    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
-    oat += dex_file_location_size;
-    if (UNLIKELY(oat > End())) {
+    if (UNLIKELY(static_cast<size_t>(End() - oat) < dex_file_location_size)) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu with truncated dex file "
                                     "location",
                                 GetLocation().c_str(),
                                 i);
       return false;
     }
+    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
+    oat += dex_file_location_size;
 
     std::string dex_file_location = ResolveRelativeEncodedDexLocation(
         abs_dex_location,
@@ -318,6 +318,17 @@
                                 Size());
       return false;
     }
+    if (UNLIKELY(Size() - dex_file_offset < sizeof(DexFile::Header))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u of %zu but the size of dex file header is %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                Size(),
+                                sizeof(DexFile::Header));
+      return false;
+    }
 
     const uint8_t* dex_file_pointer = Begin() + dex_file_offset;
     if (UNLIKELY(!DexFile::IsMagicValid(dex_file_pointer))) {
@@ -339,34 +350,75 @@
       return false;
     }
     const DexFile::Header* header = reinterpret_cast<const DexFile::Header*>(dex_file_pointer);
+    if (Size() - dex_file_offset < header->file_size_) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u and size %u truncated at %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                header->file_size_,
+                                Size());
+      return false;
+    }
 
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "lookup table offset", GetLocation().c_str(), i,
+    uint32_t class_offsets_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                    "after class offsets offset",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
-    uint32_t lookup_table_offset = *reinterpret_cast<const uint32_t*>(oat);
-    oat += sizeof(lookup_table_offset);
-    if (Begin() + lookup_table_offset > End()) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "lookup table", GetLocation().c_str(), i,
+    if (UNLIKELY(class_offsets_offset > Size()) ||
+        UNLIKELY((Size() - class_offsets_offset) / sizeof(uint32_t) < header->class_defs_size_)) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
+                                    "class offsets, offset %u of %zu, class defs %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset,
+                                Size(),
+                                header->class_defs_size_);
+      return false;
+    }
+    if (UNLIKELY(!IsAligned<alignof(uint32_t)>(class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with unaligned "
+                                    "class offsets, offset %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset);
+      return false;
+    }
+    const uint32_t* class_offsets_pointer =
+        reinterpret_cast<const uint32_t*>(Begin() + class_offsets_offset);
+
+    uint32_t lookup_table_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &lookup_table_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                    "after lookup table offset",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
     const uint8_t* lookup_table_data = lookup_table_offset != 0u
         ? Begin() + lookup_table_offset
         : nullptr;
-
-    const uint32_t* methods_offsets_pointer = reinterpret_cast<const uint32_t*>(oat);
-
-    oat += (sizeof(*methods_offsets_pointer) * header->class_defs_size_);
-    if (UNLIKELY(oat > End())) {
+    if (lookup_table_offset != 0u &&
+        (UNLIKELY(lookup_table_offset > Size()) ||
+            UNLIKELY(Size() - lookup_table_offset <
+                     TypeLookupTable::RawDataLength(header->class_defs_size_)))) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
-                                    "method offsets",
+                                    "type lookup table, offset %u of %zu, class defs %u",
                                 GetLocation().c_str(),
                                 i,
-                                dex_file_location.c_str());
+                                dex_file_location.c_str(),
+                                lookup_table_offset,
+                                Size(),
+                                header->class_defs_size_);
       return false;
     }
 
@@ -398,7 +450,7 @@
                                               dex_file_checksum,
                                               dex_file_pointer,
                                               lookup_table_data,
-                                              methods_offsets_pointer,
+                                              class_offsets_pointer,
                                               current_dex_cache_arrays);
     oat_dex_files_storage_.push_back(oat_dex_file);
 
@@ -1020,8 +1072,13 @@
 }
 
 std::unique_ptr<const DexFile> OatFile::OatDexFile::OpenDexFile(std::string* error_msg) const {
-  return DexFile::Open(dex_file_pointer_, FileSize(), dex_file_location_,
-                       dex_file_location_checksum_, this, error_msg);
+  return DexFile::Open(dex_file_pointer_,
+                       FileSize(),
+                       dex_file_location_,
+                       dex_file_location_checksum_,
+                       this,
+                       false /* verify */,
+                       error_msg);
 }
 
 uint32_t OatFile::OatDexFile::GetOatClassOffset(uint16_t class_def_index) const {
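
A common thread in the oat_file.cc changes above: the old code advanced the cursor first and only then compared against End(), which can move the pointer past the buffer (undefined behavior) before the check fires, and it multiplied class_defs_size_ by sizeof(uint32_t) with no overflow guard; the new code checks the remaining length before advancing and divides instead of multiplying. A minimal sketch of the safe pattern, with a hypothetical helper name:

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical helper showing bounds-check-before-advance: compare the
// remaining length against the request, and only then move the cursor.
// 'end - *cursor' never overflows because both point into the same buffer.
bool ConsumeBytes(const uint8_t** cursor, const uint8_t* end, size_t length,
                  const uint8_t** out_data) {
  if (static_cast<size_t>(end - *cursor) < length) {
    return false;  // Truncated input: report instead of reading past 'end'.
  }
  *out_data = *cursor;
  *cursor += length;  // Safe: at least 'length' bytes remain.
  return true;
}
```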
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index d6b0868..a8f84a2 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -38,6 +38,7 @@
 #include "os.h"
 #include "profiler.h"
 #include "runtime.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedFd.h"
 #include "utils.h"
 
@@ -326,6 +327,17 @@
   return cached_odex_file_is_up_to_date_;
 }
 
+std::string OatFileAssistant::ArtFileName(const OatFile* oat_file) const {
+  const std::string oat_file_location = oat_file->GetLocation();
+  // Replace extension with .art
+  const size_t last_ext = oat_file_location.find_last_of('.');
+  if (last_ext == std::string::npos) {
+    LOG(ERROR) << "No extension in oat file " << oat_file_location;
+    return std::string();
+  }
+  return oat_file_location.substr(0, last_ext) + ".art";
+}
+
 const std::string* OatFileAssistant::OatFileName() {
   if (!cached_oat_file_name_attempted_) {
     cached_oat_file_name_attempted_ = true;
@@ -1003,5 +1015,22 @@
   return old_profile_load_succeeded_ ? &cached_old_profile_ : nullptr;
 }
 
+gc::space::ImageSpace* OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
+  DCHECK(oat_file != nullptr);
+  std::string art_file = ArtFileName(oat_file);
+  if (art_file.empty()) {
+    return nullptr;
+  }
+  std::string error_msg;
+  ScopedObjectAccess soa(Thread::Current());
+  gc::space::ImageSpace* ret = gc::space::ImageSpace::CreateFromAppImage(art_file.c_str(),
+                                                                         oat_file,
+                                                                         &error_msg);
+  if (ret == nullptr && (VLOG_IS_ON(image) || OS::FileExists(art_file.c_str()))) {
+    LOG(INFO) << "Failed to open app image " << art_file.c_str() << " " << error_msg;
+  }
+  return ret;
+}
+
 }  // namespace art
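
ArtFileName's transformation is a simple suffix rewrite; for example, "/data/dalvik-cache/arm/base.odex" becomes "/data/dalvik-cache/arm/base.art". A standalone restatement of the same logic:

```cpp
#include <string>

// Same logic as ArtFileName above, free of the OatFile dependency: replace
// the final extension with ".art", or return an empty string when there is
// no extension at all.
std::string ReplaceExtensionWithArt(const std::string& oat_file_location) {
  const size_t last_ext = oat_file_location.find_last_of('.');
  if (last_ext == std::string::npos) {
    return std::string();  // No extension: caller treats this as "no app image".
  }
  return oat_file_location.substr(0, last_ext) + ".art";
}
```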
 
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index f781532..7b45bca 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -30,6 +30,12 @@
 
 namespace art {
 
+namespace gc {
+namespace space {
+class ImageSpace;
+}  // namespace space
+}  // namespace gc
+
 // Class for assisting with oat file management.
 //
 // This class collects common utilities for determining the status of an oat
@@ -163,6 +169,9 @@
   // the OatFileAssistant object.
   std::unique_ptr<OatFile> GetBestOatFile();
 
+  // Opens and returns an image space associated with the oat file.
+  gc::space::ImageSpace* OpenImageSpace(const OatFile* oat_file);
+
   // Loads the dex files in the given oat file for the given dex location.
   // The oat file should be up to date for the given dex location.
   // This loads multiple dex files in the case of multidex.
@@ -214,6 +223,9 @@
   bool OatFileNeedsRelocation();
   bool OatFileIsUpToDate();
 
+  // Returns the image file name. Not cached, since it depends on the given oat file.
+  std::string ArtFileName(const OatFile* oat_file) const;
+
   // These methods return the status for a given opened oat file with respect
   // to the dex location.
   OatStatus GivenOatFileStatus(const OatFile& file);
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index f994f0c..25dcbe4 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -996,6 +996,8 @@
     dex_files = Runtime::Current()->GetOatFileManager().OpenDexFilesFromOat(
         dex_location_.c_str(),
         oat_location_.c_str(),
+        /*class_loader*/nullptr,
+        /*dex_elements*/nullptr,
         &oat_file,
         &error_msgs);
     CHECK(!dex_files.empty()) << Join(error_msgs, '\n');
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 36a967f..de90f0a 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -22,10 +22,15 @@
 
 #include "base/logging.h"
 #include "base/stl_util.h"
+#include "class_linker.h"
 #include "dex_file-inl.h"
 #include "gc/space/image_space.h"
+#include "handle_scope-inl.h"
+#include "mirror/class_loader.h"
 #include "oat_file_assistant.h"
+#include "scoped_thread_state_change.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 
 namespace art {
 
@@ -34,6 +39,9 @@
 // normal builds.
 static constexpr bool kDuplicateClassesCheck = kIsDebugBuild;
 
+// If true, then we attempt to load the application image if it exists.
+static constexpr bool kEnableAppImage = true;
+
 const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
   WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
   DCHECK(oat_file != nullptr);
@@ -284,6 +292,8 @@
 std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat(
     const char* dex_location,
     const char* oat_location,
+    jobject class_loader,
+    jobjectArray dex_elements,
     const OatFile** out_oat_file,
     std::vector<std::string>* error_msgs) {
   CHECK(dex_location != nullptr);
@@ -291,12 +301,13 @@
 
   // Verify we aren't holding the mutator lock, which could starve GC if we
   // have to generate or relocate an oat file.
-  Locks::mutator_lock_->AssertNotHeld(Thread::Current());
-
+  Thread* const self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  Runtime* const runtime = Runtime::Current();
   OatFileAssistant oat_file_assistant(dex_location,
                                       oat_location,
                                       kRuntimeISA,
-                                      !Runtime::Current()->IsAotCompiler());
+                                      !runtime->IsAotCompiler());
 
   // Lock the target oat location to avoid races generating and loading the
   // oat file.
@@ -312,11 +323,12 @@
   // Update the oat file on disk if we can. This may fail, but that's okay.
   // Best effort is all that matters here.
   if (!oat_file_assistant.MakeUpToDate(/*out*/&error_msg)) {
-    LOG(WARNING) << error_msg;
+    LOG(INFO) << error_msg;
   }
 
   // Get the oat file on disk.
   std::unique_ptr<const OatFile> oat_file(oat_file_assistant.GetBestOatFile().release());
+
   if (oat_file != nullptr) {
     // Take the file only if it has no collisions, or we must take it because of preopting.
     bool accept_oat_file = !HasCollisions(oat_file.get(), /*out*/ &error_msg);
@@ -351,7 +363,50 @@
 
   // Load the dex files from the oat file.
   if (source_oat_file != nullptr) {
-    dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
+    bool added_image_space = false;
+    if (source_oat_file->IsExecutable()) {
+      std::unique_ptr<gc::space::ImageSpace> image_space(
+          kEnableAppImage ? oat_file_assistant.OpenImageSpace(source_oat_file) : nullptr);
+      if (image_space != nullptr) {
+        ScopedObjectAccess soa(self);
+        StackHandleScope<1> hs(self);
+        Handle<mirror::ClassLoader> h_loader(
+            hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader)));
+        // Cannot load an app image without a class loader.
+        if (h_loader.Get() != nullptr) {
+          std::string temp_error_msg;
+          // Adding the image space races with other threads reading the spaces
+          // array, so suspend all threads while it is added.
+          {
+            ScopedThreadSuspension sts(self, kSuspended);
+            ScopedSuspendAll ssa("Add image space");
+            runtime->GetHeap()->AddSpace(image_space.get());
+          }
+          added_image_space = true;
+          if (!runtime->GetClassLinker()->AddImageSpace(image_space.get(),
+                                                        h_loader,
+                                                        dex_elements,
+                                                        dex_location,
+                                                        /*out*/&dex_files,
+                                                        /*out*/&temp_error_msg)) {
+            LOG(INFO) << "Failed to add image file " << temp_error_msg;
+            dex_files.clear();
+            {
+              ScopedThreadSuspension sts(self, kSuspended);
+              ScopedSuspendAll ssa("Remove image space");
+              runtime->GetHeap()->RemoveSpace(image_space.get());
+            }
+            added_image_space = false;
+            // Non-fatal, don't update error_msg.
+          }
+          image_space.release();
+        }
+      }
+    }
+    if (!added_image_space) {
+      DCHECK(dex_files.empty());
+      dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
+    }
     if (dex_files.empty()) {
       error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
     }
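
The app-image path above registers the space and then rolls the registration back if ClassLinker::AddImageSpace fails, so the heap never keeps a half-wired space and the caller can fall back to loading plain dex files. A compact sketch of that rollback shape with stand-in types (the real code also suspends all threads around each spaces-array mutation):

```cpp
#include <algorithm>
#include <vector>

struct Space {};  // Stand-in for gc::space::ImageSpace.

struct Heap {     // Stand-in for gc::Heap.
  std::vector<Space*> spaces_;
  void AddSpace(Space* s) { spaces_.push_back(s); }
  void RemoveSpace(Space* s) {
    spaces_.erase(std::remove(spaces_.begin(), spaces_.end(), s), spaces_.end());
  }
};

// Register, try to wire up, and unregister again on failure so the caller
// sees an unchanged heap on the fallback path.
bool TryAddImageSpace(Heap& heap, Space* image_space, bool wiring_succeeded) {
  heap.AddSpace(image_space);
  if (!wiring_succeeded) {  // e.g. ClassLinker::AddImageSpace returned false.
    heap.RemoveSpace(image_space);
    return false;
  }
  return true;
}
```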
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index 4690e45..c508c4b 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -25,6 +25,7 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "jni.h"
 
 namespace art {
 
@@ -101,6 +102,8 @@
   std::vector<std::unique_ptr<const DexFile>> OpenDexFilesFromOat(
       const char* dex_location,
       const char* oat_location,
+      jobject class_loader,
+      jobjectArray dex_elements,
       /*out*/ const OatFile** out_oat_file,
       /*out*/ std::vector<std::string>* error_msgs)
       REQUIRES(!Locks::oat_file_manager_lock_, !Locks::mutator_lock_);
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 2b92303..aa64ee3 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -309,8 +309,8 @@
     const std::string option(options[i].first);
       // TODO: support -Djava.class.path
     if (option == "bootclasspath") {
-      auto boot_class_path
-          = reinterpret_cast<const std::vector<const DexFile*>*>(options[i].second);
+      auto boot_class_path = static_cast<std::vector<std::unique_ptr<const DexFile>>*>(
+          const_cast<void*>(options[i].second));
 
       if (runtime_options != nullptr) {
         runtime_options->Set(M::BootClassPathDexList, boot_class_path);
@@ -565,13 +565,6 @@
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
-  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kDefaultMethods) {
-    LOG(WARNING) << "Default method support has been enabled. The verifier will be less strict "
-                 << "in some cases. All existing invoke opcodes have an unstable updated "
-                 << "specification and are nearly guaranteed to change over time. Do not attempt "
-                 << "to write shipping code against the invoke opcodes with this flag.";
-  }
-
   if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kLambdas) {
     LOG(WARNING) << "Experimental lambdas have been enabled. All lambda opcodes have "
                  << "an unstable specification and are nearly guaranteed to change over time. "
@@ -698,8 +691,8 @@
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
   UsageMessage(stream, "  -Xno-dex-file-fallback "
                        "(Don't fall back to dex files without oat files)\n");
-  UsageMessage(stream, "  -Xexperimental:{lambdas,default-methods} "
-                       "(Enable new experimental dalvik opcodes and semantics, off by default)\n");
+  UsageMessage(stream, "  -Xexperimental:lambdas "
+                       "(Enable new and experimental dalvik opcodes and semantics)\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n");
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 6cea902..1bb816b 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -37,12 +37,21 @@
 enum InlineMethodOpcode : uint16_t {
   kIntrinsicDoubleCvt,
   kIntrinsicFloatCvt,
+  kIntrinsicFloatIsInfinite,
+  kIntrinsicDoubleIsInfinite,
+  kIntrinsicFloatIsNaN,
+  kIntrinsicDoubleIsNaN,
   kIntrinsicReverseBits,
   kIntrinsicReverseBytes,
+  kIntrinsicBitCount,
+  kIntrinsicCompare,
+  kIntrinsicHighestOneBit,
+  kIntrinsicLowestOneBit,
   kIntrinsicNumberOfLeadingZeros,
   kIntrinsicNumberOfTrailingZeros,
   kIntrinsicRotateRight,
   kIntrinsicRotateLeft,
+  kIntrinsicSignum,
   kIntrinsicAbsInt,
   kIntrinsicAbsLong,
   kIntrinsicAbsFloat,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1101acd..0c06ca6 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -71,7 +71,7 @@
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "handle_scope-inl.h"
-#include "image.h"
+#include "image-inl.h"
 #include "instrumentation.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
@@ -1097,7 +1097,7 @@
   if (GetHeap()->HasBootImageSpace()) {
     ATRACE_BEGIN("InitFromImage");
     std::string error_msg;
-    bool result = class_linker_->InitFromImage(&error_msg);
+    bool result = class_linker_->InitFromBootImage(&error_msg);
     ATRACE_END();
     if (!result) {
       LOG(ERROR) << "Could not initialize from image: " << error_msg;
@@ -1131,10 +1131,14 @@
     }
 
     std::vector<std::unique_ptr<const DexFile>> boot_class_path;
-    OpenDexFiles(dex_filenames,
-                 dex_locations,
-                 runtime_options.GetOrDefault(Opt::Image),
-                 &boot_class_path);
+    if (runtime_options.Exists(Opt::BootClassPathDexList)) {
+      boot_class_path.swap(*runtime_options.GetOrDefault(Opt::BootClassPathDexList));
+    } else {
+      OpenDexFiles(dex_filenames,
+                   dex_locations,
+                   runtime_options.GetOrDefault(Opt::Image),
+                   &boot_class_path);
+    }
     instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet);
     std::string error_msg;
     if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 20acffb..c8c2ee5 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -366,7 +366,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns a special method that calls into a trampoline for runtime method resolution
-  ArtMethod* GetResolutionMethod() SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* GetResolutionMethod();
 
   bool HasResolutionMethod() const {
     return resolution_method_ != nullptr;
@@ -377,8 +377,8 @@
   ArtMethod* CreateResolutionMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns a special method that calls into a trampoline for runtime imt conflicts.
-  ArtMethod* GetImtConflictMethod() SHARED_REQUIRES(Locks::mutator_lock_);
-  ArtMethod* GetImtUnimplementedMethod() SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* GetImtConflictMethod();
+  ArtMethod* GetImtUnimplementedMethod();
 
   bool HasImtConflictMethod() const {
     return imt_conflict_method_ != nullptr;
diff --git a/runtime/runtime_options.cc b/runtime/runtime_options.cc
index c54461e..e75481c 100644
--- a/runtime/runtime_options.cc
+++ b/runtime/runtime_options.cc
@@ -13,8 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "runtime_options.h"
 
+#include <memory>
+
 #include "gc/heap.h"
 #include "monitor.h"
 #include "runtime.h"
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 5624285..308f3ba 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -113,12 +113,12 @@
 RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 RUNTIME_OPTIONS_KEY (std::string,         Fingerprint)
-RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{, lambdas, default-methods}
+RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{none, lambdas}
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
-RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \
-                                          BootClassPathDexList)  // TODO: make unique_ptr
+RUNTIME_OPTIONS_KEY (std::vector<std::unique_ptr<const DexFile>>*, \
+                                          BootClassPathDexList)
 RUNTIME_OPTIONS_KEY (InstructionSet,      ImageInstructionSet,            kRuntimeISA)
 RUNTIME_OPTIONS_KEY (CompilerCallbacks*,  CompilerCallbacksPtr)  // TODO: make unique_ptr
 RUNTIME_OPTIONS_KEY (bool (*)(),          HookIsSensitiveThread)
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index fc1a445..c8714a6 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -190,7 +190,7 @@
 class DumpCheckpoint FINAL : public Closure {
  public:
   explicit DumpCheckpoint(std::ostream* os)
-      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(GetTid())) {}
+      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(getpid())) {}
 
   void Run(Thread* thread) OVERRIDE {
     // Note thread and self may not be equal if thread was already suspended at the point of the
@@ -1191,6 +1191,7 @@
   }
   LOG(WARNING) << "timed out suspending all daemon threads";
 }
+
 void ThreadList::Register(Thread* self) {
   DCHECK_EQ(self, Thread::Current());
 
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 5815f7a..99b2296 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -815,10 +815,10 @@
   LOG(ERROR) << "Unexpected exception caught event in tracing";
 }
 
-void Trace::BackwardBranch(Thread* /*thread*/, ArtMethod* method,
-                           int32_t /*dex_pc_offset*/)
+void Trace::Branch(Thread* /*thread*/, ArtMethod* method,
+                   uint32_t /*dex_pc*/, int32_t /*dex_pc_offset*/)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-  LOG(ERROR) << "Unexpected backward branch event in tracing" << PrettyMethod(method);
+  LOG(ERROR) << "Unexpected branch event in tracing" << PrettyMethod(method);
 }
 
 void Trace::InvokeVirtualOrInterface(Thread*,
diff --git a/runtime/trace.h b/runtime/trace.h
index 356a81f..80f1a4c 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -164,7 +164,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void InvokeVirtualOrInterface(Thread* thread,
                                 mirror::Object* this_object,
diff --git a/runtime/type_lookup_table.cc b/runtime/type_lookup_table.cc
index 0d40bb7..fc9faec 100644
--- a/runtime/type_lookup_table.cc
+++ b/runtime/type_lookup_table.cc
@@ -16,6 +16,7 @@
 
 #include "type_lookup_table.h"
 
+#include "base/bit_utils.h"
 #include "dex_file-inl.h"
 #include "utf-inl.h"
 #include "utils.h"
@@ -42,25 +43,39 @@
 }
 
 uint32_t TypeLookupTable::RawDataLength(const DexFile& dex_file) {
-  return RoundUpToPowerOfTwo(dex_file.NumClassDefs()) * sizeof(Entry);
+  return RawDataLength(dex_file.NumClassDefs());
 }
 
-TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file) {
+uint32_t TypeLookupTable::RawDataLength(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) * sizeof(Entry) : 0u;
+}
+
+uint32_t TypeLookupTable::CalculateMask(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) - 1u : 0u;
+}
+
+bool TypeLookupTable::SupportedSize(uint32_t num_class_defs) {
+  return num_class_defs != 0u && num_class_defs <= std::numeric_limits<uint16_t>::max();
+}
+
+TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file, uint8_t* storage) {
   const uint32_t num_class_defs = dex_file.NumClassDefs();
-  return (num_class_defs == 0 || num_class_defs > std::numeric_limits<uint16_t>::max())
-      ? nullptr
-      : new TypeLookupTable(dex_file);
+  return SupportedSize(num_class_defs)
+      ? new TypeLookupTable(dex_file, storage)
+      : nullptr;
 }
 
 TypeLookupTable* TypeLookupTable::Open(const uint8_t* raw_data, const DexFile& dex_file) {
   return new TypeLookupTable(raw_data, dex_file);
 }
 
-TypeLookupTable::TypeLookupTable(const DexFile& dex_file)
+TypeLookupTable::TypeLookupTable(const DexFile& dex_file, uint8_t* storage)
     : dex_file_(dex_file),
-      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
-      entries_(new Entry[mask_ + 1]),
-      owns_entries_(true) {
+      mask_(CalculateMask(dex_file.NumClassDefs())),
+      entries_(storage != nullptr ? reinterpret_cast<Entry*>(storage) : new Entry[mask_ + 1]),
+      owns_entries_(storage == nullptr) {
+  static_assert(alignof(Entry) == 4u, "Expecting Entry to be 4-byte aligned.");
+  DCHECK_ALIGNED(storage, alignof(Entry));
   std::vector<uint16_t> conflict_class_defs;
   // The first stage. Put elements on their initial positions. If an initial position is already
   // occupied then delay the insertion of the element to the second stage to reduce probing
@@ -93,7 +108,7 @@
 
 TypeLookupTable::TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file)
     : dex_file_(dex_file),
-      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
+      mask_(CalculateMask(dex_file.NumClassDefs())),
       entries_(reinterpret_cast<Entry*>(const_cast<uint8_t*>(raw_data))),
       owns_entries_(false) {}
 
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
index 3c2295c..d74d01d 100644
--- a/runtime/type_lookup_table.h
+++ b/runtime/type_lookup_table.h
@@ -60,7 +60,7 @@
   }
 
   // Creates a lookup table for the given dex file.
-  static TypeLookupTable* Create(const DexFile& dex_file);
+  static TypeLookupTable* Create(const DexFile& dex_file, uint8_t* storage = nullptr);
 
   // Opens a lookup table from binary data. The lookup table does not own the binary data.
   static TypeLookupTable* Open(const uint8_t* raw_data, const DexFile& dex_file);
@@ -76,6 +76,9 @@
   // Returns the length of the binary data for the specified dex file.
   static uint32_t RawDataLength(const DexFile& dex_file);
 
+  // Returns the length of the binary data for the specified number of class definitions.
+  static uint32_t RawDataLength(uint32_t num_class_defs);
+
  private:
    /**
     * To find element we need to compare strings.
@@ -109,8 +112,11 @@
     }
   };
 
+  static uint32_t CalculateMask(uint32_t num_class_defs);
+  static bool SupportedSize(uint32_t num_class_defs);
+
   // Construct from a dex file.
-  explicit TypeLookupTable(const DexFile& dex_file);
+  explicit TypeLookupTable(const DexFile& dex_file, uint8_t* storage);
 
   // Construct from a dex file with existing data.
   TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file);
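
For context on CalculateMask()/RawDataLength() above: the table always holds a power-of-two number of entries, so lookups can use `hash & mask` instead of a modulo. A standalone sketch of the arithmetic (the helper mirrors RoundUpToPowerOfTwo from base/bit_utils.h; names here are illustrative):

#include <cassert>
#include <cstdint>

// Smallest power of two >= x (x must be non-zero); mirrors bit_utils.h.
uint32_t RoundUpToPowerOfTwoSketch(uint32_t x) {
  assert(x != 0u);
  --x;
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  return x + 1u;
}

int main() {
  const uint32_t num_class_defs = 100u;
  const uint32_t size = RoundUpToPowerOfTwoSketch(num_class_defs);  // 128
  const uint32_t mask = size - 1u;                                  // 0x7f
  const uint32_t hash = 0xdeadbeefu;
  const uint32_t slot = hash & mask;  // Equals hash % size for powers of two.
  assert(slot < size);
  return 0;
}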
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 8e9f12b..07f94c0 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1115,7 +1115,7 @@
   BacktraceMap* map = existing_map;
   std::unique_ptr<BacktraceMap> tmp_map;
   if (map == nullptr) {
-    tmp_map.reset(BacktraceMap::Create(tid));
+    tmp_map.reset(BacktraceMap::Create(getpid()));
     map = tmp_map.get();
   }
   std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index 90e24b9..f6ee6a2 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -60,7 +60,9 @@
 }
 
 inline size_t DexCacheArraysLayout::TypesSize(size_t num_elements) const {
-  return ArraySize(sizeof(GcRoot<mirror::Class>), num_elements);
+  // App image patching relies on having enough room for a forwarding pointer in the types array.
+  // See FixupArtMethodArrayVisitor and ClassLinker::AddImageSpace.
+  return std::max(ArraySize(sizeof(GcRoot<mirror::Class>), num_elements), pointer_size_);
 }
 
 inline size_t DexCacheArraysLayout::TypesAlignment() const {
@@ -72,7 +74,8 @@
 }
 
 inline size_t DexCacheArraysLayout::MethodsSize(size_t num_elements) const {
-  return ArraySize(pointer_size_, num_elements);
+  // App image patching relies on having enough room for a forwarding pointer in the methods array.
+  return std::max(ArraySize(pointer_size_, num_elements), pointer_size_);
 }
 
 inline size_t DexCacheArraysLayout::MethodsAlignment() const {
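
On the std::max() changes above: app image patching writes a forwarding pointer into the types/methods arrays, so even an array with zero elements must be at least one pointer wide. A small sketch of the invariant (hypothetical free functions, assuming a 64-bit pointer size):

#include <algorithm>
#include <cassert>
#include <cstddef>

size_t ArraySizeSketch(size_t element_size, size_t num_elements) {
  return element_size * num_elements;
}

size_t MethodsSizeSketch(size_t pointer_size, size_t num_elements) {
  // Reserve room for a forwarding pointer even when the array is empty.
  return std::max(ArraySizeSketch(pointer_size, num_elements), pointer_size);
}

int main() {
  const size_t kPointerSize = 8u;
  assert(MethodsSizeSketch(kPointerSize, 0u) == 8u);   // Not 0: forwarding pointer fits.
  assert(MethodsSizeSketch(kPointerSize, 3u) == 24u);  // Unchanged for non-empty arrays.
  return 0;
}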
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1c95648..2890a98 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -608,7 +608,6 @@
 bool MethodVerifier::Verify() {
   // Some older code doesn't correctly mark constructors as such. Test for this case by looking at
   // the name.
-  Runtime* runtime = Runtime::Current();
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_);
   const char* method_name = dex_file_->StringDataByIdx(method_id.name_idx_);
   bool instance_constructor_by_name = strcmp("<init>", method_name) == 0;
@@ -678,12 +677,9 @@
       }
       if ((class_def_->GetJavaAccessFlags() & kAccInterface) != 0) {
         // Interface methods must be public and abstract (if default methods are disabled).
-        bool default_methods_supported =
-            runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
-        uint32_t kRequired = kAccPublic | (default_methods_supported ? 0 : kAccAbstract);
+        uint32_t kRequired = kAccPublic;
         if ((method_access_flags_ & kRequired) != kRequired) {
-          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be public"
-                                            << (default_methods_supported ? "" : " and abstract");
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be public";
           return false;
         }
         // In addition to the above, interface methods must not be protected.
@@ -715,19 +711,14 @@
       // default methods enabled we also allow other public, static, non-final methods to have code.
       // Otherwise that is the only type of method allowed.
       if (!(IsConstructor() && IsStatic())) {
-        if (runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods)) {
-          if (IsInstanceConstructor()) {
-            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-static constructor";
-            return false;
-          } else if (method_access_flags_ & kAccFinal) {
-            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have final methods";
-            return false;
-          } else if (!(method_access_flags_ & kAccPublic)) {
-            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-public members";
-            return false;
-          }
-        } else {
-          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be abstract";
+        if (IsInstanceConstructor()) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-static constructor";
+          return false;
+        } else if (method_access_flags_ & kAccFinal) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have final methods";
+          return false;
+        } else if (!(method_access_flags_ & kAccPublic)) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-public members";
           return false;
         }
       }
@@ -2735,7 +2726,8 @@
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
       bool is_super = (inst->Opcode() == Instruction::INVOKE_SUPER ||
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL, is_range, is_super);
+      MethodType type = is_super ? METHOD_SUPER : METHOD_VIRTUAL;
+      ArtMethod* called_method = VerifyInvocationArgs(inst, type, is_range);
       const RegType* return_type = nullptr;
       if (called_method != nullptr) {
         size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
@@ -2768,7 +2760,7 @@
     case Instruction::INVOKE_DIRECT:
     case Instruction::INVOKE_DIRECT_RANGE: {
       bool is_range = (inst->Opcode() == Instruction::INVOKE_DIRECT_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range, false);
+      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range);
       const char* return_type_descriptor;
       bool is_constructor;
       const RegType* return_type = nullptr;
@@ -2848,7 +2840,7 @@
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE: {
         bool is_range = (inst->Opcode() == Instruction::INVOKE_STATIC_RANGE);
-        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range, false);
+        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range);
         const char* descriptor;
         if (called_method == nullptr) {
           uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
@@ -2870,7 +2862,7 @@
     case Instruction::INVOKE_INTERFACE:
     case Instruction::INVOKE_INTERFACE_RANGE: {
       bool is_range =  (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
-      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range, false);
+      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range);
       if (abs_method != nullptr) {
         mirror::Class* called_interface = abs_method->GetDeclaringClass();
         if (!called_interface->IsInterface() && !called_interface->IsObjectClass()) {
@@ -3639,9 +3631,8 @@
   return *common_super;
 }
 
-// TODO Maybe I should just add a METHOD_SUPER to MethodType?
 ArtMethod* MethodVerifier::ResolveMethodAndCheckAccess(
-    uint32_t dex_method_idx, MethodType method_type, bool is_super) {
+    uint32_t dex_method_idx, MethodType method_type) {
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx);
   const RegType& klass_type = ResolveClassAndCheckAccess(method_id.class_idx_);
   if (klass_type.IsConflict()) {
@@ -3668,9 +3659,10 @@
       res_method = klass->FindDirectMethod(name, signature, pointer_size);
     } else if (method_type == METHOD_INTERFACE) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
-    } else if (is_super && klass->IsInterface()) {
+    } else if (method_type == METHOD_SUPER && klass->IsInterface()) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
     } else {
+      DCHECK(method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER);
       res_method = klass->FindVirtualMethod(name, signature, pointer_size);
     }
     if (res_method != nullptr) {
@@ -3679,7 +3671,9 @@
       // If a virtual or interface method wasn't found with the expected type, look in
       // the direct methods. This can happen when the wrong invoke type is used or when
       // a class has changed, and will be flagged as an error in later checks.
-      if (method_type == METHOD_INTERFACE || method_type == METHOD_VIRTUAL) {
+      if (method_type == METHOD_INTERFACE ||
+          method_type == METHOD_VIRTUAL ||
+          method_type == METHOD_SUPER) {
         res_method = klass->FindDirectMethod(name, signature, pointer_size);
       }
       if (res_method == nullptr) {
@@ -3710,12 +3704,10 @@
   // Note: this check must be after the initializer check, as those are required to fail a class,
   //       while this check implies an IncompatibleClassChangeError.
   if (klass->IsInterface()) {
-    Runtime* runtime = Runtime::Current();
-    const bool default_methods_supported =
-        runtime == nullptr ||
-        runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
+    // Calls to interface methods must use invoke-interface, invoke-super, or invoke-static.
     if (method_type != METHOD_INTERFACE &&
-        (!default_methods_supported || method_type != METHOD_STATIC)) {
+        method_type != METHOD_STATIC &&
+        method_type != METHOD_SUPER) {
       Fail(VERIFY_ERROR_CLASS_CHANGE)
           << "non-interface method " << PrettyMethod(dex_method_idx, *dex_file_)
           << " is in an interface class " << PrettyClass(klass);
@@ -3742,7 +3734,7 @@
     return res_method;
   }
   // Check that invoke-virtual and invoke-super are not used on private methods of the same class.
-  if (res_method->IsPrivate() && method_type == METHOD_VIRTUAL) {
+  if (res_method->IsPrivate() && (method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke-super/virtual can't be used on private method "
                                       << PrettyMethod(res_method);
     return nullptr;
@@ -3751,7 +3743,9 @@
   // target method.
   if ((method_type == METHOD_DIRECT && (!res_method->IsDirect() || res_method->IsStatic())) ||
       (method_type == METHOD_STATIC && !res_method->IsStatic()) ||
-      ((method_type == METHOD_VIRTUAL || method_type == METHOD_INTERFACE) && res_method->IsDirect())
+      ((method_type == METHOD_SUPER ||
+        method_type == METHOD_VIRTUAL ||
+        method_type == METHOD_INTERFACE) && res_method->IsDirect())
       ) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "invoke type (" << method_type << ") does not match method "
                                        " type of " << PrettyMethod(res_method);
@@ -3937,12 +3931,12 @@
 };
 
 ArtMethod* MethodVerifier::VerifyInvocationArgs(
-    const Instruction* inst, MethodType method_type, bool is_range, bool is_super) {
+    const Instruction* inst, MethodType method_type, bool is_range) {
   // Resolve the method. This could be an abstract or concrete method depending on what sort of call
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
 
-  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type, is_super);
+  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == nullptr) {  // error or class is unresolved
     // Check what we can statically.
     if (!have_pending_hard_failure_) {
@@ -3953,8 +3947,7 @@
 
   // If we're using invoke-super(method), make sure that the executing method's class' superclass
   // has a vtable entry for the target method, or that the target is on an interface.
-  if (is_super) {
-    DCHECK(method_type == METHOD_VIRTUAL);
+  if (method_type == METHOD_SUPER) {
     if (res_method->GetDeclaringClass()->IsInterface()) {
       // TODO Fill in this part. Verify what we can...
       if (Runtime::Current()->IsAotCompiler()) {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index ec0a8f9..a26e0fb 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -57,7 +57,8 @@
   METHOD_UNKNOWN  = 0,
   METHOD_DIRECT,      // <init>, private
   METHOD_STATIC,      // static
-  METHOD_VIRTUAL,     // virtual, super
+  METHOD_VIRTUAL,     // virtual
+  METHOD_SUPER,       // super
   METHOD_INTERFACE    // interface
 };
 std::ostream& operator<<(std::ostream& os, const MethodType& rhs);
@@ -654,7 +655,7 @@
    * the referrer can access the resolved method.
    * Does not throw exceptions.
    */
-  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type, bool is_super)
+  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -679,9 +680,7 @@
    * Returns the resolved method on success, null on failure (with *failure
    * set appropriately).
    */
-  ArtMethod* VerifyInvocationArgs(const Instruction* inst,
-                                          MethodType method_type,
-                                          bool is_range, bool is_super)
+  ArtMethod* VerifyInvocationArgs(const Instruction* inst, MethodType method_type, bool is_range)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Similar checks to the above, but on the proto. Will be used when the method cannot be
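
The refactoring above folds the old (METHOD_VIRTUAL, is_super) pair into a dedicated METHOD_SUPER value, so each invoke opcode maps to exactly one MethodType and ResolveMethodAndCheckAccess() can branch on the method type alone. A simplified sketch of the selection logic (condensed from the invoke cases in method_verifier.cc; the enum is abbreviated here):

enum MethodType {
  METHOD_DIRECT,
  METHOD_STATIC,
  METHOD_VIRTUAL,
  METHOD_SUPER,
  METHOD_INTERFACE,
};

// Condensed mapping: invoke-super gets its own MethodType instead of
// riding along as METHOD_VIRTUAL plus an is_super flag.
MethodType SelectInvokeType(bool is_interface, bool is_static, bool is_direct, bool is_super) {
  if (is_interface) return METHOD_INTERFACE;
  if (is_static) return METHOD_STATIC;
  if (is_direct) return METHOD_DIRECT;
  return is_super ? METHOD_SUPER : METHOD_VIRTUAL;
}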
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 8300921..cfa8329 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -129,6 +129,7 @@
 jmethodID WellKnownClasses::org_apache_harmony_dalvik_ddmc_DdmServer_dispatch;
 
 jfieldID WellKnownClasses::dalvik_system_DexFile_cookie;
+jfieldID WellKnownClasses::dalvik_system_DexFile_fileName;
 jfieldID WellKnownClasses::dalvik_system_PathClassLoader_pathList;
 jfieldID WellKnownClasses::dalvik_system_DexPathList_dexElements;
 jfieldID WellKnownClasses::dalvik_system_DexPathList__Element_dexFile;
@@ -333,6 +334,7 @@
        true, "newStringFromStringBuilder", "(Ljava/lang/StringBuilder;)Ljava/lang/String;");
 
   dalvik_system_DexFile_cookie = CacheField(env, dalvik_system_DexFile, false, "mCookie", "Ljava/lang/Object;");
+  dalvik_system_DexFile_fileName = CacheField(env, dalvik_system_DexFile, false, "mFileName", "Ljava/lang/String;");
   dalvik_system_PathClassLoader_pathList = CacheField(env, dalvik_system_PathClassLoader, false, "pathList", "Ldalvik/system/DexPathList;");
   dalvik_system_DexPathList_dexElements = CacheField(env, dalvik_system_DexPathList, false, "dexElements", "[Ldalvik/system/DexPathList$Element;");
   dalvik_system_DexPathList__Element_dexFile = CacheField(env, dalvik_system_DexPathList__Element, false, "dexFile", "Ldalvik/system/DexFile;");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index 55158a7..482ff0a 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -140,6 +140,7 @@
   static jmethodID org_apache_harmony_dalvik_ddmc_DdmServer_dispatch;
 
   static jfieldID dalvik_system_DexFile_cookie;
+  static jfieldID dalvik_system_DexFile_fileName;
   static jfieldID dalvik_system_DexPathList_dexElements;
   static jfieldID dalvik_system_DexPathList__Element_dexFile;
   static jfieldID dalvik_system_PathClassLoader_pathList;
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index 9daaf8e..d96fb42 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -133,4 +133,8 @@
   return new ZipEntry(handle_, zip_entry.release());
 }
 
+ZipArchive::~ZipArchive() {
+  CloseArchive(handle_);
+}
+
 }  // namespace art
diff --git a/runtime/zip_archive.h b/runtime/zip_archive.h
index 717eb8c..42bf55c 100644
--- a/runtime/zip_archive.h
+++ b/runtime/zip_archive.h
@@ -63,9 +63,7 @@
 
   ZipEntry* Find(const char* name, std::string* error_msg) const;
 
-  ~ZipArchive() {
-    CloseArchive(handle_);
-  }
+  ~ZipArchive();
 
  private:
   explicit ZipArchive(ZipArchiveHandle handle) : handle_(handle) {}
diff --git a/test/004-StackWalk/src/Main.java b/test/004-StackWalk/src/Main.java
index 883ce2c..072b1d0 100644
--- a/test/004-StackWalk/src/Main.java
+++ b/test/004-StackWalk/src/Main.java
@@ -2,14 +2,14 @@
   public Main() {
   }
 
+  boolean doThrow = false;
+
   int $noinline$f() throws Exception {
     g(1);
     g(2);
 
-    // This loop currently defeats inlining of `f`.
-    for (int i = 0; i < 10; i++) {
-      Thread.sleep(0);
-    }
+    // This currently defeats inlining of `f`.
+    if (doThrow) { throw new Error(); }
     return 0;
   }
 
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index 5b22e88..a9a7a05 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -110,23 +110,35 @@
     check(unsafe.getObject(t, objectOffset), objectValue, "Unsafe.getObject(Object, long)");
 
     if (unsafe.compareAndSwapInt(t, intOffset, 0, 1)) {
-        System.out.println("Unexpectedly succeeding compareAndSwap...");
+      System.out.println("Unexpectedly succeeding compareAndSwapInt(t, intOffset, 0, 1)");
     }
     if (!unsafe.compareAndSwapInt(t, intOffset, intValue, 0)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwap...");
+      System.out.println(
+          "Unexpectedly not succeeding compareAndSwapInt(t, intOffset, intValue, 0)");
     }
     if (!unsafe.compareAndSwapInt(t, intOffset, 0, 1)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwap...");
+      System.out.println("Unexpectedly not succeeding compareAndSwapInt(t, intOffset, 0, 1)");
+    }
+    // Exercise sun.misc.Unsafe.compareAndSwapInt using the same
+    // integer (1) for the `expectedValue` and `newValue` arguments.
+    if (!unsafe.compareAndSwapInt(t, intOffset, 1, 1)) {
+      System.out.println("Unexpectedly not succeeding compareAndSwapInt(t, intOffset, 1, 1)");
     }
 
     if (unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
-        System.out.println("Unexpectedly succeeding compareAndSwapLong...");
+      System.out.println("Unexpectedly succeeding compareAndSwapLong(t, longOffset, 0, 1)");
     }
     if (!unsafe.compareAndSwapLong(t, longOffset, longValue, 0)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+      System.out.println(
+          "Unexpectedly not succeeding compareAndSwapLong(t, longOffset, longValue, 0)");
     }
     if (!unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+      System.out.println("Unexpectedly not succeeding compareAndSwapLong(t, longOffset, 0, 1)");
+    }
+    // Exercise sun.misc.Unsafe.compareAndSwapLong using the same
+    // value (1) for the `expectedValue` and `newValue` arguments.
+    if (!unsafe.compareAndSwapLong(t, longOffset, 1, 1)) {
+      System.out.println("Unexpectedly not succeeding compareAndSwapLong(t, longOffset, 1, 1)");
     }
 
     // We do not use `null` as argument to sun.misc.Unsafe.compareAndSwapObject
@@ -135,31 +147,41 @@
     // references).  This way, when heap poisoning is enabled, we can
     // better exercise its implementation within that method.
     if (unsafe.compareAndSwapObject(t, objectOffset, new Object(), new Object())) {
-        System.out.println("Unexpectedly succeeding compareAndSwapObject...");
+      System.out.println("Unexpectedly succeeding " +
+          "compareAndSwapObject(t, objectOffset, new Object(), new Object())");
     }
     Object objectValue2 = new Object();
     if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue, objectValue2)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println("Unexpectedly not succeeding " +
+          "compareAndSwapObject(t, objectOffset, objectValue, objectValue2)");
     }
     Object objectValue3 = new Object();
     if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue2, objectValue3)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println("Unexpectedly not succeeding " +
+          "compareAndSwapObject(t, objectOffset, objectValue2, objectValue3)");
     }
-
+    // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
+    // object (`objectValue3`) for the `expectedValue` and `newValue` arguments.
+    if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue3, objectValue3)) {
+      System.out.println("Unexpectedly not succeeding " +
+          "compareAndSwapObject(t, objectOffset, objectValue3, objectValue3)");
+    }
     // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
     // object (`t`) for the `obj` and `newValue` arguments.
     if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue3, t)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println(
+          "Unexpectedly not succeeding compareAndSwapObject(t, objectOffset, objectValue3, t)");
     }
     // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
     // object (`t`) for the `obj`, `expectedValue` and `newValue` arguments.
     if (!unsafe.compareAndSwapObject(t, objectOffset, t, t)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println("Unexpectedly not succeeding compareAndSwapObject(t, objectOffset, t, t)");
     }
     // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
     // object (`t`) for the `obj` and `expectedValue` arguments.
     if (!unsafe.compareAndSwapObject(t, objectOffset, t, new Object())) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println(
+          "Unexpectedly not succeeding compareAndSwapObject(t, objectOffset, t, new Object())");
     }
   }
 
diff --git a/test/048-reflect-v8/build b/test/048-reflect-v8/build
new file mode 100644
index 0000000..4ea1838
--- /dev/null
+++ b/test/048-reflect-v8/build
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make us exit on a failure.
+set -e
+
+# Hard-wired use of experimental jack.
+# TODO: fix this temporary work-around for lambdas, see b/19467889
+export USE_JACK=true
+export JACK_SERVER=false
+export JACK_REPOSITORY="${ANDROID_BUILD_TOP}/prebuilts/sdk/tools/jacks"
+# e.g. /foo/bar/jack-3.10.ALPHA.jar -> 3.10.ALPHA
+export JACK_VERSION="$(find "$JACK_REPOSITORY" -name '*ALPHA*' | sed 's/.*jack-//g' | sed 's/[.]jar//g')"
+
+./default-build "$@" --experimental default-methods
diff --git a/test/048-reflect-v8/expected.txt b/test/048-reflect-v8/expected.txt
new file mode 100644
index 0000000..3109ecc
--- /dev/null
+++ b/test/048-reflect-v8/expected.txt
@@ -0,0 +1,95 @@
+==============================
+Are These Methods Default:
+==============================
+IsDefaultTest$DefaultInterface is default = yes
+IsDefaultTest$RegularInterface is default = no
+IsDefaultTest$ImplementsWithDefault is default = yes
+IsDefaultTest$ImplementsWithRegular is default = no
+==============================
+Class annotations by type:
+==============================
+Annotations by type, defined by class SingleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations by type, defined by class SingleUser with annotation Calendars: <empty>
+Annotations by type, defined by class User with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by class User with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by class User2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by class User2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations by type, defined by class UserComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Annotations by type, defined by class UserComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+Annotations by type, defined by class UserSub with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by class UserSub with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by class UserSub2 with annotation Calendar: @Calendar(dayOfMonth=sub2, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by class UserSub2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+-----------------------------
+-----------------------------
+==============================
+Class declared annotation:
+==============================
+Declared annotations by class class SingleUser, annotation interface Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Declared annotations by class class SingleUser, annotation interface Calendars: <null>
+Declared annotations by class class User, annotation interface Calendar: <null>
+Declared annotations by class class User, annotation interface Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Declared annotations by class class UserComplex, annotation interface Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by class class UserComplex, annotation interface Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+Declared annotations by class class UserSub, annotation interface Calendar: <null>
+Declared annotations by class class UserSub, annotation interface Calendars: <null>
+Declared annotations by class class UserSub2, annotation interface Calendar: @Calendar(dayOfMonth=sub2, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by class class UserSub2, annotation interface Calendars: <null>
+-----------------------------
+-----------------------------
+==============================
+Declared class annotations by type:
+==============================
+Declared annotations by type, defined by class SingleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Declared annotations by type, defined by class SingleUser with annotation Calendars: <empty>
+Declared annotations by type, defined by class User with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Declared annotations by type, defined by class User with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Declared annotations by type, defined by class User2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by type, defined by class User2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Declared annotations by type, defined by class UserComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Declared annotations by type, defined by class UserComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+Declared annotations by type, defined by class UserSub with annotation Calendar: <empty>
+Declared annotations by type, defined by class UserSub with annotation Calendars: <empty>
+Declared annotations by type, defined by class UserSub2 with annotation Calendar: @Calendar(dayOfMonth=sub2, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by type, defined by class UserSub2 with annotation Calendars: <empty>
+-----------------------------
+-----------------------------
+==============================
+Method annotations by type:
+==============================
+Annotations by type, defined by method singleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations by type, defined by method singleUser with annotation Calendars: <empty>
+Annotations by type, defined by method user with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by method user with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by method user2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by method user2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations by type, defined by method userComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Annotations by type, defined by method userComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+-----------------------------
+-----------------------------
+==============================
+Declared method annotations:
+==============================
+Annotations declared by method singleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations declared by method singleUser with annotation Calendars: <null>
+Annotations declared by method user with annotation Calendar: <null>
+Annotations declared by method user with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations declared by method user2 with annotation Calendar: <null>
+Annotations declared by method user2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations declared by method userComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6)
+Annotations declared by method userComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+-----------------------------
+-----------------------------
+==============================
+Declared method annotations by type:
+==============================
+Annotations by type, defined by method singleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations by type, defined by method singleUser with annotation Calendars: <empty>
+Annotations by type, defined by method user with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by method user with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by method user2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by method user2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations by type, defined by method userComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Annotations by type, defined by method userComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+-----------------------------
+-----------------------------
diff --git a/test/048-reflect-v8/info.txt b/test/048-reflect-v8/info.txt
new file mode 100644
index 0000000..a336d30
--- /dev/null
+++ b/test/048-reflect-v8/info.txt
@@ -0,0 +1 @@
+Test reflection for 1.8 APIs
diff --git a/test/048-reflect-v8/src/AnnotationTest.java b/test/048-reflect-v8/src/AnnotationTest.java
new file mode 100644
index 0000000..75e6845
--- /dev/null
+++ b/test/048-reflect-v8/src/AnnotationTest.java
@@ -0,0 +1,291 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Method;
+
+public class AnnotationTest extends AnnotationTestHelpers {
+  public static void testAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Class annotations by type:");
+    System.out.println("==============================");
+
+    // Print associated annotations:
+    // * A is directly present or repeatably present on an element E;
+    // * No annotation of A is directly/repeatably present on an element
+    //   AND E is a class AND A's type is inheritable, AND A is associated with its superclass.
+    // (Looks through superclasses recursively only if there are no results at each level,
+    // and the annotation is @Inherited.)
+    printAnnotationsByType(Calendar.class, SingleUser.class);
+    printAnnotationsByType(Calendars.class, SingleUser.class);
+
+    printAnnotationsByType(Calendar.class, User.class);
+    printAnnotationsByType(Calendars.class, User.class);
+
+    printAnnotationsByType(Calendar.class, User2.class);  // Enforce ordering 'z,x,y'
+    printAnnotationsByType(Calendars.class, User2.class);
+
+    // NOTE:
+    //    Order of outer-most annotations Calendars[C,C],S vs C,Calendars[C,C] is unspecified.
+    //    In particular, the order of #getDeclaredAnnotations is itself unspecified.
+    //    The only requirement for #getAnnotationsByType is to have same ordering as
+    //    #getDeclaredAnnotations.
+    //    (Calendars[] itself has to maintain value() order).
+    printAnnotationsByType(Calendar.class, UserComplex.class);  // Cs(C,C),C collapses into C,C,C.
+    printAnnotationsByType(Calendars.class, UserComplex.class);
+
+    printAnnotationsByType(Calendar.class, UserSub.class);
+    printAnnotationsByType(Calendars.class, UserSub.class);
+
+    printAnnotationsByType(Calendar.class, UserSub2.class);
+    // The directly present "Calendar" annotation masks all the repeatably present
+    // "Calendar" annotations coming from User.
+    printAnnotationsByType(Calendars.class, UserSub2.class);
+    // Edge case: UserSub2 doesn't directly have a Calendars annotation,
+    // so it doesn't mask the "User" Calendars annotation.
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+
+  }
+
+  public static void testDeclaredAnnotation() {
+    System.out.println("==============================");
+    System.out.println("Class declared annotation:");
+    System.out.println("==============================");
+
+    // Print directly present annotations:
+    //
+    // The element E has an annotation_item for it (accessible through an
+    // annotations_directory_item) corresponding to an annotation A,
+    // and A's type_idx must match that on the encoded_annotation (from the annotation_item).
+    // (Does not look through the subtypes recursively)
+    printDeclaredAnnotation(SingleUser.class, Calendar.class);
+    printDeclaredAnnotation(SingleUser.class, Calendars.class);
+
+    printDeclaredAnnotation(User.class, Calendar.class);
+    printDeclaredAnnotation(User.class, Calendars.class);
+
+    printDeclaredAnnotation(UserComplex.class, Calendar.class);
+    printDeclaredAnnotation(UserComplex.class, Calendars.class);
+
+    printDeclaredAnnotation(UserSub.class, Calendar.class);
+    printDeclaredAnnotation(UserSub.class, Calendars.class);
+
+    printDeclaredAnnotation(UserSub2.class, Calendar.class);
+    printDeclaredAnnotation(UserSub2.class, Calendars.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  public static void testDeclaredAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Declared class annotations by type:");
+    System.out.println("==============================");
+
+    // A is directly present or repeatably present on an element E;
+    // -- (does not do any recursion for classes regardless of @Inherited)
+    printDeclaredAnnotationsByType(Calendar.class, SingleUser.class);
+    printDeclaredAnnotationsByType(Calendars.class, SingleUser.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, User.class);
+    printDeclaredAnnotationsByType(Calendars.class, User.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, User2.class);  // Enforce ordering 'z,x,y'
+    printDeclaredAnnotationsByType(Calendars.class, User2.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, UserComplex.class);
+    printDeclaredAnnotationsByType(Calendars.class, UserComplex.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, UserSub.class);
+    printDeclaredAnnotationsByType(Calendars.class, UserSub.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, UserSub2.class);
+    // The directly present "Calendar" annotation masks all the repeatably present "Calendar"
+    // annotations coming from User.
+    printDeclaredAnnotationsByType(Calendars.class, UserSub2.class);
+    // Edge case: UserSub2 doesn't directly have a Calendars annotation,
+    // so it doesn't mask the "User" Calendars annotation.
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // "annotationUseClass."
+  private static <A extends Annotation> void printAnnotationsByType(Class<A> annotationClass,
+      Class<?> annotationUseClass) {
+    A[] annotationsByType = annotationUseClass.getAnnotationsByType(annotationClass);
+
+    String msg = "Annotations by type, defined by class "
+        + annotationUseClass.getName() + " with annotation " + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+
+    System.out.println(msg);
+  }
+
+  private static <A extends Annotation> void printDeclaredAnnotation(Class<?> annotationUseClass,
+      Class<A> annotationDefClass) {
+    A anno = annotationUseClass.getDeclaredAnnotation(annotationDefClass);
+
+    String msg = asString(anno);
+
+    System.out.println("Declared annotations by class " + annotationUseClass
+        + ", annotation " + annotationDefClass + ": " + msg);
+  }
+
+  // Print the annotation "annotationClass" that is directly/indirectly present with an element
+  // denoted by "annotationUseClass."
+  private static <A extends Annotation> void printDeclaredAnnotationsByType(
+      Class<A> annotationClass, Class<?> annotationUseClass) {
+    A[] annotationsByType = annotationUseClass.getDeclaredAnnotationsByType(annotationClass);
+
+    String msg = "Declared annnotations by type, defined by class " + annotationUseClass.getName()
+        + " with annotation " + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+
+  public static void testMethodAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Method annotations by type:");
+    System.out.println("==============================");
+
+    // Print associated annotations:
+    // * A is directly present or repeatably present on an element E;
+    // * No annotation of A is directly/repeatably present on an element AND E is a class
+    //   AND A's type is inheritable, AND A is associated with its superclass.
+    // (Looks through superclasses recursively only if there are no results at each level,
+    // and the annotation is @Inherited.)
+    printMethodAnnotationsByType(Calendar.class, "singleUser", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "singleUser", AnnotationTestFixture.class);
+
+    printMethodAnnotationsByType(Calendar.class, "user", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "user", AnnotationTestFixture.class);
+
+    printMethodAnnotationsByType(Calendar.class, "user2", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "user2", AnnotationTestFixture.class);
+
+    printMethodAnnotationsByType(Calendar.class, "userComplex", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "userComplex", AnnotationTestFixture.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // "annotationUseClass" method methodName.
+  private static <A extends Annotation> void printMethodAnnotationsByType(Class<A> annotationClass,
+      String methodName, Class<?> annotationUseClass) {
+    Method m = null;
+    try {
+      m = annotationUseClass.getDeclaredMethod(methodName);
+    } catch (Throwable t) {
+      throw new AssertionError(t);
+    }
+    A[] annotationsByType = m.getAnnotationsByType(annotationClass);
+
+    String msg = "Annotations by type, defined by method " + m.getName() + " with annotation " +
+      annotationClass.getName() + ": " +
+      asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+
+  public static void testMethodDeclaredAnnotations() {
+    System.out.println("==============================");
+    System.out.println("Declared method annotations:");
+    System.out.println("==============================");
+
+    printMethodDeclaredAnnotation(Calendar.class, "singleUser", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "singleUser", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotation(Calendar.class, "user", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "user", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotation(Calendar.class, "user2", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "user2", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotation(Calendar.class, "userComplex", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "userComplex", AnnotationTestFixture.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // methodName in annotationUseClass.
+  private static <A extends Annotation> void printMethodDeclaredAnnotation(Class<A> annotationClass,
+      String methodName, Class<?> annotationUseClass) {
+    Method m = null;
+    try {
+      m = annotationUseClass.getDeclaredMethod(methodName);
+    } catch (Throwable t) {
+      throw new AssertionError(t);
+    }
+    A declaredAnnotation = m.getDeclaredAnnotation(annotationClass);
+
+    String msg = "Annotations declared by method " + m.getName() + " with annotation "
+        + annotationClass.getName() + ": "
+        + asString(declaredAnnotation);
+
+    System.out.println(msg);
+  }
+
+  public static void testMethodDeclaredAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Declared method annotations by type:");
+    System.out.println("==============================");
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "singleUser", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "singleUser", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "user", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "user", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "user2", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "user2", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "userComplex", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "userComplex",
+        AnnotationTestFixture.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // methodName in annotationUseClass.
+  private static <A extends Annotation> void printMethodDeclaredAnnotationByType(
+      Class<A> annotationClass, String methodName, Class<?> annotationUseClass) {
+    Method m = null;
+    try {
+      m = annotationUseClass.getDeclaredMethod(methodName);
+    } catch (Throwable t) {
+      throw new AssertionError(t);
+    }
+    A[] annotationsByType = m.getDeclaredAnnotationsByType(annotationClass);
+
+    String msg = "Annotations by type, defined by method " + m.getName() + " with annotation "
+        + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+}
diff --git a/test/048-reflect-v8/src/AnnotationTestFixture.java b/test/048-reflect-v8/src/AnnotationTestFixture.java
new file mode 100644
index 0000000..248dfac
--- /dev/null
+++ b/test/048-reflect-v8/src/AnnotationTestFixture.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class AnnotationTestFixture {
+
+  @Calendar(dayOfWeek="single", hour=23)
+  public static void singleUser() {
+
+  }
+  @Calendars ({
+    @Calendar(dayOfMonth="last"),
+    @Calendar(dayOfWeek="Fri", hour=23)
+  })
+  public static void user() {
+
+  }
+
+  @Calendars ({
+    @Calendar(dayOfMonth="z"),
+    @Calendar(dayOfMonth="x"),
+    @Calendar(dayOfMonth="y")
+  })
+  public static void user2() {
+
+  }
+
+  @Calendar(dayOfMonth="afirst")
+  @Calendars ({
+    @Calendar(dayOfMonth="zsecond"),
+    @Calendar(dayOfMonth="athird", hour=23)
+  })
+  public static void userComplex() {
+
+  }
+}
diff --git a/test/048-reflect-v8/src/AnnotationTestHelpers.java b/test/048-reflect-v8/src/AnnotationTestHelpers.java
new file mode 100644
index 0000000..6b5bea2
--- /dev/null
+++ b/test/048-reflect-v8/src/AnnotationTestHelpers.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Annotation;
+
+public class AnnotationTestHelpers {
+  // Provide custom print functions that produce deterministic output.
+  // Note that Annotation#toString has unspecified order: it prints out the
+  // fields, which is why we can't rely on it.
+
+  public static String asString(Annotation anno) {
+    if (anno instanceof Calendar) {
+      return asString((Calendar)anno);
+    } else if (anno instanceof Calendars) {
+      return asString((Calendars)anno);
+    } else {
+      if (anno == null) {
+        return "<null>";
+      }
+      // Fall-back; normally only reached on a test failure.
+      return anno.toString();
+    }
+  }
+
+  public static String asString(Annotation[] annos) {
+    String msg = "";
+
+    if (annos == null) {
+      msg += "<null>";
+    } else if (annos.length == 0) {
+      msg += "<empty>";
+    } else {
+      for (int i = 0; i < annos.length; ++i) {
+        msg += asString(annos[i]);
+
+        if (i != annos.length - 1) {
+          msg += ", ";
+        }
+      }
+    }
+
+    return msg;
+  }
+
+  public static String asString(Calendar calendar) {
+    if (calendar == null) {
+      return "<null>";
+    }
+
+    return "@Calendar(dayOfMonth=" + calendar.dayOfMonth() + ", dayOfWeek=" +
+      calendar.dayOfWeek() + ", hour=" + calendar.hour() + ")";
+  }
+
+  public static String asString(Calendars calendars) {
+    if (calendars == null) {
+      return "<null>";
+    }
+
+    String s = "@Calendars(value=[";
+
+    Calendar[] allValues = calendars.value();
+    for (int i = 0; i < allValues.length; ++i) {
+      s += asString(allValues[i]);
+      if (i != allValues.length - 1) {
+        s += ", ";
+      }
+    }
+
+    s += "])";
+
+    return s;
+  }
+}
diff --git a/test/048-reflect-v8/src/Calendar.java b/test/048-reflect-v8/src/Calendar.java
new file mode 100644
index 0000000..4a16573
--- /dev/null
+++ b/test/048-reflect-v8/src/Calendar.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Inherited;
+import java.lang.annotation.Repeatable;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+// This is a plain old non-1.8 annotation. At runtime we can see that it has a
+// "Repeatable" meta-annotation if we query with getDeclaredAnnotation(Repeatable.class).
+@Retention(RetentionPolicy.RUNTIME)
+@Repeatable(Calendars.class)
+@Inherited  // note: container must also be @Inherited by JLS.
+public @interface Calendar {
+    String dayOfMonth() default "unspecified_month";
+    String dayOfWeek() default "unspecified_week";
+    int hour() default 6;
+}
+
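
For illustration, a minimal sketch of the query described in the comment
above, using a hypothetical demo class that is not part of the test suite:

import java.lang.annotation.Repeatable;

public class RepeatableMetaQuery {
  public static void main(String[] args) {
    // Ask the annotation type itself for its @Repeatable meta-annotation.
    Repeatable r = Calendar.class.getDeclaredAnnotation(Repeatable.class);
    if (r != null) {
      // For Calendar this prints the container type, i.e. class Calendars.
      System.out.println("container: " + r.value());
    } else {
      System.out.println("Calendar is not repeatable");
    }
  }
}
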
diff --git a/test/048-reflect-v8/src/Calendars.java b/test/048-reflect-v8/src/Calendars.java
new file mode 100644
index 0000000..caeda52
--- /dev/null
+++ b/test/048-reflect-v8/src/Calendars.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Inherited;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+// Plain old annotation; there's nothing 1.8-specific about it.
+@Retention(RetentionPolicy.RUNTIME)
+@Inherited  // note: elements must also be @Inherited by JLS.
+public @interface Calendars {
+  Calendar[] value();
+}
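
To show how the container is consumed at runtime, a minimal sketch against
the User class defined later in this change (hypothetical demo class, not
part of the test): getAnnotationsByType unwraps a directly-present
@Calendars container into its @Calendar values, while getAnnotation stays
null for Calendar because only the container is directly present.

public class ContainerUnwrapDemo {
  public static void main(String[] args) {
    // User carries @Calendars({@Calendar(...), @Calendar(...)}) in the binary.
    Calendar[] byType = User.class.getAnnotationsByType(Calendar.class);
    System.out.println("count: " + byType.length);  // 2

    // Only the container is directly present, so the lone lookup is null.
    System.out.println("direct: " + User.class.getAnnotation(Calendar.class));

    // The container itself is retrievable as an ordinary annotation.
    System.out.println("container: " + User.class.getAnnotation(Calendars.class));
  }
}
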
diff --git a/test/048-reflect-v8/src/IFaceA.java b/test/048-reflect-v8/src/IFaceA.java
new file mode 100644
index 0000000..9b1f610
--- /dev/null
+++ b/test/048-reflect-v8/src/IFaceA.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar)
+// in the binary.
+@Calendars ({
+  @Calendar(dayOfMonth="if_a_first"),
+  @Calendar(dayOfMonth="if_b_last")
+})
+public interface IFaceA {
+}
diff --git a/test/048-reflect-v8/src/IFaceSimple.java b/test/048-reflect-v8/src/IFaceSimple.java
new file mode 100644
index 0000000..93cf610
--- /dev/null
+++ b/test/048-reflect-v8/src/IFaceSimple.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple annotation, no container.
+@Calendar(dayOfMonth="if_simple_first")
+public interface IFaceSimple {
+
+}
diff --git a/test/048-reflect-v8/src/IsDefaultTest.java b/test/048-reflect-v8/src/IsDefaultTest.java
new file mode 100644
index 0000000..177dcf1
--- /dev/null
+++ b/test/048-reflect-v8/src/IsDefaultTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class IsDefaultTest {
+  interface DefaultInterface {
+    default void sayHi() {
+      System.out.println("hi default");
+    }
+  }
+
+  interface RegularInterface {
+    void sayHi();
+  }
+
+  class ImplementsWithDefault implements DefaultInterface {}
+  class ImplementsWithRegular implements RegularInterface {
+    public void sayHi() {
+      System.out.println("hello specific");
+    }
+  }
+
+  private static void printIsDefault(Class<?> klass) {
+    Method m;
+    try {
+      m = klass.getMethod("sayHi");
+    } catch (Throwable t) {
+      System.out.println(t);
+      return;
+    }
+
+    boolean isDefault = m.isDefault();
+    System.out.println(klass.getName() + " is default = " + (isDefault ? "yes" : "no"));
+  }
+
+  public static void test() {
+    System.out.println("==============================");
+    System.out.println("Are These Methods Default:");
+    System.out.println("==============================");
+
+    printIsDefault(DefaultInterface.class);
+    printIsDefault(RegularInterface.class);
+    printIsDefault(ImplementsWithDefault.class);
+    printIsDefault(ImplementsWithRegular.class);
+  }
+}
diff --git a/test/048-reflect-v8/src/Main.java b/test/048-reflect-v8/src/Main.java
new file mode 100644
index 0000000..f2b8287
--- /dev/null
+++ b/test/048-reflect-v8/src/Main.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    IsDefaultTest.test();
+    AnnotationTest.testAnnotationsByType();
+    AnnotationTest.testDeclaredAnnotation();
+    AnnotationTest.testDeclaredAnnotationsByType();
+    AnnotationTest.testMethodAnnotationsByType();
+    AnnotationTest.testMethodDeclaredAnnotations();
+    AnnotationTest.testMethodDeclaredAnnotationsByType();
+  }
+}
diff --git a/test/048-reflect-v8/src/SingleUser.java b/test/048-reflect-v8/src/SingleUser.java
new file mode 100644
index 0000000..0f9c430
--- /dev/null
+++ b/test/048-reflect-v8/src/SingleUser.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a single "Calendar" annotation in the binary.
+@Calendar(dayOfWeek="single", hour=23)
+public class SingleUser {
+
+}
diff --git a/test/048-reflect-v8/src/User.java b/test/048-reflect-v8/src/User.java
new file mode 100644
index 0000000..003ceeb
--- /dev/null
+++ b/test/048-reflect-v8/src/User.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar)
+// in the binary.
+//
+/* FIXME: Use this code instead, when Jack supports repeatable annotations properly.
+ *
+ * @Calendar(dayOfMonth="last")
+ * @Calendar(dayOfWeek="Fri", hour=23)
+ */
+@Calendars ({
+  @Calendar(dayOfMonth="last"),
+  @Calendar(dayOfWeek="Fri", hour=23)
+})
+public class User {
+
+}
diff --git a/test/048-reflect-v8/src/User2.java b/test/048-reflect-v8/src/User2.java
new file mode 100644
index 0000000..1a6049f
--- /dev/null
+++ b/test/048-reflect-v8/src/User2.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar,Calendar)
+// in the binary.
+// (Check for order, should be z,x,y)
+@Calendars ({
+  @Calendar(dayOfMonth="z"),
+  @Calendar(dayOfMonth="x"),
+  @Calendar(dayOfMonth="y")
+})
+public class User2 {
+
+}
diff --git a/test/048-reflect-v8/src/UserComplex.java b/test/048-reflect-v8/src/UserComplex.java
new file mode 100644
index 0000000..e262349
--- /dev/null
+++ b/test/048-reflect-v8/src/UserComplex.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar)
+// followed by a Calendar in the binary.
+// In other words { Calendars([C,C]), C }
+//
+// Note that trying to do {C, Calendars, C} or similar
+// is illegal per the JLS.
+@Calendar(dayOfMonth="afirst")
+@Calendars ({
+  @Calendar(dayOfMonth="zsecond"),
+  @Calendar(dayOfMonth="athird", hour=23)
+})
+// @Calendar(dayOfMonth="zlast")  // Leave for future ordering test
+public class UserComplex {
+
+}
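
For illustration, a hedged sketch (hypothetical demo class) of the lookups
the ordering comments refer to; it only prints what reflection reports and
leaves the exact order to the test's expected output.

import java.lang.annotation.Annotation;

public class MixedPresenceDemo {
  public static void main(String[] args) {
    // Both a lone @Calendar and a @Calendars container are directly present.
    for (Annotation a : UserComplex.class.getDeclaredAnnotations()) {
      System.out.println("declared: " + a.annotationType().getName());
    }
    // getDeclaredAnnotationsByType flattens the container and merges in the
    // lone @Calendar; the relative order is what the test pins down.
    for (Calendar c : UserComplex.class.getDeclaredAnnotationsByType(Calendar.class)) {
      System.out.println("byType: dayOfMonth=" + c.dayOfMonth());
    }
  }
}
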
diff --git a/test/048-reflect-v8/src/UserSub.java b/test/048-reflect-v8/src/UserSub.java
new file mode 100644
index 0000000..d60aa6a
--- /dev/null
+++ b/test/048-reflect-v8/src/UserSub.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class UserSub
+  extends User
+  implements IFaceA, IFaceSimple {
+
+}
diff --git a/test/048-reflect-v8/src/UserSub2.java b/test/048-reflect-v8/src/UserSub2.java
new file mode 100644
index 0000000..13e2eb0
--- /dev/null
+++ b/test/048-reflect-v8/src/UserSub2.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This calendar subsumes anything we would otherwise have inherited from the superclass.
+@Calendar(dayOfMonth="sub2")
+public class UserSub2
+  extends User
+  implements IFaceA, IFaceSimple {
+
+}
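
As a sketch of what UserSub and UserSub2 exercise (hypothetical demo class,
not part of the test): @Inherited only propagates annotations from
superclasses, never from implemented interfaces, and a directly-present
annotation of the same type stops the superclass lookup, which is the
"subsumes" behavior noted above.

public class InheritanceDemo {
  public static void main(String[] args) {
    // UserSub declares nothing itself; the @Calendar values come from the
    // superclass User, while the annotations on IFaceA/IFaceSimple are
    // ignored because @Inherited has no effect on interfaces.
    Calendar[] sub = UserSub.class.getAnnotationsByType(Calendar.class);
    System.out.println("UserSub count: " + sub.length);  // 2, from User

    // UserSub2's own @Calendar(dayOfMonth="sub2") subsumes the inherited ones.
    Calendar[] sub2 = UserSub2.class.getAnnotationsByType(Calendar.class);
    System.out.println("UserSub2 count: " + sub2.length);  // 1
    System.out.println("UserSub2: " + sub2[0].dayOfMonth());  // "sub2"
  }
}
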
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 77301d2..87656bc 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -101,7 +101,12 @@
 }
 #endif
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindInProcess(JNIEnv*, jobject, jint, jboolean) {
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindInProcess(
+    JNIEnv*,
+    jobject,
+    jboolean full_signatures,
+    jint,
+    jboolean) {
 #if __linux__
   if (IsPicImage()) {
     LOG(INFO) << "Image is pic, in-process unwinding check bypassed.";
@@ -122,14 +127,21 @@
   // We cannot really parse an exact stack, as the optimizing compiler may inline some functions.
   // This is also risky, as deduping might play a trick on us, so the test needs to make sure that
   // only unique functions are being expected.
+  // "mini-debug-info" does not include parameters to save space.
   std::vector<std::string> seq = {
       "Java_Main_unwindInProcess",                   // This function.
-      "boolean Main.unwindInProcess(int, boolean)",  // The corresponding Java native method frame.
+      "Main.unwindInProcess",                        // The corresponding Java native method frame.
+      "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)",  // Framework method.
+      "Main.main"                                    // The Java entry method.
+  };
+  std::vector<std::string> full_seq = {
+      "Java_Main_unwindInProcess",                   // This function.
+      "boolean Main.unwindInProcess(boolean, int, boolean)",  // The corresponding Java native method frame.
       "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)",  // Framework method.
       "void Main.main(java.lang.String[])"           // The Java entry method.
   };
 
-  bool result = CheckStack(bt.get(), seq);
+  bool result = CheckStack(bt.get(), full_signatures ? full_seq : seq);
   if (!kCauseSegfault) {
     return result ? JNI_TRUE : JNI_FALSE;
   } else {
@@ -178,7 +190,11 @@
 }
 #endif
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(JNIEnv*, jobject, jint pid_int) {
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(
+    JNIEnv*,
+    jobject,
+    jboolean full_signatures,
+    jint pid_int) {
 #if __linux__
   // TODO: What to do on Valgrind?
   pid_t pid = static_cast<pid_t>(pid_int);
@@ -214,17 +230,27 @@
 
   if (result) {
     // See comment in unwindInProcess for non-exact stack matching.
+    // "mini-debug-info" does not include parameters to save space.
     std::vector<std::string> seq = {
         // "Java_Main_sleep",                        // The sleep function being executed in the
                                                      // other runtime.
                                                      // Note: For some reason, the name isn't
                                                      // resolved, so don't look for it right now.
+        "Main.sleep",                                // The corresponding Java native method frame.
+        "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)",  // Framework method.
+        "Main.main"                                  // The Java entry method.
+    };
+    std::vector<std::string> full_seq = {
+        // "Java_Main_sleep",                        // The sleep function being executed in the
+                                                     // other runtime.
+                                                     // Note: For some reason, the name isn't
+                                                     // resolved, so don't look for it right now.
         "boolean Main.sleep(int, boolean, double)",  // The corresponding Java native method frame.
         "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)",  // Framework method.
         "void Main.main(java.lang.String[])"         // The Java entry method.
     };
 
-    result = CheckStack(bt.get(), seq);
+    result = CheckStack(bt.get(), full_signatures ? full_seq : seq);
   }
 
   if (ptrace(PTRACE_DETACH, pid, 0, 0) != 0) {
diff --git a/test/137-cfi/expected.txt b/test/137-cfi/expected.txt
index 6a5618e..8db7853 100644
--- a/test/137-cfi/expected.txt
+++ b/test/137-cfi/expected.txt
@@ -1 +1,2 @@
 JNI_OnLoad called
+JNI_OnLoad called
diff --git a/test/137-cfi/run b/test/137-cfi/run
index 9c567b6..6f4bcfe 100755
--- a/test/137-cfi/run
+++ b/test/137-cfi/run
@@ -14,4 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} "$@" -Xcompiler-option --generate-debug-info
+# Test with full DWARF debugging information.
+# Check full signatures of methods.
+${RUN} "$@" -Xcompiler-option --generate-debug-info --args --full-signatures
+
+# Test with minimal compressed debugging information.
+# Check only method names (parameters are omitted to save space).
+${RUN} "$@" -Xcompiler-option --generate-mini-debug-info
diff --git a/test/137-cfi/src/Main.java b/test/137-cfi/src/Main.java
index 5474c9b..7755338 100644
--- a/test/137-cfi/src/Main.java
+++ b/test/137-cfi/src/Main.java
@@ -34,19 +34,28 @@
 
   private boolean secondary;
 
+  private boolean full_signatures;
+
   private boolean passed;
 
-  public Main(boolean secondary) {
+  public Main(boolean secondary, boolean full_signatures) {
       this.secondary = secondary;
+      this.full_signatures = full_signatures;
   }
 
   public static void main(String[] args) throws Exception {
     System.loadLibrary(args[0]);
       boolean secondary = false;
-      if (args.length > 0 && args[args.length - 1].equals("--secondary")) {
+      boolean full_signatures = false;
+      for (String arg : args) {
+        if (arg.equals("--secondary")) {
           secondary = true;
+        }
+        if (arg.equals("--full-signatures")) {
+          full_signatures = true;
+        }
       }
-      new Main(secondary).run();
+      new Main(secondary, full_signatures).run();
   }
 
   private void run() {
@@ -96,7 +105,7 @@
               throw new RuntimeException(e);
           }
 
-          if (!unwindOtherProcess(pid)) {
+          if (!unwindOtherProcess(full_signatures, pid)) {
               System.out.println("Unwinding other process failed.");
           }
       } finally {
@@ -154,7 +163,7 @@
       if (b) {
           return sleep(2, b, 1.0);
       } else {
-          return unwindInProcess(1, b);
+          return unwindInProcess(full_signatures, 1, b);
       }
   }
 
@@ -162,6 +171,6 @@
 
   public native boolean sleep(int i, boolean b, double dummy);
 
-  public native boolean unwindInProcess(int i, boolean b);
-  public native boolean unwindOtherProcess(int pid);
+  public native boolean unwindInProcess(boolean full_signatures, int i, boolean b);
+  public native boolean unwindOtherProcess(boolean full_signatures, int pid);
 }
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index 0e07f47..5479818 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -666,11 +666,11 @@
   /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (before)
   /// CHECK-DAG:     <<Null:l\d+>>    NullConstant
   /// CHECK-DAG:     <<Cond:z\d+>>    NotEqual [<<Null>>,<<Null>>]
-  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
   /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (after)
   /// CHECK-DAG:     <<Const0:i\d+>>  IntConstant 0
-  /// CHECK-DAG:                      If [<<Const0>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Const0>>]
 
   /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (after)
   /// CHECK-NOT:                      NotEqual
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 3e6d1f4..06cfd0a 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -622,28 +622,39 @@
   static int[][] mA;
 
   /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (before)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: InvokeStaticOrDirect
-  /// CHECK-DAG: ArraySet
-
+  //  Array references mA[i] and ..[j] both in inner loop.
+  /// CHECK-DAG:  <<Get1:l\d+>>  ArrayGet [<<Array1:l\d+>>,<<Bounds1:i\d+>>] loop:<<InnerLoop:B\d+>>
+  /// CHECK-DAG:  <<Array1>>     NullCheck [<<Field1:l\d+>>]                 loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Len1:i\d+>>  ArrayLength [<<Array1>>]                    loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Bounds1>>    BoundsCheck [<<Index1:i\d+>>,<<Len1>>]      loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Get2:i\d+>>  ArrayGet [<<Array2:l\d+>>,<<Bounds2:i\d+>>] loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Array2>>     NullCheck [<<Get1>>]                        loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Len2:i\d+>>  ArrayLength [<<Array2>>]                    loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Bounds2>>    BoundsCheck [<<Index2:i\d+>>,<<Len2>>]      loop:<<InnerLoop>>
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop:B\d+>>
+  /// CHECK-DAG:  <<Field1>>     StaticFieldGet                              loop:none
+  /// CHECK-EVAL: "<<InnerLoop>>" != "<<OuterLoop>>"
+  //
+  /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (after)
+  //  Array reference mA[i] hoisted to same level as deopt.
+  /// CHECK-DAG:                 Deoptimize                                  loop:<<OuterLoop:B\d+>>
+  /// CHECK-DAG:                 ArrayLength                                 loop:<<OuterLoop>>
+  /// CHECK-DAG:  <<Get1:l\d+>>  ArrayGet [<<Array1:l\d+>>,<<Index1:i\d+>>]  loop:<<OuterLoop>>
+  //  Array reference ..[j] still in inner loop, with a direct index.
+  /// CHECK-DAG:  <<Get2:i\d+>>  ArrayGet [<<Array2:l\d+>>,<<Index2:i\d+>>]  loop:<<InnerLoop:B\d+>>
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop>>
+  //  Synthetic phi.
+  /// CHECK-DAG:  <<Array2>>     Phi                                         loop:<<OuterLoop>>
+  /// CHECK-DAG:  <<Array1>>     StaticFieldGet                              loop:none
+  /// CHECK-EVAL: "<<InnerLoop>>" != "<<OuterLoop>>"
+  //
   /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (after)
   /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-NOT: ArrayGet
-  /// CHECK-DAG: InvokeStaticOrDirect
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: Exit
-  /// CHECK-DAG: Deoptimize
-
   static void dynamicBCEAndIntrinsic(int n) {
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < n; j++) {
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index 92cf807..d48b30e 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -275,23 +275,6 @@
     }
   }
 
-  /// CHECK-START: void Main.testNotInstanceOf_Inlined(java.lang.Object) inliner (after)
-  /// CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
-  /// CHECK-DAG:     <<Not:z\d+>>  BooleanNot [<<IOf>>]
-  /// CHECK-DAG:                   If [<<Not>>]
-
-  /// CHECK-START: void Main.testNotInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (before)
-  /// CHECK:         CheckCast
-  /// CHECK-NOT:     CheckCast
-
-  /// CHECK-START: void Main.testNotInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (after)
-  /// CHECK-NOT:     CheckCast
-  public void testNotInstanceOf_Inlined(Object o) {
-    if ($inline$InstanceofSubclassC(o)) {
-      ((SubclassC)o).$noinline$g();
-    }
-  }
-
   /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 0fd7801..3c8abeb 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -969,6 +969,13 @@
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:                       If [<<Arg>>]
 
+  /// CHECK-START: int Main.EqualTrueRhs(boolean) instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:     <<Const5:i\d+>>   IntConstant 5
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const3>>,<<Const5>>,<<Arg>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
   public static int EqualTrueRhs(boolean arg) {
     return (arg != true) ? 3 : 5;
   }
@@ -983,6 +990,13 @@
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:                       If [<<Arg>>]
 
+  /// CHECK-START: int Main.EqualTrueLhs(boolean) instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:     <<Const5:i\d+>>   IntConstant 5
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const3>>,<<Const5>>,<<Arg>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
   public static int EqualTrueLhs(boolean arg) {
     return (true != arg) ? 3 : 5;
   }
@@ -995,8 +1009,14 @@
 
   /// CHECK-START: int Main.EqualFalseRhs(boolean) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
-  /// CHECK-DAG:                       If [<<NotArg>>]
+  /// CHECK-DAG:                       If [<<Arg>>]
+
+  /// CHECK-START: int Main.EqualFalseRhs(boolean) instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:     <<Const5:i\d+>>   IntConstant 5
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const5>>,<<Const3>>,<<Arg>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
   public static int EqualFalseRhs(boolean arg) {
     return (arg != false) ? 3 : 5;
@@ -1010,8 +1030,14 @@
 
   /// CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
-  /// CHECK-DAG:                       If [<<NotArg>>]
+  /// CHECK-DAG:                       If [<<Arg>>]
+
+  /// CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:     <<Const5:i\d+>>   IntConstant 5
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const5>>,<<Const3>>,<<Arg>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
   public static int EqualFalseLhs(boolean arg) {
     return (false != arg) ? 3 : 5;
@@ -1025,8 +1051,14 @@
 
   /// CHECK-START: int Main.NotEqualTrueRhs(boolean) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
-  /// CHECK-DAG:                       If [<<NotArg>>]
+  /// CHECK-DAG:                       If [<<Arg>>]
+
+  /// CHECK-START: int Main.NotEqualTrueRhs(boolean) instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:     <<Const5:i\d+>>   IntConstant 5
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const5>>,<<Const3>>,<<Arg>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
   public static int NotEqualTrueRhs(boolean arg) {
     return (arg == true) ? 3 : 5;
@@ -1040,8 +1072,14 @@
 
   /// CHECK-START: int Main.NotEqualTrueLhs(boolean) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
-  /// CHECK-DAG:                       If [<<NotArg>>]
+  /// CHECK-DAG:                       If [<<Arg>>]
+
+  /// CHECK-START: int Main.NotEqualTrueLhs(boolean) instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:     <<Const5:i\d+>>   IntConstant 5
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const5>>,<<Const3>>,<<Arg>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
   public static int NotEqualTrueLhs(boolean arg) {
     return (true == arg) ? 3 : 5;
@@ -1057,6 +1095,13 @@
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:                       If [<<Arg>>]
 
+  /// CHECK-START: int Main.NotEqualFalseRhs(boolean) instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:     <<Const5:i\d+>>   IntConstant 5
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const3>>,<<Const5>>,<<Arg>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
   public static int NotEqualFalseRhs(boolean arg) {
     return (arg == false) ? 3 : 5;
   }
@@ -1071,38 +1116,51 @@
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:                       If [<<Arg>>]
 
+  /// CHECK-START: int Main.NotEqualFalseLhs(boolean) instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:     <<Const5:i\d+>>   IntConstant 5
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const3>>,<<Const5>>,<<Arg>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
   public static int NotEqualFalseLhs(boolean arg) {
     return (false == arg) ? 3 : 5;
   }
 
   /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
-  /// CHECK-DAG:     <<BoolNot:z\d+>>  BooleanNot [<<Arg>>]
-  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<BoolNot>>,<<Const2>>]
-  /// CHECK-DAG:                       Return [<<Cond>>]
+  /// CHECK-DAG:     <<NotArg:i\d+>>   Select [<<Const1>>,<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<NotArg>>,<<Const2>>]
+  /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<NotCond>>]
 
   /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
-  /// CHECK-DAG:     <<False:i\d+>>    IntConstant 0
-  /// CHECK-DAG:                       Return [<<False>>]
+  /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
+  /// CHECK-DAG:                       Return [<<True>>]
 
   public static boolean EqualBoolVsIntConst(boolean arg) {
-    return (arg ? 0 : 1) == 2;
+    return (arg ? 0 : 1) != 2;
   }
 
   /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
-  /// CHECK-DAG:     <<BoolNot:z\d+>>  BooleanNot [<<Arg>>]
-  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<BoolNot>>,<<Const2>>]
-  /// CHECK-DAG:                       Return [<<Cond>>]
+  /// CHECK-DAG:     <<NotArg:i\d+>>   Select [<<Const1>>,<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<NotArg>>,<<Const2>>]
+  /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<NotCond>>]
 
   /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
-  /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
-  /// CHECK-DAG:                       Return [<<True>>]
+  /// CHECK-DAG:     <<False:i\d+>>    IntConstant 0
+  /// CHECK-DAG:                       Return [<<False>>]
 
   public static boolean NotEqualBoolVsIntConst(boolean arg) {
-    return (arg ? 0 : 1) != 2;
+    return (arg ? 0 : 1) == 2;
   }
 
   /*
@@ -1113,19 +1171,16 @@
 
   /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>    BooleanNot [<<Arg>>]
-  /// CHECK-DAG:     <<NotNotArg:z\d+>> BooleanNot [<<NotArg>>]
+  /// CHECK-DAG:     <<Const0:i\d+>>    IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<NotArg:i\d+>>    Select [<<Const1>>,<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<NotNotArg:i\d+>> Select [<<Const1>>,<<Const0>>,<<NotArg>>]
   /// CHECK-DAG:                        Return [<<NotNotArg>>]
 
   /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
-  /// CHECK-DAG:                        BooleanNot [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after)
-  /// CHECK:                            BooleanNot
-  /// CHECK-NOT:                        BooleanNot
-
   public static boolean NegateValue(boolean arg) {
     return !arg;
   }
@@ -1254,8 +1309,14 @@
 
   /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (after)
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
-  /// CHECK-DAG:      <<Not:z\d+>>      BooleanNot [<<Field>>]
-  /// CHECK-DAG:                        If [<<Not>>]
+  /// CHECK-DAG:                        If [<<Field>>]
+
+  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
 
   public static int booleanFieldNotEqualOne() {
     return (booleanField == true) ? 13 : 54;
@@ -1269,8 +1330,14 @@
 
   /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (after)
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
-  /// CHECK-DAG:      <<Not:z\d+>>      BooleanNot [<<Field>>]
-  /// CHECK-DAG:                        If [<<Not>>]
+  /// CHECK-DAG:                        If [<<Field>>]
+
+  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier_before_codegen (after)
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
 
   public static int booleanFieldEqualZero() {
     return (booleanField != false) ? 13 : 54;
@@ -1278,18 +1345,27 @@
 
   /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
-  /// CHECK-DAG:      <<GT:z\d+>>       GreaterThan [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<GT:i\d+>>       Select [<<Const1>>,<<Const0>>,<<LE>>]
   /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<GT>>,<<Const1>>]
-  /// CHECK-DAG:                        If [<<NE>>]
+  /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
+  /// CHECK-DAG:                        Return [<<Result>>]
 
   /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
-  /// CHECK-DAG:                        If [<<LE:z\d+>>]
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE:z\d+>>]
   /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
-  // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions.
+  /// CHECK-DAG:                        Return [<<Result>>]
+  // Note that we match `LE` from Select because there are two identical
+  // LessThanOrEqual instructions.
 
   public static int intConditionNotEqualOne(int i) {
     return ((i > 42) == true) ? 13 : 54;
@@ -1298,17 +1374,26 @@
   /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
-  /// CHECK-DAG:      <<GT:z\d+>>       GreaterThan [<<Arg>>,<<Const42>>]
-  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<GT>>,<<Const0>>]
-  /// CHECK-DAG:                        If [<<EQ>>]
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<GT:i\d+>>       Select [<<Const1>>,<<Const0>>,<<LE>>]
+  /// CHECK-DAG:      <<NE:z\d+>>       Equal [<<GT>>,<<Const0>>]
+  /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
+  /// CHECK-DAG:                        Return [<<Result>>]
 
   /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
-  /// CHECK-DAG:                        If [<<LE:z\d+>>]
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE:z\d+>>]
   /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
-  // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions.
+  /// CHECK-DAG:                        Return [<<Result>>]
+  // Note that we match `LE` from Select because there are two identical
+  // LessThanOrEqual instructions.
 
   public static int intConditionEqualZero(int i) {
     return ((i > 42) != false) ? 13 : 54;
@@ -1316,17 +1401,33 @@
 
   // Test that conditions on float/double are not flipped.
 
+  /// CHECK-START: int Main.floatConditionNotEqualOne(float) ssa_builder (after)
+  /// CHECK:                            LessThanOrEqual
+
   /// CHECK-START: int Main.floatConditionNotEqualOne(float) register (before)
-  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:                        NotEqual [{{i\d+}},<<Const1>>]
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Const42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
 
   public static int floatConditionNotEqualOne(float f) {
     return ((f > 42.0f) == true) ? 13 : 54;
   }
 
+  /// CHECK-START: int Main.doubleConditionEqualZero(double) ssa_builder (after)
+  /// CHECK:                            LessThanOrEqual
+
   /// CHECK-START: int Main.doubleConditionEqualZero(double) register (before)
-  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:                        Equal [{{i\d+}},<<Const0>>]
+  /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
 
   public static int doubleConditionEqualZero(double d) {
     return ((d > 42.0) != false) ? 13 : 54;
@@ -1374,6 +1475,10 @@
     assertIntEquals(NotEqualTrueLhs(true), 3);
     assertIntEquals(NotEqualFalseRhs(true), 5);
     assertIntEquals(NotEqualFalseLhs(true), 5);
+    assertBooleanEquals(EqualBoolVsIntConst(true), true);
+    assertBooleanEquals(EqualBoolVsIntConst(true), true);
+    assertBooleanEquals(NotEqualBoolVsIntConst(false), false);
+    assertBooleanEquals(NotEqualBoolVsIntConst(false), false);
     assertBooleanEquals(NotNotBool(true), true);
     assertBooleanEquals(NotNotBool(false), false);
     assertFloatEquals(Div2(100.0f), 50.0f);
diff --git a/test/463-checker-boolean-simplifier/src/Main.java b/test/463-checker-boolean-simplifier/src/Main.java
index 61510d8..682f126 100644
--- a/test/463-checker-boolean-simplifier/src/Main.java
+++ b/test/463-checker-boolean-simplifier/src/Main.java
@@ -37,7 +37,7 @@
    * empty branches removed.
    */
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (before)
   /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -45,23 +45,24 @@
   /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const1>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Phi>>]
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (before)
   /// CHECK:                           Goto
   /// CHECK:                           Goto
   /// CHECK:                           Goto
   /// CHECK-NOT:                       Goto
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (after)
   /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<NotParam:z\d+>> BooleanNot [<<Param>>]
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:     <<NotParam:i\d+>> Select [<<Const1>>,<<Const0>>,<<Param>>]
   /// CHECK-DAG:                       Return [<<NotParam>>]
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (after)
   /// CHECK-NOT:                       If
   /// CHECK-NOT:                       Phi
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (after)
   /// CHECK:                           Goto
   /// CHECK-NOT:                       Goto
 
@@ -74,7 +75,7 @@
    * and 0 when False.
    */
 
-  /// CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.GreaterThan(int, int) select_generator (before)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
@@ -84,13 +85,14 @@
   /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const0>>,<<Const1>>]
   /// CHECK-DAG:                       Return [<<Phi>>]
 
-  /// CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.GreaterThan(int, int) select_generator (after)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:     <<Cond:z\d+>>     GreaterThan [<<ParamX>>,<<ParamY>>]
-  /// CHECK-DAG:                       Return [<<Cond>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const0>>,<<Const1>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
   public static boolean GreaterThan(int x, int y) {
     return (x <= y) ? false : true;
@@ -101,7 +103,7 @@
    * and 1 when False.
    */
 
-  /// CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.LessThan(int, int) select_generator (before)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
@@ -111,13 +113,14 @@
   /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const1>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Phi>>]
 
-  /// CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.LessThan(int, int) select_generator (after)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Cond:z\d+>>     LessThan [<<ParamX>>,<<ParamY>>]
-  /// CHECK-DAG:                       Return [<<Cond>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     GreaterThanOrEqual [<<ParamX>>,<<ParamY>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const1>>,<<Const0>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
   public static boolean LessThan(int x, int y) {
     return (x < y) ? true : false;
@@ -128,7 +131,7 @@
    * Note that Phis are discovered retrospectively.
    */
 
-  /// CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.ValuesOrdered(int, int, int) select_generator (before)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamZ:i\d+>>   ParameterValue
@@ -145,29 +148,25 @@
   /// CHECK-DAG:     <<PhiYZ>>         Phi [<<Const1>>,<<Const0>>]
   /// CHECK-DAG:     <<PhiXYZ>>        Phi [<<Const1>>,<<Const0>>]
 
-  /// CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.ValuesOrdered(int, int, int) select_generator (after)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamZ:i\d+>>   ParameterValue
-  /// CHECK-DAG:     <<CmpXY:z\d+>>    LessThanOrEqual [<<ParamX>>,<<ParamY>>]
-  /// CHECK-DAG:     <<CmpYZ:z\d+>>    LessThanOrEqual [<<ParamY>>,<<ParamZ>>]
-  /// CHECK-DAG:     <<CmpXYZ:z\d+>>   Equal [<<CmpXY>>,<<CmpYZ>>]
-  /// CHECK-DAG:                       Return [<<CmpXYZ>>]
+  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:     <<CmpXY:z\d+>>    GreaterThan [<<ParamX>>,<<ParamY>>]
+  /// CHECK-DAG:     <<SelXY:i\d+>>    Select [<<Const1>>,<<Const0>>,<<CmpXY>>]
+  /// CHECK-DAG:     <<CmpYZ:z\d+>>    GreaterThan [<<ParamY>>,<<ParamZ>>]
+  /// CHECK-DAG:     <<SelYZ:i\d+>>    Select [<<Const1>>,<<Const0>>,<<CmpYZ>>]
+  /// CHECK-DAG:     <<CmpXYZ:z\d+>>   NotEqual [<<SelXY>>,<<SelYZ>>]
+  /// CHECK-DAG:     <<SelXYZ:i\d+>>   Select [<<Const1>>,<<Const0>>,<<CmpXYZ>>]
+  /// CHECK-DAG:                       Return [<<SelXYZ>>]
 
   public static boolean ValuesOrdered(int x, int y, int z) {
     return (x <= y) == (y <= z);
   }
 
-  /// CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (before)
-  /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
-  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
-  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
-  /// CHECK-DAG:     <<NotParam:z\d+>> BooleanNot [<<Param>>]
-  /// CHECK-DAG:                       If [<<NotParam>>]
-  /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const42>>,<<Const43>>]
-  /// CHECK-DAG:                       Return [<<Phi>>]
-
-  /// CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  /// CHECK-START: int Main.NegatedCondition(boolean) select_generator (before)
   /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
   /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
@@ -175,9 +174,14 @@
   /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const42>>,<<Const43>>]
   /// CHECK-DAG:                       Return [<<Phi>>]
 
-  // Note: The fact that branches are swapped is verified by running the test.
+  /// CHECK-START: int Main.NegatedCondition(boolean) select_generator (after)
+  /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const43>>,<<Const42>>,<<Param>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
-  /// CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  /// CHECK-START: int Main.NegatedCondition(boolean) select_generator (after)
   /// CHECK-NOT:                       BooleanNot
 
   public static int NegatedCondition(boolean x) {
@@ -188,6 +192,179 @@
     }
   }
 
+  /// CHECK-START: int Main.SimpleTrueBlock(boolean, int) select_generator (after)
+  /// CHECK-DAG:     <<ParamX:z\d+>>   ParameterValue
+  /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<ParamY>>,<<Const42>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const43>>,<<Add>>,<<ParamX>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
+  /// CHECK-START: int Main.SimpleTrueBlock(boolean, int) select_generator (after)
+  /// CHECK-NOT:     If
+
+  public static int SimpleTrueBlock(boolean x, int y) {
+    return x ? y + 42 : 43;
+  }
+
+  /// CHECK-START: int Main.SimpleFalseBlock(boolean, int) select_generator (after)
+  /// CHECK-DAG:     <<ParamX:z\d+>>   ParameterValue
+  /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<ParamY>>,<<Const43>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Add>>,<<Const42>>,<<ParamX>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
+  /// CHECK-START: int Main.SimpleFalseBlock(boolean, int) select_generator (after)
+  /// CHECK-NOT:     If
+
+  public static int SimpleFalseBlock(boolean x, int y) {
+    return x ? 42 : y + 43;
+  }
+
+  /// CHECK-START: int Main.SimpleBothBlocks(boolean, int, int) select_generator (after)
+  /// CHECK-DAG:     <<ParamX:z\d+>>   ParameterValue
+  /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<ParamZ:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  /// CHECK-DAG:     <<AddTrue:i\d+>>  Add [<<ParamY>>,<<Const42>>]
+  /// CHECK-DAG:     <<AddFalse:i\d+>> Add [<<ParamZ>>,<<Const43>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<AddFalse>>,<<AddTrue>>,<<ParamX>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
+  /// CHECK-START: int Main.SimpleBothBlocks(boolean, int, int) select_generator (after)
+  /// CHECK-NOT:     If
+
+  public static int SimpleBothBlocks(boolean x, int y, int z) {
+    return x ? y + 42 : z + 43;
+  }
+
+  /// CHECK-START: int Main.ThreeBlocks(boolean, boolean) select_generator (after)
+  /// CHECK-DAG:     <<ParamX:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<ParamY:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Const2:i\d+>>    IntConstant 2
+  /// CHECK-DAG:     <<Const3:i\d+>>    IntConstant 3
+  /// CHECK-DAG:     <<Select23:i\d+>>  Select [<<Const3>>,<<Const2>>,<<ParamY>>]
+  /// CHECK-DAG:     <<Select123:i\d+>> Select [<<Select23>>,<<Const1>>,<<ParamX>>]
+  /// CHECK-DAG:                        Return [<<Select123>>]
+
+  public static int ThreeBlocks(boolean x, boolean y) {
+    if (x) {
+      return 1;
+    } else if (y) {
+      return 2;
+    } else {
+      return 3;
+    }
+  }
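+
+  // The two nested diamonds collapse bottom-up: ThreeBlocks is equivalent to
+  // x ? 1 : (y ? 2 : 3), which becomes Select [Select [3, 2, y], 1, x] as
+  // checked above.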
+
+  /// CHECK-START: int Main.MultiplePhis() select_generator (before)
+  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:     <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<PhiX:i\d+>>     Phi [<<Const0>>,<<Const13>>,<<Const42>>]
+  /// CHECK-DAG:     <<PhiY:i\d+>>     Phi [<<Const1>>,<<Add:i\d+>>,<<Add>>]
+  /// CHECK-DAG:     <<Add>>           Add [<<PhiY>>,<<Const1>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     LessThanOrEqual [<<Add>>,<<Const1>>]
+  /// CHECK-DAG:                       If [<<Cond>>]
+  /// CHECK-DAG:                       Return [<<PhiX>>]
+
+  /// CHECK-START: int Main.MultiplePhis() select_generator (after)
+  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:     <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<PhiX:i\d+>>     Phi [<<Const0>>,<<Select:i\d+>>]
+  /// CHECK-DAG:     <<PhiY:i\d+>>     Phi [<<Const1>>,<<Add:i\d+>>]
+  /// CHECK-DAG:     <<Add>>           Add [<<PhiY>>,<<Const1>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     LessThanOrEqual [<<Add>>,<<Const1>>]
+  /// CHECK-DAG:     <<Select>>        Select [<<Const13>>,<<Const42>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<PhiX>>]
+
+  public static int MultiplePhis() {
+    int x = 0;
+    int y = 1;
+    while (y++ < 10) {
+      if (y > 1) {
+        x = 13;
+      } else {
+        x = 42;
+      }
+    }
+    return x;
+  }
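+
+  // Note: only the if/else diamond inside the loop body becomes a Select; the
+  // loop-header phis (PhiX, PhiY) must survive, since their values flow along
+  // the back edge.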
+
+  /// CHECK-START: int Main.TrueBlockWithTooManyInstructions(boolean) select_generator (before)
+  /// CHECK-DAG:     <<This:l\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
+  /// CHECK-DAG:     <<Const43:i\d+>> IntConstant 43
+  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:     <<Iget:i\d+>>    InstanceFieldGet [<<This>>]
+  /// CHECK-DAG:     <<Add:i\d+>>     Add [<<Iget>>,<<Const2>>]
+  /// CHECK-DAG:                      Phi [<<Add>>,<<Const43>>]
+
+  /// CHECK-START: int Main.TrueBlockWithTooManyInstructions(boolean) select_generator (after)
+  /// CHECK-NOT:     Select
+
+  public int TrueBlockWithTooManyInstructions(boolean x) {
+    return x ? (read_field + 2) : 43;
+  }
+
+  /// CHECK-START: int Main.FalseBlockWithTooManyInstructions(boolean) select_generator (before)
+  /// CHECK-DAG:     <<This:l\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
+  /// CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
+  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:     <<Iget:i\d+>>    InstanceFieldGet [<<This>>]
+  /// CHECK-DAG:     <<Add:i\d+>>     Add [<<Iget>>,<<Const3>>]
+  /// CHECK-DAG:                      Phi [<<Const42>>,<<Add>>]
+
+  /// CHECK-START: int Main.FalseBlockWithTooManyInstructions(boolean) select_generator (after)
+  /// CHECK-NOT:     Select
+
+  public int FalseBlockWithTooManyInstructions(boolean x) {
+    return x ? 42 : (read_field + 3);
+  }
+
+  /// CHECK-START: int Main.TrueBlockWithSideEffects(boolean) select_generator (before)
+  /// CHECK-DAG:     <<This:l\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>> IntConstant 43
+  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<This>>,<<Const42>>]
+  /// CHECK-DAG:                      Phi [<<Const42>>,<<Const43>>]
+
+  /// CHECK-START: int Main.TrueBlockWithSideEffects(boolean) select_generator (after)
+  /// CHECK-NOT:     Select
+
+  public int TrueBlockWithSideEffects(boolean x) {
+    return x ? (write_field = 42) : 43;
+  }
+
+  /// CHECK-START: int Main.FalseBlockWithSideEffects(boolean) select_generator (before)
+  /// CHECK-DAG:     <<This:l\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>> IntConstant 43
+  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<This>>,<<Const43>>]
+  /// CHECK-DAG:                      Phi [<<Const42>>,<<Const43>>]
+
+  /// CHECK-START: int Main.FalseBlockWithSideEffects(boolean) select_generator (after)
+  /// CHECK-NOT:     Select
+
+  public int FalseBlockWithSideEffects(boolean x) {
+    return x ? 42 : (write_field = 43);
+  }
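+
+  // The four tests above pin down when select generation bails out: a branch
+  // with more than one instruction, or one with side effects (the field write),
+  // cannot be executed unconditionally, so the If/Phi shape is kept.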
+
   public static void main(String[] args) {
     assertBoolEquals(false, BooleanNot(true));
     assertBoolEquals(true, BooleanNot(false));
@@ -206,5 +383,30 @@
     assertBoolEquals(false, ValuesOrdered(5, 5, 3));
     assertIntEquals(42, NegatedCondition(true));
     assertIntEquals(43, NegatedCondition(false));
+    assertIntEquals(46, SimpleTrueBlock(true, 4));
+    assertIntEquals(43, SimpleTrueBlock(false, 4));
+    assertIntEquals(42, SimpleFalseBlock(true, 7));
+    assertIntEquals(50, SimpleFalseBlock(false, 7));
+    assertIntEquals(48, SimpleBothBlocks(true, 6, 2));
+    assertIntEquals(45, SimpleBothBlocks(false, 6, 2));
+    assertIntEquals(1, ThreeBlocks(true, true));
+    assertIntEquals(1, ThreeBlocks(true, false));
+    assertIntEquals(2, ThreeBlocks(false, true));
+    assertIntEquals(3, ThreeBlocks(false, false));
+    assertIntEquals(13, MultiplePhis());
+
+    Main m = new Main();
+    assertIntEquals(42, m.TrueBlockWithTooManyInstructions(true));
+    assertIntEquals(43, m.TrueBlockWithTooManyInstructions(false));
+    assertIntEquals(42, m.FalseBlockWithTooManyInstructions(true));
+    assertIntEquals(43, m.FalseBlockWithTooManyInstructions(false));
+    assertIntEquals(42, m.TrueBlockWithSideEffects(true));
+    assertIntEquals(43, m.TrueBlockWithSideEffects(false));
+    assertIntEquals(42, m.FalseBlockWithSideEffects(true));
+    assertIntEquals(43, m.FalseBlockWithSideEffects(false));
   }
+
+  // These need to be instance fields so as not to generate a LoadClass for iget/iput.
+  public int read_field = 40;
+  public int write_field = 42;
 }
diff --git a/test/468-checker-bool-simplifier-regression/smali/TestCase.smali b/test/468-checker-bool-simplifier-regression/smali/TestCase.smali
index da1c5ec..87ad21e 100644
--- a/test/468-checker-bool-simplifier-regression/smali/TestCase.smali
+++ b/test/468-checker-bool-simplifier-regression/smali/TestCase.smali
@@ -18,7 +18,7 @@
 
 .field public static value:Z
 
-## CHECK-START: boolean TestCase.testCase() boolean_simplifier (before)
+## CHECK-START: boolean TestCase.testCase() select_generator (before)
 ## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
 ## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
 ## CHECK-DAG:     <<Value:z\d+>>    StaticFieldGet
@@ -26,10 +26,12 @@
 ## CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const1>>,<<Const0>>]
 ## CHECK-DAG:                       Return [<<Phi>>]
 
-## CHECK-START: boolean TestCase.testCase() boolean_simplifier (after)
+## CHECK-START: boolean TestCase.testCase() select_generator (after)
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
 ## CHECK-DAG:     <<Value:z\d+>>    StaticFieldGet
-## CHECK-DAG:     <<Not:z\d+>>      BooleanNot [<<Value>>]
-## CHECK-DAG:                       Return [<<Not>>]
+## CHECK-DAG:     <<Select:i\d+>>   Select [<<Const1>>,<<Const0>>,<<Value>>]
+## CHECK-DAG:                       Return [<<Select>>]
 
 .method public static testCase()Z
     .registers 2
diff --git a/test/474-checker-boolean-input/src/Main.java b/test/474-checker-boolean-input/src/Main.java
index a2b219d..fbc28d8 100644
--- a/test/474-checker-boolean-input/src/Main.java
+++ b/test/474-checker-boolean-input/src/Main.java
@@ -27,9 +27,9 @@
    * we implement a suitable type analysis.
    */
 
-  /// CHECK-START: boolean Main.TestPhiAsBoolean(int) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.TestPhiAsBoolean(int) select_generator (after)
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi
-  /// CHECK-DAG:                      BooleanNot [<<Phi>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Phi>>]
 
   public static boolean f1;
   public static boolean f2;
@@ -47,9 +47,9 @@
    * we implement a suitable type analysis.
    */
 
-  /// CHECK-START: boolean Main.TestAndAsBoolean(boolean, boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.TestAndAsBoolean(boolean, boolean) select_generator (after)
   /// CHECK-DAG:     <<And:i\d+>>     And
-  /// CHECK-DAG:                      BooleanNot [<<And>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<And>>]
 
   public static boolean InlineAnd(boolean x, boolean y) {
     return x & y;
@@ -64,9 +64,9 @@
    * we implement a suitable type analysis.
    */
 
-  /// CHECK-START: boolean Main.TestOrAsBoolean(boolean, boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.TestOrAsBoolean(boolean, boolean) select_generator (after)
   /// CHECK-DAG:     <<Or:i\d+>>      Or
-  /// CHECK-DAG:                      BooleanNot [<<Or>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Or>>]
 
   public static boolean InlineOr(boolean x, boolean y) {
     return x | y;
@@ -81,9 +81,9 @@
    * we implement a suitable type analysis.
    */
 
-  /// CHECK-START: boolean Main.TestXorAsBoolean(boolean, boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.TestXorAsBoolean(boolean, boolean) select_generator (after)
   /// CHECK-DAG:     <<Xor:i\d+>>     Xor
-  /// CHECK-DAG:                      BooleanNot [<<Xor>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Xor>>]
 
   public static boolean InlineXor(boolean x, boolean y) {
     return x ^ y;
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
index 5adafaf..e5171f0 100644
--- a/test/480-checker-dead-blocks/src/Main.java
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -56,6 +56,8 @@
       z = x + y;
     } else {
       z = x - y;
+      // Prevent HSelect simplification by having a branch with multiple instructions.
+      System.nanoTime();
     }
     return z;
   }
@@ -86,6 +88,8 @@
       z = x + y;
     } else {
       z = x - y;
+      // Prevent HSelect simplification by having a branch with multiple instructions.
+      System.nanoTime();
     }
     return z;
   }
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
index d0b33b9..f8f0aa3 100644
--- a/test/482-checker-loop-back-edge-use/src/Main.java
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -40,6 +40,9 @@
   /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse2>>
 
   public static void loop2(boolean incoming) {
+    // Add some code at entry to avoid having the entry block be a pre header.
+    // This avoids having to create a synthesized block.
+    System.out.println("Enter");
     while (true) {
       System.out.println("foo");
       while (incoming) {}
@@ -170,6 +173,9 @@
   /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse>>
 
   public static void loop9() {
+    // Add some code at entry to avoid having the entry block be a pre header.
+    // This avoids having to create a synthesized block.
+    System.out.println("Enter");
     while (Runtime.getRuntime() != null) {
       // 'incoming' must only have a use in the inner loop.
       boolean incoming = field;
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index 1de0bae..056f22c 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -137,15 +137,14 @@
 ## CHECK-DAG:     <<Cst5:i\d+>>  IntConstant 5
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
 ## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
-## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
+## CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
-## CHECK-DAG:                    If [<<ArgZ>>]                              loop:<<HeaderY>>
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst11>>]                  loop:<<HeaderY>>
-## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:<<HeaderY>>
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX>>,<<Cst11>>]                   loop:<<HeaderY>>
+## CHECK-DAG:     <<SelX:i\d+>>  Select [<<PhiX>>,<<Mul9>>,<<ArgZ>>]        loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<Cst1>>]                              loop:<<HeaderY>>
-## CHECK-DAG:     <<Add5>>       Add [<<PhiX2>>,<<Cst5>>]                   loop:<<HeaderY>>
-## CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
-## CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
+## CHECK-DAG:     <<Add5>>       Add [<<SelX>>,<<Cst5>>]                    loop:<<HeaderY>>
+## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+## CHECK-DAG:                    Return [<<SelX>>]                          loop:none
 
 ## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
@@ -153,13 +152,12 @@
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
 ## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
-## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
+## CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
-## CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
-## CHECK-DAG:                    If [<<ArgZ>>]                              loop:none
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst11>>]                  loop:none
-## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:none
-## CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
+## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX>>,<<Cst11>>]                   loop:none
+## CHECK-DAG:     <<SelX:i\d+>>  Select [<<PhiX>>,<<Mul9>>,<<ArgZ>>]        loop:none
+## CHECK-DAG:                    Return [<<SelX>>]                          loop:none
 
 .method public static testExitPredecessors(IZZ)I
   .registers 4
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java
index 39c031a..ea6df62 100644
--- a/test/496-checker-inlining-and-class-loader/src/Main.java
+++ b/test/496-checker-inlining-and-class-loader/src/Main.java
@@ -16,6 +16,7 @@
 
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
+import java.util.ArrayList;
 import java.util.List;
 
 class MyClassLoader extends ClassLoader {
@@ -30,18 +31,31 @@
     Object pathList = f.get(loader);
 
     // Some magic to get access to the dexField field of pathList.
+    // Need to make a copy of the dex elements since we don't want an app image with pre-resolved
+    // things.
     f = pathList.getClass().getDeclaredField("dexElements");
     f.setAccessible(true);
-    dexElements = (Object[]) f.get(pathList);
-    dexFileField = dexElements[0].getClass().getDeclaredField("dexFile");
-    dexFileField.setAccessible(true);
+    Object[] dexElements = (Object[]) f.get(pathList);
+    f = dexElements[0].getClass().getDeclaredField("dexFile");
+    f.setAccessible(true);
+    for (Object element : dexElements) {
+      Object dexFile = f.get(element);
+      // Make copy.
+      Field fileNameField = dexFile.getClass().getDeclaredField("mFileName");
+      fileNameField.setAccessible(true);
+      dexFiles.add(dexFile.getClass().getDeclaredConstructor(String.class).newInstance(
+        fileNameField.get(dexFile)));
+    }
   }
 
-  Object[] dexElements;
+  ArrayList<Object> dexFiles = new ArrayList<Object>();
   Field dexFileField;
 
   protected Class<?> loadClass(String className, boolean resolve) throws ClassNotFoundException {
-    System.out.println("Request for " + className);
+    // Other classes may also get loaded; ignore those.
+    if (className.equals("LoadedByMyClassLoader") || className.equals("FirstSeenByMyClassLoader")) {
+      System.out.println("Request for " + className);
+    }
 
     // We're only going to handle LoadedByMyClassLoader.
     if (className != "LoadedByMyClassLoader") {
@@ -50,13 +64,12 @@
 
     // Mimic what DexPathList.findClass is doing.
     try {
-      for (Object element : dexElements) {
-        Object dex = dexFileField.get(element);
-        Method method = dex.getClass().getDeclaredMethod(
+      for (Object dexFile : dexFiles) {
+        Method method = dexFile.getClass().getDeclaredMethod(
             "loadClassBinaryName", String.class, ClassLoader.class, List.class);
 
-        if (dex != null) {
-          Class clazz = (Class)method.invoke(dex, className, this, null);
+        if (dexFile != null) {
+          Class clazz = (Class)method.invoke(dexFile, className, this, null);
           if (clazz != null) {
             return clazz;
           }
diff --git a/test/506-verify-aput/src/Main.java b/test/506-verify-aput/src/Main.java
index 8359f2c..08368d4 100644
--- a/test/506-verify-aput/src/Main.java
+++ b/test/506-verify-aput/src/Main.java
@@ -23,11 +23,12 @@
     try {
       Class.forName("VerifyAPut1");
       throw new Error("expected verification error");
-    } catch (VerifyError e) { /* ignore */ }
-
+    } catch (VerifyError e) { /* ignore */
+    } catch (Error e) { System.out.println(e.getClass() + " " + e.getClass().getClassLoader()); }
     try {
       Class.forName("VerifyAPut2");
       throw new Error("expected verification error");
-    } catch (VerifyError e) { /* ignore */ }
+    } catch (VerifyError e) { /* ignore */
+    } catch (Error e) { System.out.println(e.getClass() + " " + e.getClass().getClassLoader()); }
   }
 }
diff --git a/test/510-checker-try-catch/smali/SsaBuilder.smali b/test/510-checker-try-catch/smali/SsaBuilder.smali
index 710e849..a6a5bfe 100644
--- a/test/510-checker-try-catch/smali/SsaBuilder.smali
+++ b/test/510-checker-try-catch/smali/SsaBuilder.smali
@@ -21,7 +21,7 @@
 
 ## CHECK-START: int SsaBuilder.testSimplifyCatchBlock(int, int, int) ssa_builder (after)
 
-## CHECK:      name             "B0"
+## CHECK:      name             "B1"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
 ## CHECK-NEXT: predecessors
@@ -39,12 +39,15 @@
 ## CHECK:      name             "<<BExtracted>>"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
-## CHECK-NEXT: predecessors     "B0" "<<BCatch>>"
+## CHECK-NEXT: predecessors     "B1" "<<BCatch>>"
 ## CHECK-NOT:  flags            "catch_block"
 ## CHECK:      Add
 
 .method public static testSimplifyCatchBlock(III)I
     .registers 4
+    # Avoid having the entry block be a pre header, which would lead
+    # the cfg simplifier to add a synthesized block.
+    goto :catch_all
 
     :catch_all
     add-int/2addr p0, p1
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index f1d9a37..deff279 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -26,7 +26,7 @@
   //
 
   /// CHECK-START: int Main.linear(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -40,7 +40,7 @@
   }
 
   /// CHECK-START: int Main.linearDown(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearDown(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -54,7 +54,7 @@
   }
 
   /// CHECK-START: int Main.linearObscure(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -69,7 +69,7 @@
   }
 
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -84,7 +84,7 @@
   }
 
   /// CHECK-START: int Main.hiddenStride(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.hiddenStride(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -101,7 +101,7 @@
   }
 
   /// CHECK-START: int Main.linearWhile(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -116,7 +116,7 @@
   }
 
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -134,7 +134,7 @@
   }
 
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -156,7 +156,7 @@
   }
 
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -173,7 +173,7 @@
   }
 
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -194,7 +194,7 @@
   }
 
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -208,7 +208,7 @@
   }
 
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -223,8 +223,8 @@
   }
 
   /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -241,7 +241,7 @@
   }
 
   /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -255,10 +255,12 @@
   }
 
   /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int linearByTwoSkip2(int x[]) {
     int result = 0;
@@ -270,7 +272,7 @@
   }
 
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -287,7 +289,7 @@
   }
 
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -305,10 +307,12 @@
   }
 
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
@@ -323,7 +327,7 @@
   }
 
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -341,10 +345,12 @@
   }
 
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
@@ -359,7 +365,7 @@
   }
 
   /// CHECK-START: int Main.linearForNEUp() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearForNEUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -374,7 +380,7 @@
   }
 
   /// CHECK-START: int Main.linearForNEDown() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearForNEDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -389,7 +395,7 @@
   }
 
   /// CHECK-START: int Main.linearDoWhileUp() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -405,7 +411,7 @@
   }
 
   /// CHECK-START: int Main.linearDoWhileDown() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -421,10 +427,12 @@
   }
 
   /// CHECK-START: int Main.linearShort() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearShort() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearShort() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int linearShort() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -437,7 +445,7 @@
   }
 
   /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -458,20 +466,11 @@
   }
 
   /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnTwoArrayLengths(int n) {
     int[] a = new int[n];
@@ -488,20 +487,11 @@
   }
 
   /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnOneArrayLength(int n) {
     int[] a = new int[n];
@@ -518,20 +508,11 @@
   }
 
   /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnParameter(int n) {
     int[] a = new int[n];
@@ -548,32 +529,13 @@
   }
 
   /// CHECK-START: void Main.linearTriangularVariations(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.linearTriangularVariations(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularVariations(int n) {
     int[] a = new int[n];
@@ -616,22 +578,11 @@
   }
 
   /// CHECK-START: void Main.bubble(int[]) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: If
-  /// CHECK: ArraySet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.bubble(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: If
-  /// CHECK: ArraySet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void bubble(int[] a) {
     for (int i = a.length; --i >= 0;) {
@@ -646,7 +597,7 @@
   }
 
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -664,7 +615,7 @@
   }
 
   /// CHECK-START: int Main.periodicSequence2(int) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -685,10 +636,10 @@
   }
 
   /// CHECK-START: int Main.periodicSequence4(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -713,7 +664,7 @@
   }
 
   /// CHECK-START: int Main.justRightUp1() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightUp1() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -728,7 +679,7 @@
   }
 
   /// CHECK-START: int Main.justRightUp2() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightUp2() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -743,7 +694,7 @@
   }
 
   /// CHECK-START: int Main.justRightUp3() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightUp3() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -758,10 +709,12 @@
   }
 
   /// CHECK-START: int Main.justOOBUp() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justOOBUp() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justOOBUp() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int justOOBUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -774,7 +727,7 @@
   }
 
   /// CHECK-START: int Main.justRightDown1() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightDown1() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -789,7 +742,7 @@
   }
 
   /// CHECK-START: int Main.justRightDown2() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightDown2() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -804,7 +757,7 @@
   }
 
   /// CHECK-START: int Main.justRightDown3() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightDown3() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -819,10 +772,12 @@
   }
 
   /// CHECK-START: int Main.justOOBDown() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justOOBDown() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justOOBDown() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int justOOBDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -835,66 +790,74 @@
   }
 
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
   /// CHECK-NOT: Deoptimize
   private static void lowerOOB(int[] x) {
+    // OOB!
     for (int i = -1; i < x.length; i++) {
       sResult += x[i];
     }
   }
 
   /// CHECK-START: void Main.upperOOB(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
   /// CHECK-NOT: Deoptimize
   private static void upperOOB(int[] x) {
+    // OOB!
     for (int i = 0; i <= x.length; i++) {
       sResult += x[i];
     }
   }
 
   /// CHECK-START: void Main.doWhileUpOOB() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static void doWhileUpOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = 0;
+    // OOB!
     do {
       sResult += x[i++];
     } while (i <= x.length);
   }
 
   /// CHECK-START: void Main.doWhileDownOOB() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static void doWhileDownOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = x.length - 1;
+    // OOB!
     do {
       sResult += x[i--];
     } while (-1 <= i);
   }
 
   /// CHECK-START: int[] Main.multiply1() BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int[] Main.multiply1() BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static int[] multiply1() {
     int[] a = new int[10];
@@ -912,21 +875,20 @@
   }
 
   /// CHECK-START: int[] Main.multiply2() BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int[] Main.multiply2() BCE (after)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.multiply2() BCE (after)
+  /// CHECK-NOT: Deoptimize
   static int[] multiply2() {
     int[] a = new int[10];
     try {
       for (int i = -3; i <= 3; i++) {
         for (int j = -3; j <= 3; j++) {
           // Range [-9,9]: unsafe.
-         a[i * j] += 1;
+          a[i * j] += 1;
         }
       }
     } catch (Exception e) {
@@ -936,24 +898,19 @@
   }
 
   /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
-  /// CHECK: StaticFieldGet
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: StaticFieldSet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
-  /// CHECK: StaticFieldGet
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: StaticFieldSet
-  /// CHECK: Exit
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
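+  //
+  //  (The loop:<<Loop>> suffix matches the loop membership printed for each
+  //  instruction, so loop:none above shows the Deoptimize guards end up outside
+  //  the loop while the ArrayGet stays inside.)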
   private static int linearDynamicBCE1(int[] x, int lo, int hi) {
     int result = 0;
     for (int i = lo; i < hi; i++) {
@@ -963,24 +920,19 @@
   }
 
   /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (before)
-  /// CHECK: StaticFieldGet
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: StaticFieldSet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
-  /// CHECK: StaticFieldGet
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: StaticFieldSet
-  /// CHECK: Exit
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
   private static int linearDynamicBCE2(int[] x, int lo, int hi, int offset) {
     int result = 0;
     for (int i = lo; i < hi; i++) {
@@ -990,19 +942,19 @@
   }
 
   /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
   private static int wrapAroundDynamicBCE(int[] x) {
     int w = 9;
     int result = 0;
@@ -1014,19 +966,19 @@
   }
 
   /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
   private static int periodicDynamicBCE(int[] x) {
     int k = 0;
     int result = 0;
@@ -1038,20 +990,19 @@
   }
 
   /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: Exit
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
   static int dynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
     // This loop could be infinite for hi = max int. Since i is also used
     // as subscript, however, dynamic bce can proceed.
@@ -1063,16 +1014,14 @@
   }
 
   /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
   /// CHECK-NOT: Deoptimize
   static int noDynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
     // As above, but now the index is not used as subscript,
@@ -1085,16 +1034,14 @@
   }
 
   /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
   /// CHECK-NOT: Deoptimize
   static int noDynamicBCEMixedInductionTypes(int[] x, long lo, long hi) {
     int result = 0;
@@ -1107,42 +1054,21 @@
   }
 
   /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: NotEqual
-  /// CHECK: If
-  /// CHECK: If
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: If
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop:B\d+>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop>>
   //
   /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: NotEqual
-  /// CHECK: If
-  /// CHECK: If
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: If
-  /// CHECK: Deoptimize
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: Exit
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK-NOT: ArrayGet
+  //  Order matters:
+  /// CHECK:              Deoptimize loop:<<Loop:B\d+>>
+  //  CHECK-NOT:          Goto       loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
+  /// CHECK:              Goto       loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
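+  //
+  //  (Plain CHECK lines must match in program order, while CHECK-DAG lines may
+  //  match in any order; the ordered Deoptimize/Goto lines above therefore fence
+  //  the three ArrayGet matches inside the loop.)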
   static int dynamicBCEAndConstantIndices(int[] x, int[][] a, int lo, int hi) {
     // Deliberately test array length on a before the loop so that only bounds checks
     // on constant subscripts remain, making them a viable candidate for hoisting.
@@ -1166,80 +1092,73 @@
     return result;
   }
 
-  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (before)
-  /// CHECK: If
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  //  For brevity, just test occurrence of at least one of each in the loop:
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
-  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (after)
-  /// CHECK-DAG: If
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK-NOT: ArrayGet
-  /// CHECK-DAG: Exit
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  static int dynamicBCEAndConstantIndicesAllTypes(int[] q,
-                                                  boolean[] r,
-                                                  byte[] s,
-                                                  char[] t,
-                                                  short[] u,
-                                                  int[] v,
-                                                  long[] w,
-                                                  float[] x,
-                                                  double[] y,
-                                                  Integer[] z, int lo, int hi) {
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-NOT: ArrayGet    loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize  loop:none
+  static int dynamicBCEAndConstantIndicesAllPrimTypes(int[] q,
+                                                      boolean[] r,
+                                                      byte[] s,
+                                                      char[] t,
+                                                      short[] u,
+                                                      int[] v,
+                                                      long[] w,
+                                                      float[] x,
+                                                      double[] y, int lo, int hi) {
     int result = 0;
     for (int i = lo; i < hi; i++) {
+      // All constant index array references can be hoisted out of the loop during BCE on q[i].
       result += q[i] + (r[0] ? 1 : 0) + (int) s[0] + (int) t[0] + (int) u[0] + (int) v[0] +
-                                        (int) w[0] + (int) x[0] + (int) y[0] + (int) z[0];
+                                        (int) w[0] + (int) x[0] + (int) y[0];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndexRefType(int[], java.lang.Integer[], int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndexRefType(int[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndexRefType(int[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  static int dynamicBCEAndConstantIndexRefType(int[] q, Integer[] z, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      // Similar to above, but now the implicit call to intValue() may prevent
+      // hoisting z[0] itself during BCE on q[i]. Therefore, we just check BCE on q[i].
+      result += q[i] + z[0];
     }
     return result;
   }
@@ -1501,9 +1420,10 @@
     long[] x6 = { 6 };
     float[] x7 = { 7 };
     double[] x8 = { 8 };
+    expectEquals(415,
+        dynamicBCEAndConstantIndicesAllPrimTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, 0, 10));
     Integer[] x9 = { 9 };
-    expectEquals(505,
-        dynamicBCEAndConstantIndicesAllTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, x9, 0, 10));
+    expectEquals(145, dynamicBCEAndConstantIndexRefType(x, x9, 0, 10));
   }
 
   private static void expectEquals(int expected, int result) {
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index baee7b3..f87326c 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -595,19 +595,16 @@
   /// CHECK-DAG:                       InstanceFieldSet [<<Obj>>,<<True>>]
   /// CHECK-DAG:                       InstanceFieldSet [<<Obj>>,<<Float8>>]
   /// CHECK-DAG:     <<GetTest:z\d+>>  InstanceFieldGet [<<Obj>>]
-  /// CHECK-DAG:                       If [<<GetTest>>]
   /// CHECK-DAG:     <<GetField:f\d+>> InstanceFieldGet [<<Obj>>]
-  /// CHECK-DAG:     <<Phi:f\d+>>      Phi [<<Float42>>,<<GetField>>]
-  /// CHECK-DAG:                       Return [<<Phi>>]
+  /// CHECK-DAG:     <<Select:f\d+>>   Select [<<Float42>>,<<GetField>>,<<GetTest>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
   /// CHECK-START: float Main.test24() load_store_elimination (after)
   /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
   /// CHECK-DAG:     <<Float8:f\d+>>   FloatConstant 8
   /// CHECK-DAG:     <<Float42:f\d+>>  FloatConstant 42
-  /// CHECK-DAG:     <<Obj:l\d+>>      NewInstance
-  /// CHECK-DAG:                       If [<<True>>]
-  /// CHECK-DAG:     <<Phi:f\d+>>      Phi [<<Float42>>,<<Float8>>]
-  /// CHECK-DAG:                       Return [<<Phi>>]
+  /// CHECK-DAG:     <<Select:f\d+>>   Select [<<Float42>>,<<Float8>>,<<True>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
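+  //
+  // With the diamond turned into a Select, load_store_elimination can forward
+  // the stored values into the remaining loads; the NewInstance then has no
+  // users left and is removed entirely.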
 
   static float test24() {
     float a = 42.0f;
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
index 1756fa4..62511df 100644
--- a/test/543-checker-dce-trycatch/smali/TestCase.smali
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -15,6 +15,8 @@
 .class public LTestCase;
 .super Ljava/lang/Object;
 
+.field public static sField:I
+
 .method private static $inline$False()Z
     .registers 1
     const/4 v0, 0x0
@@ -240,24 +242,25 @@
     shr-int/2addr p2, p3
 
     :try_start
-    const v1, 0xa           # dead catch phi input, defined in entry block (HInstruction)
-    add-int v2, p0, p1      # dead catch phi input, defined in the dead block (HInstruction)
+    const v1, 0xa                  # dead catch phi input, defined in entry block (HInstruction)
+    add-int v2, p0, p1             # dead catch phi input, defined in the dead block (HInstruction)
     move v3, v2
     if-eqz v3, :define_phi
+    sput v3, LTestCase;->sField:I  # beat HSelect simplification (has side-effects, does not throw)
     const v3, 0xf
     :define_phi
-    # v3 = Phi [Add, 0xf]   # dead catch phi input, defined in the dead block (HPhi)
+    # v3 = Phi [Add, 0xf]          # dead catch phi input, defined in the dead block (HPhi)
     div-int/2addr p0, v2
 
     :else
-    const v1, 0xb           # live catch phi input
-    const v2, 0xc           # live catch phi input
-    const v3, 0x10          # live catch phi input
+    const v1, 0xb                  # live catch phi input
+    const v2, 0xc                  # live catch phi input
+    const v3, 0x10                 # live catch phi input
     div-int/2addr p0, p3
 
-    const v1, 0xd           # live catch phi input
-    const v2, 0xe           # live catch phi input
-    const v3, 0x11          # live catch phi input
+    const v1, 0xd                  # live catch phi input
+    const v2, 0xe                  # live catch phi input
+    const v3, 0x11                 # live catch phi input
     div-int/2addr p0, p1
     :try_end
     .catchall {:try_start .. :try_end} :catch_all
diff --git a/test/563-checker-fakestring/smali/TestCase.smali b/test/563-checker-fakestring/smali/TestCase.smali
index 4bd804d..54312a4 100644
--- a/test/563-checker-fakestring/smali/TestCase.smali
+++ b/test/563-checker-fakestring/smali/TestCase.smali
@@ -124,3 +124,59 @@
    return-object v0
 
 .end method
+
+# Test that the compiler does not assume that the first argument of String.<init>
+# is a NewInstance, by inserting an irreducible loop between the two (b/26676472).
+
+# We verify the type of the input instruction (Phi) in debuggable mode, because
+# it is eliminated by later stages of SsaBuilder otherwise.
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance1(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance1([BZ)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop
+   if-eqz p1, :loop_entry
+   :loop_header
+   const v1, 0x1
+   xor-int p1, p1, v1
+   :loop_entry
+   if-eqz p1, :string_init
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance2(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance2([BZ)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop
+   if-eqz p1, :loop_entry
+   :loop_header
+   if-eqz p1, :string_init
+   :loop_entry
+   const v1, 0x1
+   xor-int p1, p1, v1
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
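Both methods above split the new-instance from the invoke-direct to String.<init> with an irreducible loop in between, a shape javac never produces. For orientation, a rough Java analogue of the control flow (the Java version stays reducible, which is exactly why the test must be written in smali):

    static String thisNotNewInstanceAnalogue(byte[] data, boolean flag)
        throws java.io.UnsupportedEncodingException {
      // In the smali version the allocation happens before this loop and
      // the constructor call after it, forcing a Phi on the uninitialized
      // String reference that the compiler must not treat as a NewInstance.
      while (!flag) {
        flag = !flag;  // flips once, then falls through to the init
      }
      return new String(data, "UTF8");
    }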
diff --git a/test/563-checker-fakestring/src/Main.java b/test/563-checker-fakestring/src/Main.java
index 04df0f6..1ac8a5b 100644
--- a/test/563-checker-fakestring/src/Main.java
+++ b/test/563-checker-fakestring/src/Main.java
@@ -63,5 +63,20 @@
       String result = (String) m.invoke(null, new Object[] { testData });
       assertEqual(testString, result);
     }
+
+    {
+      Method m = c.getMethod("thisNotNewInstance1", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
+    {
+      Method m = c.getMethod("thisNotNewInstance2", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
   }
 }
diff --git a/test/564-checker-bitcount/expected.txt b/test/564-checker-bitcount/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/564-checker-bitcount/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/564-checker-bitcount/info.txt b/test/564-checker-bitcount/info.txt
new file mode 100644
index 0000000..57db66b
--- /dev/null
+++ b/test/564-checker-bitcount/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit bit count operations.
diff --git a/test/564-checker-bitcount/src/Main.java b/test/564-checker-bitcount/src/Main.java
new file mode 100644
index 0000000..2683b25
--- /dev/null
+++ b/test/564-checker-bitcount/src/Main.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // TODO: make something like this work when b/26700769 is done.
+  // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+  // CHECK-DAG: popcnt
+
+  /// CHECK-START: int Main.bits32(int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int bits32(int x) {
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.bits64(long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:LongBitCount
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int bits64(long x) {
+    return Long.bitCount(x);
+  }
+
+  public static void main(String args[]) {
+    expectEquals32(0, bits32(0x00000000));
+    expectEquals32(1, bits32(0x00000001));
+    expectEquals32(1, bits32(0x10000000));
+    expectEquals32(2, bits32(0x10000001));
+    expectEquals32(2, bits32(0x00000003));
+    expectEquals32(3, bits32(0x70000000));
+    expectEquals32(4, bits32(0x000F0000));
+    expectEquals32(4, bits32(0x00001111));
+    expectEquals32(4, bits32(0x11110000));
+    expectEquals32(8, bits32(0x11111111));
+    expectEquals32(13, bits32(0x12345678));
+    expectEquals32(19, bits32(0x9ABCDEF0));
+    expectEquals32(32, bits32(0xFFFFFFFF));
+
+    for (int i = 0; i < 32; i++) {
+      expectEquals32(1, bits32(1 << i));
+    }
+
+    expectEquals64(0, bits64(0x0000000000000000L));
+    expectEquals64(1, bits64(0x0000000000000001L));
+    expectEquals64(1, bits64(0x1000000000000000L));
+    expectEquals64(2, bits64(0x1000000000000001L));
+    expectEquals64(2, bits64(0x0000000000000003L));
+    expectEquals64(3, bits64(0x7000000000000000L));
+    expectEquals64(4, bits64(0x000F000000000000L));
+    expectEquals64(8, bits64(0x0000000011111111L));
+    expectEquals64(8, bits64(0x1111111100000000L));
+    expectEquals64(16, bits64(0x1111111111111111L));
+    expectEquals64(33, bits64(0x123456789ABCDEF1L));
+    expectEquals64(64, bits64(0xFFFFFFFFFFFFFFFFL));
+
+    for (int i = 0; i < 64; i++) {
+      expectEquals64(1, bits64(1L << i));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
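The IntegerBitCount intrinsic recognized above replaces a call whose portable form is the classic branch-free SWAR reduction (the same shape Integer.bitCount uses); on x86-64 it should eventually lower to a single popcnt once b/26700769 is done. For reference:

    // Branch-free population count: sum bits pairwise, then by nibble,
    // then by byte; equivalent to Integer.bitCount(i).
    static int popCount32(int i) {
      i = i - ((i >>> 1) & 0x55555555);
      i = (i & 0x33333333) + ((i >>> 2) & 0x33333333);
      i = (i + (i >>> 4)) & 0x0f0f0f0f;
      i = i + (i >>> 8);
      i = i + (i >>> 16);
      return i & 0x3f;
    }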
diff --git a/test/564-checker-inline-loop/expected.txt b/test/564-checker-inline-loop/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/564-checker-inline-loop/expected.txt
diff --git a/test/564-checker-inline-loop/info.txt b/test/564-checker-inline-loop/info.txt
new file mode 100644
index 0000000..a590bc6
--- /dev/null
+++ b/test/564-checker-inline-loop/info.txt
@@ -0,0 +1 @@
+Tests inlining of loops in the optimizing compiler.
diff --git a/test/564-checker-inline-loop/src/Main.java b/test/564-checker-inline-loop/src/Main.java
new file mode 100644
index 0000000..6929913
--- /dev/null
+++ b/test/564-checker-inline-loop/src/Main.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.inlineLoop() inliner (before)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-DAG:                      Return [<<Invoke>>]
+
+  /// CHECK-START: int Main.inlineLoop() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.inlineLoop() inliner (after)
+  /// CHECK-DAG:     <<Constant:i\d+>>   IntConstant 42
+  /// CHECK-DAG:                         Return [<<Constant>>]
+
+  /// CHECK-START: int Main.inlineLoop() licm (after)
+  /// CHECK:                         Goto loop:{{B\d+}}
+
+  public static int inlineLoop() {
+    return loopMethod();
+  }
+
+  /// CHECK-START: void Main.inlineWithinLoop() inliner (before)
+  /// CHECK:      InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.inlineWithinLoop() inliner (after)
+  /// CHECK-NOT:  InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.inlineWithinLoop() licm (after)
+  /// CHECK-DAG:  Goto loop:<<OuterLoop:B\d+>> outer_loop:none
+  /// CHECK-DAG:  Goto outer_loop:<<OuterLoop>>
+
+  public static void inlineWithinLoop() {
+    while (doLoop) {
+      loopMethod();
+    }
+  }
+
+  public static int loopMethod() {
+    while (doLoop) {}
+    return 42;
+  }
+
+  public static boolean doLoop = false;
+
+  public static void main(String[] args) {
+    inlineLoop();
+    inlineWithinLoop();
+  }
+}
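After the inliner runs, inlineLoop() reduces to the callee's body spliced in place of the call, which is what the IntConstant 42 and Return checks assert; the licm (after) check then confirms the loop itself survives later passes. Sketch of the post-inlining shape:

    // What inlineLoop() looks like once loopMethod() is inlined
    // (illustration only): no invoke remains, just the loop and the
    // constant return value.
    public static int inlineLoopAfterInlining() {
      while (doLoop) {}
      return 42;
    }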
diff --git a/test/564-checker-irreducible-loop/expected.txt b/test/564-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/564-checker-irreducible-loop/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/564-checker-irreducible-loop/info.txt b/test/564-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/564-checker-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler in the presence of
+an irreducible loop.
diff --git a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..b82ed92
--- /dev/null
+++ b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,61 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop(int) dead_code_elimination (before)
+## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
+## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:{{B\d+}} irreducible:true
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:none
+.method public static simpleLoop(I)I
+   .registers 3
+   const/16 v0, 42
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-nez p0, :exit
+   invoke-static {v0},LIrreducibleLoop;->$noinline$m(I)V
+   :other_loop_entry
+   goto :loop_entry
+
+   # The else part: a block uses the ArtMethod and branches to
+   # a block that doesn't. The register allocator used to trip there, as the
+   # ArtMethod was a live_in of the last block before the loop, but did not have
+   # a location due to our liveness analysis.
+   :other_loop_pre_entry
+   if-eqz p0, :other_loop_entry
+   invoke-static {v0},LIrreducibleLoop;->$noinline$m(I)V
+   goto :other_loop_entry
+
+   :exit
+   return v0
+.end method
+
+.method public static $noinline$m(I)V
+   .registers 3
+   const/16 v0, 0
+   sget-boolean v1,LIrreducibleLoop;->doThrow:Z
+   if-eqz v1, :exit
+   # Prevent inlining.
+   throw v0
+   :exit
+   return-void
+.end method
+
+.field public static doThrow:Z
diff --git a/test/564-checker-irreducible-loop/src/Main.java b/test/564-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..94e3357
--- /dev/null
+++ b/test/564-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("simpleLoop", int.class);
+    Object[] arguments = { 42 };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/565-checker-condition-liveness/expected.txt b/test/565-checker-condition-liveness/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/565-checker-condition-liveness/expected.txt
diff --git a/test/565-checker-condition-liveness/info.txt b/test/565-checker-condition-liveness/info.txt
new file mode 100644
index 0000000..67b6ceb
--- /dev/null
+++ b/test/565-checker-condition-liveness/info.txt
@@ -0,0 +1 @@
+Test the use positions of inputs of non-materialized conditions.
\ No newline at end of file
diff --git a/test/565-checker-condition-liveness/src/Main.java b/test/565-checker-condition-liveness/src/Main.java
new file mode 100644
index 0000000..a811e5b
--- /dev/null
+++ b/test/565-checker-condition-liveness/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: void Main.main(java.lang.String[]) liveness (after)
+  /// CHECK:         <<X:i\d+>>    ArrayLength uses:[<<UseInput:\d+>>]
+  /// CHECK:         <<Y:i\d+>>    StaticFieldGet uses:[<<UseInput>>]
+  /// CHECK:         <<Cond:z\d+>> LessThanOrEqual [<<X>>,<<Y>>]
+  /// CHECK-NEXT:                  If [<<Cond>>] liveness:<<LivIf:\d+>>
+  /// CHECK-EVAL:    <<UseInput>> == <<LivIf>> + 1
+
+  public static void main(String[] args) {
+    int x = args.length;
+    int y = field;
+    if (x > y) {
+      System.nanoTime();
+    }
+  }
+
+  public static int field = 42;
+}
diff --git a/test/565-checker-irreducible-loop/expected.txt b/test/565-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..6ed281c
--- /dev/null
+++ b/test/565-checker-irreducible-loop/expected.txt
@@ -0,0 +1,2 @@
+1
+1
diff --git a/test/565-checker-irreducible-loop/info.txt b/test/565-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/565-checker-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler in the presence of
+an irreducible loop.
diff --git a/test/565-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/565-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..29547ca
--- /dev/null
+++ b/test/565-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,101 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+# Check that both the irreducible loop and the other loop entry
+# move the constant-folded value to where it's expected.
+
+## CHECK-START-X86: int IrreducibleLoop.test1(int, long) register (after)
+## CHECK-DAG:                     ParallelMove {{.*84->.*}} loop:none
+## CHECK-DAG:                     ParallelMove {{.*84->.*}} loop:{{B\d+}} irreducible:true
+.method public static test1(IJ)I
+   .registers 10
+   const/16 v6, 2
+   const/16 v4, 1
+   const-wide/16 v0, 42
+   add-long v2, v0, v0
+
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-eqz p0, :exit
+   cmp-long v6, v2, p1
+   :other_loop_entry
+   sub-int p0, p0, v4
+   goto :loop_entry
+
+   # The other block branching to the irreducible loop.
+   # In that block, v4 has no live range.
+   :other_loop_pre_entry
+   goto :other_loop_entry
+
+   :exit
+   return v6
+.end method
+
+# Check that the compiler does not crash when
+# a live interval is found while connecting siblings, but that
+# live interval is inactive at the desired position.
+
+## CHECK-START-X86: int IrreducibleLoop.test2(int, long) register (after)
+## CHECK-DAG:                     ParallelMove {{.*84->.*}} loop:none
+## CHECK-DAG:                     ParallelMove {{.*84->.*}} loop:{{B\d+}} irreducible:true
+.method public static test2(IJ)I
+   .registers 14
+   const/16 v6, 2
+   const/16 v4, 1
+   const-wide/16 v0, 42
+   const-wide/16 v8, 68
+   add-long v2, v0, v0
+
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-eqz p0, :exit
+   cmp-long v6, v2, p1
+   :other_loop_entry
+   sub-int p0, p0, v4
+   goto :loop_entry
+
+   # The other block branching to the irreducible loop.
+   :other_loop_pre_entry
+   # Make v2 have a register location.
+   sput-wide v2, LIrreducibleLoop;->myField:J
+   # Stress register allocator on x86 to split v2.
+   sput-wide v0, LIrreducibleLoop;->myField:J
+   sput-wide p1, LIrreducibleLoop;->myField:J
+   sput-wide v8, LIrreducibleLoop;->myField:J
+   if-eqz p0, :join
+   # Stress register allocator on x86 to split v2.
+   sput-wide p1, LIrreducibleLoop;->myField:J
+   sput-wide v8, LIrreducibleLoop;->myField:J
+   sput-wide v0, LIrreducibleLoop;->myField:J
+   # Last use of v2 before the irreducible loop, that
+   # will create an interval hole.
+   sput-wide v2, LIrreducibleLoop;->myField:J
+   :join
+   goto :other_loop_entry
+
+   :exit
+   return v6
+.end method
+
+.field public static volatile myField:J
diff --git a/test/565-checker-irreducible-loop/src/Main.java b/test/565-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..e48bd6b
--- /dev/null
+++ b/test/565-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    {
+      Method m = c.getMethod("test1", int.class, long.class);
+      Object[] arguments = { 42, 31L };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("test2", int.class, long.class);
+      Object[] arguments = { 42, 31L };
+      System.out.println(m.invoke(null, arguments));
+    }
+  }
+}
diff --git a/test/565-checker-rotate/expected.txt b/test/565-checker-rotate/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/565-checker-rotate/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/565-checker-rotate/info.txt b/test/565-checker-rotate/info.txt
new file mode 100644
index 0000000..c6a8091
--- /dev/null
+++ b/test/565-checker-rotate/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit rotate operations.
diff --git a/test/565-checker-rotate/src/Main.java b/test/565-checker-rotate/src/Main.java
new file mode 100644
index 0000000..33bbe02
--- /dev/null
+++ b/test/565-checker-rotate/src/Main.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.rotateLeft32(int, int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerRotateLeft
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int rotateLeft32(int x, int y) {
+    return Integer.rotateLeft(x, y);
+  }
+
+  /// CHECK-START: long Main.rotateLeft64(long, int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:LongRotateLeft
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long rotateLeft64(long x, int y) {
+    return Long.rotateLeft(x, y);
+  }
+
+  /// CHECK-START: int Main.rotateRight32(int, int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerRotateRight
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int rotateRight32(int x, int y) {
+    return Integer.rotateRight(x, y);
+  }
+
+  /// CHECK-START: long Main.rotateRight64(long, int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:LongRotateRight
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long rotateRight64(long x, int y) {
+    return Long.rotateRight(x, y);
+  }
+
+  public static void main(String args[]) {
+    expectEquals32(0x00000001, rotateLeft32(0x00000001, 0));
+    expectEquals32(0x00000002, rotateLeft32(0x00000001, 1));
+    expectEquals32(0x80000000, rotateLeft32(0x00000001, 31));
+    expectEquals32(0x00000001, rotateLeft32(0x00000001, 32));  // overshoot
+    expectEquals32(0x00000003, rotateLeft32(0x80000001, 1));
+    expectEquals32(0x00000006, rotateLeft32(0x80000001, 2));
+    expectEquals32(0x23456781, rotateLeft32(0x12345678, 4));
+    expectEquals32(0xBCDEF09A, rotateLeft32(0x9ABCDEF0, 8));
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEquals32(0x00000000, rotateLeft32(0x00000000, i));
+      expectEquals32(0xFFFFFFFF, rotateLeft32(0xFFFFFFFF, i));
+      expectEquals32(1 << j, rotateLeft32(0x00000001, i));
+      expectEquals32((0x12345678 << j) | (0x12345678 >>> -j),
+                     rotateLeft32(0x12345678, i));
+    }
+
+    expectEquals64(0x0000000000000001L, rotateLeft64(0x0000000000000001L, 0));
+    expectEquals64(0x0000000000000002L, rotateLeft64(0x0000000000000001L, 1));
+    expectEquals64(0x8000000000000000L, rotateLeft64(0x0000000000000001L, 63));
+    expectEquals64(0x0000000000000001L, rotateLeft64(0x0000000000000001L, 64));  // overshoot
+    expectEquals64(0x0000000000000003L, rotateLeft64(0x8000000000000001L, 1));
+    expectEquals64(0x0000000000000006L, rotateLeft64(0x8000000000000001L, 2));
+    expectEquals64(0x23456789ABCDEF01L, rotateLeft64(0x123456789ABCDEF0L, 4));
+    expectEquals64(0x3456789ABCDEF012L, rotateLeft64(0x123456789ABCDEF0L, 8));
+    for (int i = 0; i < 70; i++) {  // overshoot a bit
+      int j = i & 63;
+      expectEquals64(0x0000000000000000L, rotateLeft64(0x0000000000000000L, i));
+      expectEquals64(0xFFFFFFFFFFFFFFFFL, rotateLeft64(0xFFFFFFFFFFFFFFFFL, i));
+      expectEquals64(1L << j, rotateLeft64(0x0000000000000001L, i));
+      expectEquals64((0x123456789ABCDEF0L << j) | (0x123456789ABCDEF0L >>> -j),
+                     rotateLeft64(0x123456789ABCDEF0L, i));
+    }
+
+    expectEquals32(0x80000000, rotateRight32(0x80000000, 0));
+    expectEquals32(0x40000000, rotateRight32(0x80000000, 1));
+    expectEquals32(0x00000001, rotateRight32(0x80000000, 31));
+    expectEquals32(0x80000000, rotateRight32(0x80000000, 32));  // overshoot
+    expectEquals32(0xC0000000, rotateRight32(0x80000001, 1));
+    expectEquals32(0x60000000, rotateRight32(0x80000001, 2));
+    expectEquals32(0x81234567, rotateRight32(0x12345678, 4));
+    expectEquals32(0xF09ABCDE, rotateRight32(0x9ABCDEF0, 8));
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEquals32(0x00000000, rotateRight32(0x00000000, i));
+      expectEquals32(0xFFFFFFFF, rotateRight32(0xFFFFFFFF, i));
+      expectEquals32(0x80000000 >>> j, rotateRight32(0x80000000, i));
+      expectEquals32((0x12345678 >>> j) | (0x12345678 << -j),
+                     rotateRight32(0x12345678, i));
+    }
+
+    expectEquals64(0x8000000000000000L, rotateRight64(0x8000000000000000L, 0));
+    expectEquals64(0x4000000000000000L, rotateRight64(0x8000000000000000L, 1));
+    expectEquals64(0x0000000000000001L, rotateRight64(0x8000000000000000L, 63));
+    expectEquals64(0x8000000000000000L, rotateRight64(0x8000000000000000L, 64));  // overshoot
+    expectEquals64(0xC000000000000000L, rotateRight64(0x8000000000000001L, 1));
+    expectEquals64(0x6000000000000000L, rotateRight64(0x8000000000000001L, 2));
+    expectEquals64(0x0123456789ABCDEFL, rotateRight64(0x123456789ABCDEF0L, 4));
+    expectEquals64(0xF0123456789ABCDEL, rotateRight64(0x123456789ABCDEF0L, 8));
+    for (int i = 0; i < 70; i++) {  // overshoot a bit
+      int j = i & 63;
+      expectEquals64(0x0000000000000000L, rotateRight64(0x0000000000000000L, i));
+      expectEquals64(0xFFFFFFFFFFFFFFFFL, rotateRight64(0xFFFFFFFFFFFFFFFFL, i));
+      expectEquals64(0x8000000000000000L >>> j, rotateRight64(0x8000000000000000L, i));
+      expectEquals64((0x123456789ABCDEF0L >>> j) | (0x123456789ABCDEF0L << -j),
+                     rotateRight64(0x123456789ABCDEF0L, i));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
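The rotate tests lean on Java's shift-distance masking: for int, shift counts are taken mod 32, so (x >>> -d) equals (x >>> (32 - d)) and a rotation is just two shifts OR'ed together, which also makes the d == 0 case safe. A minimal reference matching Integer.rotateLeft/rotateRight semantics:

    // Reference rotates; shift distances are implicitly masked to
    // 5 bits for int (6 bits for long).
    static int rol32(int x, int d) { return (x << d) | (x >>> -d); }
    static int ror32(int x, int d) { return (x >>> d) | (x << -d); }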
diff --git a/test/566-checker-codegen-select/expected.txt b/test/566-checker-codegen-select/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/566-checker-codegen-select/expected.txt
diff --git a/test/566-checker-codegen-select/info.txt b/test/566-checker-codegen-select/info.txt
new file mode 100644
index 0000000..67b6ceb
--- /dev/null
+++ b/test/566-checker-codegen-select/info.txt
@@ -0,0 +1 @@
+Test code generation of Select instructions with materialized and non-materialized conditions.
\ No newline at end of file
diff --git a/test/566-checker-codegen-select/src/Main.java b/test/566-checker-codegen-select/src/Main.java
new file mode 100644
index 0000000..edb31e6
--- /dev/null
+++ b/test/566-checker-codegen-select/src/Main.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: long Main.$noinline$longSelect(long) register (before)
+  /// CHECK:         <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK-NEXT:                  Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
+  // Condition must be materialized on X86 because it would need too many
+  // registers otherwise.
+  /// CHECK-START-X86: long Main.$noinline$longSelect(long) disassembly (after)
+  /// CHECK:             LessThanOrEqual
+  /// CHECK-NEXT:          cmp
+  /// CHECK:             Select
+
+  public long $noinline$longSelect(long param) {
+    if (doThrow) { throw new Error(); }
+    long val_true = longB;
+    long val_false = longC;
+    return (param > longA) ? val_true : val_false;
+  }
+
+  /// CHECK-START: long Main.$noinline$longSelect_Constant(long) register (before)
+  /// CHECK:         <<Const:j\d+>> LongConstant
+  /// CHECK:         <<Cond:z\d+>>  LessThanOrEqual [{{j\d+}},<<Const>>]
+  /// CHECK-NEXT:                   Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
+  // Condition can be non-materialized on X86 because the constant operand
+  // means the condition no longer requires four registers.
+  /// CHECK-START-X86: long Main.$noinline$longSelect_Constant(long) disassembly (after)
+  /// CHECK:             LessThanOrEqual
+  /// CHECK-NEXT:        Select
+
+  public long $noinline$longSelect_Constant(long param) {
+    if (doThrow) { throw new Error(); }
+    long val_true = longB;
+    long val_false = longC;
+    return (param > 3L) ? val_true : val_false;
+  }
+
+  public static void main(String[] args) {
+    Main m = new Main();
+    assertLongEquals(5L, m.$noinline$longSelect(4L));
+    assertLongEquals(7L, m.$noinline$longSelect(2L));
+    assertLongEquals(5L, m.$noinline$longSelect_Constant(4L));
+    assertLongEquals(7L, m.$noinline$longSelect_Constant(2L));
+  }
+
+  public static void assertLongEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error(expected + " != " + actual);
+    }
+  }
+
+  public boolean doThrow = false;
+
+  public long longA = 3L;
+  public long longB = 5L;
+  public long longC = 7L;
+}
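A Select is the flattened form of a two-armed diamond, so the backend can emit it as a conditional move when the comparison stays non-materialized; with two arbitrary long operands the condition alone already needs four registers on x86, hence the materialization in the first test. For comparison, the branchy shape that $noinline$longSelect expresses before select generation (sketch):

    // Diamond form of $noinline$longSelect; the optimizing compiler
    // collapses this into a single HSelect.
    public long longSelectBranchy(long param) {
      long result;
      if (param > longA) {
        result = longB;
      } else {
        result = longC;
      }
      return result;
    }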
diff --git a/test/566-checker-signum/expected.txt b/test/566-checker-signum/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/566-checker-signum/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/566-checker-signum/info.txt b/test/566-checker-signum/info.txt
new file mode 100644
index 0000000..328e494
--- /dev/null
+++ b/test/566-checker-signum/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit signum operations.
diff --git a/test/566-checker-signum/src/Main.java b/test/566-checker-signum/src/Main.java
new file mode 100644
index 0000000..cc4a984
--- /dev/null
+++ b/test/566-checker-signum/src/Main.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.sign32(int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerSignum
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int sign32(int x) {
+    return Integer.signum(x);
+  }
+
+  /// CHECK-START: int Main.sign64(long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:LongSignum
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int sign64(long x) {
+    return Long.signum(x);
+  }
+
+  public static void main(String args[]) {
+    expectEquals(-1, sign32(Integer.MIN_VALUE));
+    expectEquals(-1, sign32(-12345));
+    expectEquals(-1, sign32(-1));
+    expectEquals(0, sign32(0));
+    expectEquals(1, sign32(1));
+    expectEquals(1, sign32(12345));
+    expectEquals(1, sign32(Integer.MAX_VALUE));
+
+    for (int i = -11; i <= 11; i++) {
+      int expected = 0;
+      if (i < 0) expected = -1;
+      else if (i > 0) expected = 1;
+      expectEquals(expected, sign32(i));
+    }
+
+    expectEquals(-1, sign64(Long.MIN_VALUE));
+    expectEquals(-1, sign64(-12345L));
+    expectEquals(-1, sign64(-1L));
+    expectEquals(0, sign64(0L));
+    expectEquals(1, sign64(1L));
+    expectEquals(1, sign64(12345L));
+    expectEquals(1, sign64(Long.MAX_VALUE));
+
+    for (long i = -11L; i <= 11L; i++) {
+      int expected = 0;
+      if (i < 0) expected = -1;
+      else if (i > 0) expected = 1;
+      expectEquals(expected, sign64(i));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
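The signum intrinsics compute the branch-free sign function; a standard reference implementation (the same shape as Integer.signum):

    // Branch-free signum: the first term is -1 for negative inputs,
    // the second is 1 for positive inputs, and both are 0 for zero.
    static int signum32(int x) {
      return (x >> 31) | (-x >>> 31);
    }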
diff --git a/test/566-polymorphic-inlining/expected.txt b/test/566-polymorphic-inlining/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/566-polymorphic-inlining/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/566-polymorphic-inlining/info.txt b/test/566-polymorphic-inlining/info.txt
new file mode 100644
index 0000000..1a47f89
--- /dev/null
+++ b/test/566-polymorphic-inlining/info.txt
@@ -0,0 +1 @@
+Test polymorphic inlining.
diff --git a/test/566-polymorphic-inlining/polymorphic_inline.cc b/test/566-polymorphic-inlining/polymorphic_inline.cc
new file mode 100644
index 0000000..5801b36
--- /dev/null
+++ b/test/566-polymorphic-inlining/polymorphic_inline.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_method.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "oat_quick_method_header.h"
+#include "scoped_thread_state_change.h"
+#include "stack_map.h"
+
+namespace art {
+
+static void do_checks(jclass cls, const char* method_name) {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  jit::JitCodeCache* code_cache = jit->GetCodeCache();
+  ArtMethod* method = klass->FindDeclaredDirectMethodByName(method_name, sizeof(void*));
+  OatQuickMethodHeader* header = OatQuickMethodHeader::FromEntryPoint(
+      method->GetEntryPointFromQuickCompiledCode());
+  CHECK(code_cache->ContainsPc(header->GetCode()));
+
+  CodeInfo info = header->GetOptimizedCodeInfo();
+  CHECK(info.HasInlineInfo());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureJittedAndPolymorphicInline(JNIEnv*, jclass cls) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return;
+  }
+
+  do_checks(cls, "testInvokeVirtual");
+  do_checks(cls, "testInvokeInterface");
+}
+
+}  // namespace art
diff --git a/test/566-polymorphic-inlining/src/Main.java b/test/566-polymorphic-inlining/src/Main.java
new file mode 100644
index 0000000..7283e86
--- /dev/null
+++ b/test/566-polymorphic-inlining/src/Main.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface Itf {
+  public Class sameInvokeInterface();
+}
+
+public class Main implements Itf {
+  public static void assertEquals(Object expected, Object actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected  + ", got " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    Main[] mains = new Main[3];
+    Itf[] itfs = new Itf[3];
+    itfs[0] = mains[0] = new Main();
+    itfs[1] = mains[1] = new Subclass();
+    itfs[2] = mains[2] = new OtherSubclass();
+
+    // Make testInvokeVirtual and testInvokeInterface hot to get them jitted.
+    // We pass Main and Subclass to get polymorphic inlining based on calling
+    // the same method.
+    for (int i = 0; i < 10000; ++i) {
+      testInvokeVirtual(mains[0]);
+      testInvokeVirtual(mains[1]);
+      testInvokeInterface(itfs[0]);
+      testInvokeInterface(itfs[1]);
+    }
+
+    ensureJittedAndPolymorphicInline();
+
+    // At this point, the JIT should have compiled both methods, and inlined
+    // sameInvokeVirtual and sameInvokeInterface.
+    assertEquals(Main.class, testInvokeVirtual(mains[0]));
+    assertEquals(Main.class, testInvokeVirtual(mains[1]));
+
+    assertEquals(Itf.class, testInvokeInterface(itfs[0]));
+    assertEquals(Itf.class, testInvokeInterface(itfs[1]));
+
+    // This will trigger a deoptimization of the compiled code.
+    assertEquals(OtherSubclass.class, testInvokeVirtual(mains[2]));
+    assertEquals(OtherSubclass.class, testInvokeInterface(itfs[2]));
+  }
+
+  public Class sameInvokeVirtual() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo
+    return Main.class;
+  }
+
+  public Class sameInvokeInterface() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo
+    return Itf.class;
+  }
+
+  public static Class testInvokeInterface(Itf i) {
+    return i.sameInvokeInterface();
+  }
+
+  public static Class testInvokeVirtual(Main m) {
+    return m.sameInvokeVirtual();
+  }
+
+  public Object field = new Object();
+
+  public static native void ensureJittedAndPolymorphicInline();
+}
+
+class Subclass extends Main {
+}
+
+class OtherSubclass extends Main {
+  public Class sameInvokeVirtual() {
+    return OtherSubclass.class;
+  }
+
+  public Class sameInvokeInterface() {
+    return OtherSubclass.class;
+  }
+}
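Conceptually, polymorphic inlining guards the inlined bodies with receiver class checks and deoptimizes for anything else, which is why the final calls with OtherSubclass still produce the right answer, just through the slow path. A hand-written sketch of that shape (illustration only, not the code the JIT actually emits):

    // Guarded devirtualization for testInvokeVirtual: both hot receiver
    // classes share the same target, so one inlined body covers them;
    // an unexpected receiver takes the fallback path.
    static Class testInvokeVirtualGuarded(Main m) {
      Class<?> c = m.getClass();
      if (c == Main.class || c == Subclass.class) {
        m.field.getClass();          // inlined Main.sameInvokeVirtual body
        return Main.class;
      }
      return m.sameInvokeVirtual();  // stands in for the deoptimization
    }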
diff --git a/test/567-checker-compare/expected.txt b/test/567-checker-compare/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/567-checker-compare/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/567-checker-compare/info.txt b/test/567-checker-compare/info.txt
new file mode 100644
index 0000000..5bac7b1
--- /dev/null
+++ b/test/567-checker-compare/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit compare operations.
diff --git a/test/567-checker-compare/src/Main.java b/test/567-checker-compare/src/Main.java
new file mode 100644
index 0000000..52abb75
--- /dev/null
+++ b/test/567-checker-compare/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.compare32(int, int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int compare32(int x, int y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compare64(long, long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:LongCompare
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int compare64(long x, long y) {
+    return Long.compare(x, y);
+  }
+
+  public static void main(String args[]) {
+    expectEquals(-1, compare32(Integer.MIN_VALUE, Integer.MIN_VALUE + 1));
+    expectEquals(-1, compare32(Integer.MIN_VALUE, -1));
+    expectEquals(-1, compare32(Integer.MIN_VALUE, 0));
+    expectEquals(-1, compare32(Integer.MIN_VALUE, 1));
+    expectEquals(-1, compare32(Integer.MIN_VALUE, Integer.MAX_VALUE));
+    expectEquals(-1, compare32(-1, 0));
+    expectEquals(-1, compare32(-1, 1));
+    expectEquals(-1, compare32(0, 1));
+
+    expectEquals(0, compare32(Integer.MIN_VALUE, Integer.MIN_VALUE));
+    expectEquals(0, compare32(-1, -1));
+    expectEquals(0, compare32(0, 0));
+    expectEquals(0, compare32(1, 1));
+    expectEquals(0, compare32(Integer.MAX_VALUE, Integer.MAX_VALUE));
+
+    expectEquals(1, compare32(0, -1));
+    expectEquals(1, compare32(1, -1));
+    expectEquals(1, compare32(1, 0));
+    expectEquals(1, compare32(Integer.MAX_VALUE, Integer.MIN_VALUE));
+    expectEquals(1, compare32(Integer.MAX_VALUE, -1));
+    expectEquals(1, compare32(Integer.MAX_VALUE, 0));
+    expectEquals(1, compare32(Integer.MAX_VALUE, 1));
+    expectEquals(1, compare32(Integer.MAX_VALUE, Integer.MAX_VALUE - 1));
+
+    for (int i = -11; i <= 11; i++) {
+      for (int j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compare32(i, j));
+      }
+    }
+
+    expectEquals(-1, compare64(Long.MIN_VALUE, Long.MIN_VALUE + 1L));
+    expectEquals(-1, compare64(Long.MIN_VALUE, -1L));
+    expectEquals(-1, compare64(Long.MIN_VALUE, 0L));
+    expectEquals(-1, compare64(Long.MIN_VALUE, 1L));
+    expectEquals(-1, compare64(Long.MIN_VALUE, Long.MAX_VALUE));
+    expectEquals(-1, compare64(-1L, 0L));
+    expectEquals(-1, compare64(-1L, 1L));
+    expectEquals(-1, compare64(0L, 1L));
+
+    expectEquals(0, compare64(Long.MIN_VALUE, Long.MIN_VALUE));
+    expectEquals(0, compare64(-1L, -1L));
+    expectEquals(0, compare64(0L, 0L));
+    expectEquals(0, compare64(1L, 1L));
+    expectEquals(0, compare64(Long.MAX_VALUE, Long.MAX_VALUE));
+
+    expectEquals(1, compare64(0L, -1L));
+    expectEquals(1, compare64(1L, -1L));
+    expectEquals(1, compare64(1L, 0L));
+    expectEquals(1, compare64(Long.MAX_VALUE, Long.MIN_VALUE));
+    expectEquals(1, compare64(Long.MAX_VALUE, -1L));
+    expectEquals(1, compare64(Long.MAX_VALUE, 0L));
+    expectEquals(1, compare64(Long.MAX_VALUE, 1L));
+    expectEquals(1, compare64(Long.MAX_VALUE, Long.MAX_VALUE - 1L));
+
+    for (long i = -11L; i <= 11L; i++) {
+      for (long j = -11L; j <= 11L; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compare64(i, j));
+      }
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
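The compare intrinsics implement a three-way comparison; note that plain subtraction is not a valid substitute because x - y can overflow. Reference semantics (same as Integer.compare):

    // Three-way compare via two comparisons rather than subtraction,
    // since x - y wraps for e.g. x = Integer.MIN_VALUE, y = 1.
    static int compare32(int x, int y) {
      return (x < y) ? -1 : ((x == y) ? 0 : 1);
    }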
diff --git a/test/568-checker-onebit/expected.txt b/test/568-checker-onebit/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/568-checker-onebit/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/568-checker-onebit/info.txt b/test/568-checker-onebit/info.txt
new file mode 100644
index 0000000..c2b5bf8
--- /dev/null
+++ b/test/568-checker-onebit/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit high/low-bit operations.
diff --git a/test/568-checker-onebit/src/Main.java b/test/568-checker-onebit/src/Main.java
new file mode 100644
index 0000000..7007c6a
--- /dev/null
+++ b/test/568-checker-onebit/src/Main.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.hi32(int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerHighestOneBit
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int hi32(int x) {
+    return Integer.highestOneBit(x);
+  }
+
+  /// CHECK-START: int Main.lo32(int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerLowestOneBit
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int lo32(int x) {
+    return Integer.lowestOneBit(x);
+  }
+
+  /// CHECK-START: long Main.hi64(long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:LongHighestOneBit
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long hi64(long x) {
+    return Long.highestOneBit(x);
+  }
+
+  /// CHECK-START: long Main.lo64(long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:LongLowestOneBit
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long lo64(long x) {
+    return Long.lowestOneBit(x);
+  }
+
+  public static void main(String args[]) {
+    expectEquals32(0x00000000, hi32(0x00000000));
+    expectEquals32(0x00000000, lo32(0x00000000));
+    expectEquals32(0x00010000, hi32(0x00010000));
+    expectEquals32(0x00010000, lo32(0x00010000));
+    expectEquals32(0x00800000, hi32(0x00FF0000));
+    expectEquals32(0x00010000, lo32(0x00FF0000));
+    expectEquals32(0x80000000, hi32(0xFFFFFFFF));
+    expectEquals32(0x00000001, lo32(0xFFFFFFFF));
+
+    for (int i = 0; i < 32; i++) {
+      expectEquals32(1 << i, hi32(1 << i));
+      expectEquals32(1 << i, lo32(1 << i));
+      int expected = i < 29 ? 0x8 << i : 0x80000000;
+      expectEquals32(expected, hi32(0xF << i));
+      expectEquals32(0x1 << i, lo32(0xF << i));
+    }
+
+    expectEquals64(0x0000000000000000L, hi64(0x0000000000000000L));
+    expectEquals64(0x0000000000000000L, lo64(0x0000000000000000L));
+    expectEquals64(0x0000000100000000L, hi64(0x0000000100000000L));
+    expectEquals64(0x0000000100000000L, lo64(0x0000000100000000L));
+    expectEquals64(0x0000008000000000L, hi64(0x000000FF00000000L));
+    expectEquals64(0x0000000100000000L, lo64(0x000000FF00000000L));
+    expectEquals64(0x8000000000000000L, hi64(0xFFFFFFFFFFFFFFFFL));
+    expectEquals64(0x0000000000000001L, lo64(0xFFFFFFFFFFFFFFFFL));
+
+    for (int i = 0; i < 64; i++) {
+      expectEquals64(1L << i, hi64(1L << i));
+      expectEquals64(1L << i, lo64(1L << i));
+      long expected = i < 61 ? 0x8L << i : 0x8000000000000000L;
+      expectEquals64(expected, hi64(0xFL << i));
+      expectEquals64(0x1L << i, lo64(0xFL << i));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
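The highest/lowest-one-bit intrinsics have compact bit-twiddling forms: the lowest set bit falls out of two's complement directly, and the highest can be found by smearing the top set bit rightward. Reference sketch:

    // lowestOneBit: -i flips all bits above the lowest set bit,
    // so the AND isolates it (0 maps to 0).
    static int lowest32(int i) {
      return i & -i;
    }

    // highestOneBit: smear the most significant set bit into all
    // lower positions, then drop the smeared tail.
    static int highest32(int i) {
      i |= (i >> 1);
      i |= (i >> 2);
      i |= (i >> 4);
      i |= (i >> 8);
      i |= (i >> 16);
      return i - (i >>> 1);
    }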
diff --git a/test/960-default-smali/run b/test/960-default-smali/run
deleted file mode 100755
index 22f6800..0000000
--- a/test/960-default-smali/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} --experimental default-methods "$@"
diff --git a/test/961-default-iface-resolution-generated/run b/test/961-default-iface-resolution-generated/run
deleted file mode 100755
index 22f6800..0000000
--- a/test/961-default-iface-resolution-generated/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} --experimental default-methods "$@"
diff --git a/test/962-iface-static/run b/test/962-iface-static/run
deleted file mode 100755
index d37737f..0000000
--- a/test/962-iface-static/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} --experimental default-methods "$@"
diff --git a/test/963-default-range-smali/run b/test/963-default-range-smali/run
deleted file mode 100755
index d37737f..0000000
--- a/test/963-default-range-smali/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} --experimental default-methods "$@"
diff --git a/test/964-default-iface-init-generated/run b/test/964-default-iface-init-generated/run
deleted file mode 100755
index 22f6800..0000000
--- a/test/964-default-iface-init-generated/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} --experimental default-methods "$@"
diff --git a/test/965-default-verify/run b/test/965-default-verify/run
deleted file mode 100755
index 8944ea9..0000000
--- a/test/965-default-verify/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} "$@" --experimental default-methods
diff --git a/test/966-default-conflict/run b/test/966-default-conflict/run
deleted file mode 100755
index 8944ea9..0000000
--- a/test/966-default-conflict/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} "$@" --experimental default-methods
diff --git a/test/967-default-ame/run b/test/967-default-ame/run
deleted file mode 100755
index 8944ea9..0000000
--- a/test/967-default-ame/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} "$@" --experimental default-methods
diff --git a/test/968-default-partial-compile-generated/run b/test/968-default-partial-compile-generated/run
deleted file mode 100755
index 6d2930d..0000000
--- a/test/968-default-partial-compile-generated/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} "$@" --experimental default-methods
diff --git a/test/969-iface-super/run b/test/969-iface-super/run
deleted file mode 100755
index 8944ea9..0000000
--- a/test/969-iface-super/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} "$@" --experimental default-methods
diff --git a/test/970-iface-super-resolution-generated/run b/test/970-iface-super-resolution-generated/run
deleted file mode 100755
index 6d2930d..0000000
--- a/test/970-iface-super-resolution-generated/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} "$@" --experimental default-methods
diff --git a/test/971-iface-super/run b/test/971-iface-super/run
deleted file mode 100755
index 6d2930d..0000000
--- a/test/971-iface-super/run
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-${RUN} "$@" --experimental default-methods
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index b922b45..faaf1f0 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -39,7 +39,8 @@
   461-get-reference-vreg/get_reference_vreg_jni.cc \
   466-get-live-vreg/get_live_vreg_jni.cc \
   497-inlining-and-class-loader/clear_dex_cache.cc \
-  543-env-long-ref/env_long_ref.cc
+  543-env-long-ref/env_long_ref.cc \
+  566-polymorphic-inlining/polymorphic_inline.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 586c805..d6c2983 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -70,6 +70,7 @@
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	  DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	  JACK=$(abspath $(JACK)) \
 	  JACK_CLASSPATH=$(TARGET_JACK_CLASSPATH) \
 	  JILL_JAR=$(abspath $(JILL_JAR)) \
@@ -524,16 +525,17 @@
 
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=
 
+# Tests that should fail in the read barrier configuration with the interpreter.
+TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS :=
 
-# Tests that should fail in the read barrier configuration with the default (Quick) compiler.
-# 137: Quick has no support for read barriers and punts to the
-#      interpreter, but CFI unwinding expects managed frames.
-# 554: Quick does not support JIT profiling.
+# Tests that should fail in the read barrier configuration with the default (Quick) compiler (AOT).
+# Quick has no support for read barriers and punts to the interpreter, so this list combines the
+# read-barrier-specific interpreter failures above with the general interpreter failures.
 TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS := \
-  137-cfi \
-  554-jit-profile-file
+  $(TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS) \
+  $(TEST_ART_BROKEN_INTERPRETER_RUN_TESTS)
 
-# Tests that should fail in the read barrier configuration with the Optimizing compiler.
+# Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
 # 484: Baker's fast path based read barrier compiler instrumentation generates code containing
 #      more parallel moves on x86, thus some Checker assertions may fail.
 # 527: On ARM64, the read barrier instrumentation does not support the HArm64IntermediateAddress
@@ -546,11 +548,18 @@
   537-checker-arraycopy
 
 # Tests that should fail in the read barrier configuration with JIT.
-# 141: Disabled because of intermittent failures on the ART Builtbot (b/25866001).
+# 496: Occasional timeout: "Fault message: timeout: the monitored command dumped core" (b/26786304).
 TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := \
-  141-class-unload
+  496-checker-inlining-and-class-loader
 
 ifeq ($(ART_USE_READ_BARRIER),true)
+  ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
+
   ifneq (,$(filter default,$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
         $(PREBUILD_TYPES),default,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
@@ -967,6 +976,7 @@
 	    JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	    SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	    JACK=$(abspath $(JACK)) \
 	    JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \
 	    JILL_JAR=$(abspath $(JILL_JAR)) \
diff --git a/test/ProfileTestMultiDex/Main.java b/test/ProfileTestMultiDex/Main.java
new file mode 100644
index 0000000..41532ea
--- /dev/null
+++ b/test/ProfileTestMultiDex/Main.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  public String getA() {
+    return "A";
+  }
+  public String getB() {
+    return "B";
+  }
+  public String getC() {
+    return "C";
+  }
+}
diff --git a/test/ProfileTestMultiDex/Second.java b/test/ProfileTestMultiDex/Second.java
new file mode 100644
index 0000000..4ac5abc
--- /dev/null
+++ b/test/ProfileTestMultiDex/Second.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Second {
+  public String getX() {
+    return "X";
+  }
+  public String getY() {
+    return "Y";
+  }
+  public String getZ() {
+    return "Z";
+  }
+}
diff --git a/test/ProfileTestMultiDex/main.jpp b/test/ProfileTestMultiDex/main.jpp
new file mode 100644
index 0000000..f2e3b4e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.jpp
@@ -0,0 +1,3 @@
+main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Second
diff --git a/test/ProfileTestMultiDex/main.list b/test/ProfileTestMultiDex/main.list
new file mode 100644
index 0000000..44ba78e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.list
@@ -0,0 +1 @@
+Main.class
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index e004b6c..8245ccf 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -74,6 +74,14 @@
         fi
         ARGS="${ARGS} $1"
         shift
+    elif [ "x$1" = "x--args" ]; then
+        shift
+        if [ "x$1" = "x" ]; then
+            echo "$0 missing argument to --args" 1>&2
+            exit 1
+        fi
+        ARGS="${ARGS} $1"
+        shift
     elif [ "x$1" = "x-Xcompiler-option" ]; then
         shift
         option="$1"
@@ -533,6 +541,7 @@
 
     cd $ANDROID_BUILD_TOP
 
+    rm -rf ${DEX_LOCATION}/dalvik-cache/
     $mkdir_cmdline || exit 1
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
 
diff --git a/test/run-test b/test/run-test
index 033e2c6..faa597e 100755
--- a/test/run-test
+++ b/test/run-test
@@ -733,7 +733,10 @@
 # To cause tests to fail fast, limit the file sizes created by dx, dex2oat and ART output to 2MB.
 build_file_size_limit=2048
 run_file_size_limit=2048
-if echo "$test_dir" | grep -Eq "(083|089|964|971)" > /dev/null; then
+
+# Add tests requiring a higher ulimit to this list. Ulimits might need to be raised to deal with
+# large amounts of expected output or large generated files.
+if echo "$test_dir" | grep -Eq "(083|089|961|964|971)" > /dev/null; then
   build_file_size_limit=5120
   run_file_size_limit=5120
 fi
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index a3ecf86..da5225c 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -77,6 +77,9 @@
  * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
+ 0.4 Pending
+   Show registered native allocations for heap dumps that support it.
+
  0.3 Dec 15, 2015
    Fix page loading performance by showing a limited number of entries by default.
    Fix mismatch between overview and "roots" totals.
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index fc7911b..2adec6f 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -43,22 +43,27 @@
  * ahat.
  */
 class AhatSnapshot {
-  private Snapshot mSnapshot;
-  private List<Heap> mHeaps;
+  private final Snapshot mSnapshot;
+  private final List<Heap> mHeaps;
 
   // Map from Instance to the list of Instances it immediately dominates.
-  private Map<Instance, List<Instance>> mDominated;
+  private final Map<Instance, List<Instance>> mDominated
+    = new HashMap<Instance, List<Instance>>();
 
   // Collection of objects whose immediate dominator is the SENTINEL_ROOT.
-  private List<Instance> mRooted;
+  private final List<Instance> mRooted = new ArrayList<Instance>();
 
   // Map from roots to their types.
   // Instances are only included if they are roots, and the collection of root
   // types is guaranteed to be non-empty.
-  private Map<Instance, Collection<RootType>> mRoots;
+  private final Map<Instance, Collection<RootType>> mRoots
+    = new HashMap<Instance, Collection<RootType>>();
 
-  private Site mRootSite;
-  private Map<Heap, Long> mHeapSizes;
+  private final Site mRootSite = new Site("ROOT");
+  private final Map<Heap, Long> mHeapSizes = new HashMap<Heap, Long>();
+
+  private final List<InstanceUtils.NativeAllocation> mNativeAllocations
+    = new ArrayList<InstanceUtils.NativeAllocation>();
 
   /**
    * Create an AhatSnapshot from an hprof file.
@@ -77,10 +82,6 @@
   private AhatSnapshot(Snapshot snapshot) {
     mSnapshot = snapshot;
     mHeaps = new ArrayList<Heap>(mSnapshot.getHeaps());
-    mDominated = new HashMap<Instance, List<Instance>>();
-    mRootSite = new Site("ROOT");
-    mHeapSizes = new HashMap<Heap, Long>();
-    mRooted = new ArrayList<Instance>();
 
     ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
     for (Heap heap : mHeaps) {
@@ -118,13 +119,18 @@
             }
           }
           mRootSite.add(stackId, 0, path.iterator(), inst);
+
+          // Update native allocations.
+          InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
+          if (alloc != null) {
+            mNativeAllocations.add(alloc);
+          }
         }
       }
       mHeapSizes.put(heap, total);
     }
 
     // Record the roots and their types.
-    mRoots = new HashMap<Instance, Collection<RootType>>();
     for (RootObj root : snapshot.getGCRoots()) {
       Instance inst = root.getReferredInstance();
       Collection<RootType> types = mRoots.get(inst);
@@ -259,4 +265,9 @@
     }
     return site;
   }
+
+  // Return a list of known native allocations in the snapshot.
+  public List<InstanceUtils.NativeAllocation> getNativeAllocations() {
+    return mNativeAllocations;
+  }
 }
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index 7fa53c7..8b7f9ea 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -20,6 +20,7 @@
 import com.android.tools.perflib.heap.ClassInstance;
 import com.android.tools.perflib.heap.ClassObj;
+import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Type;
 import java.awt.image.BufferedImage;
 
@@ -31,7 +32,7 @@
    * Returns true if the given instance is an instance of a class with the
    * given name.
    */
-  public static boolean isInstanceOfClass(Instance inst, String className) {
+  private static boolean isInstanceOfClass(Instance inst, String className) {
     ClassObj cls = (inst == null) ? null : inst.getClassObj();
     return (cls != null && className.equals(cls.getClassName()));
   }
@@ -118,12 +119,12 @@
       return null;
     }
 
-    Integer width = getIntField(inst, "mWidth");
+    Integer width = getIntField(inst, "mWidth", null);
     if (width == null) {
       return null;
     }
 
-    Integer height = getIntField(inst, "mHeight");
+    Integer height = getIntField(inst, "mHeight", null);
     if (height == null) {
       return null;
     }
@@ -186,23 +187,29 @@
   /**
    * Read an int field of an instance.
    * The field is assumed to be an int type.
-   * Returns null if the field value is not an int or could not be read.
+   * Returns <code>def</code> if the field value is not an int or could not be
+   * read.
    */
-  private static Integer getIntField(Instance inst, String fieldName) {
+  private static Integer getIntField(Instance inst, String fieldName, Integer def) {
     Object value = getField(inst, fieldName);
     if (!(value instanceof Integer)) {
-      return null;
+      return def;
     }
     return (Integer)value;
   }
 
   /**
-   * Read an int field of an instance, returning a default value if the field
-   * was not an int or could not be read.
+   * Read a long field of an instance.
+   * The field is assumed to be a long type.
+   * Returns <code>def</code> if the field value is not a long or could not
+   * be read.
    */
-  private static int getIntField(Instance inst, String fieldName, int def) {
-    Integer value = getIntField(inst, fieldName);
-    return value == null ? def : value;
+  private static Long getLongField(Instance inst, String fieldName, Long def) {
+    Object value = getField(inst, fieldName);
+    if (!(value instanceof Long)) {
+      return def;
+    }
+    return (Long)value;
   }
 
   /**
@@ -278,4 +285,73 @@
     }
     return null;
   }
+
+  public static class NativeAllocation {
+    public long size;
+    public Heap heap;
+    public long pointer;
+    public Instance referent;
+
+    public NativeAllocation(long size, Heap heap, long pointer, Instance referent) {
+      this.size = size;
+      this.heap = heap;
+      this.pointer = pointer;
+      this.referent = referent;
+    }
+  }
+
+  /**
+   * Assuming inst represents a NativeAllocation, return information about the
+   * native allocation. Returns null if the given instance doesn't represent a
+   * native allocation.
+   */
+  public static NativeAllocation getNativeAllocation(Instance inst) {
+    if (!isInstanceOfClass(inst, "libcore.util.NativeAllocationRegistry$CleanerThunk")) {
+      return null;
+    }
+
+    Long pointer = InstanceUtils.getLongField(inst, "nativePtr", null);
+    if (pointer == null) {
+      return null;
+    }
+
+    // Search for the registry field of inst.
+    // Note: We know inst as an instance of ClassInstance because we already
+    // read the nativePtr field from it.
+    Instance registry = null;
+    for (ClassInstance.FieldValue field : ((ClassInstance)inst).getValues()) {
+      Object fieldValue = field.getValue();
+      if (fieldValue instanceof Instance) {
+        Instance fieldInst = (Instance)fieldValue;
+        if (isInstanceOfClass(fieldInst, "libcore.util.NativeAllocationRegistry")) {
+          registry = fieldInst;
+          break;
+        }
+      }
+    }
+
+    if (registry == null) {
+      return null;
+    }
+
+    Long size = InstanceUtils.getLongField(registry, "size", null);
+    if (size == null) {
+      return null;
+    }
+
+    Instance referent = null;
+    for (Instance ref : inst.getHardReferences()) {
+      if (isInstanceOfClass(ref, "sun.misc.Cleaner")) {
+        referent = InstanceUtils.getReferent(ref);
+        if (referent != null) {
+          break;
+        }
+      }
+    }
+
+    if (referent == null) {
+      return null;
+    }
+    return new NativeAllocation(size, inst.getHeap(), pointer, referent);
+  }
 }
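
Note on the extraction above: it depends on the heap-dump shape of libcore's
NativeAllocationRegistry, where the CleanerThunk holds the nativePtr, its
enclosing registry holds the size, and a sun.misc.Cleaner hard-references the
Java-side referent. As a minimal sketch of how a caller might consume the
result (only getNativeAllocation and the NativeAllocation fields come from
this change; the aggregation helper is illustrative):

    // Sketch: fold a stream of heap-dump instances into per-heap native totals.
    // InstanceUtils.getNativeAllocation() and the NativeAllocation fields are
    // from the diff above; everything else here is hypothetical glue.
    static Map<String, Long> nativeBytesByHeap(Iterable<Instance> instances) {
      Map<String, Long> totals = new HashMap<String, Long>();
      for (Instance inst : instances) {
        InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
        if (alloc == null) {
          continue;  // Not a NativeAllocationRegistry$CleanerThunk instance.
        }
        String heap = alloc.heap.getName();
        Long sum = totals.get(heap);
        totals.put(heap, (sum == null ? 0L : sum) + alloc.size);
      }
      return totals;
    }
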
diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java
index 091820f..d784599 100644
--- a/tools/ahat/src/Main.java
+++ b/tools/ahat/src/Main.java
@@ -78,6 +78,7 @@
     server.createContext("/object", new AhatHttpHandler(new ObjectHandler(ahat)));
     server.createContext("/objects", new AhatHttpHandler(new ObjectsHandler(ahat)));
     server.createContext("/site", new AhatHttpHandler(new SiteHandler(ahat)));
+    server.createContext("/native", new AhatHttpHandler(new NativeAllocationsHandler(ahat)));
     server.createContext("/bitmap", new BitmapHandler(ahat));
     server.createContext("/help", new HelpHandler());
     server.createContext("/style.css", new StaticHandler("style.css", "text/css"));
diff --git a/tools/ahat/src/Menu.java b/tools/ahat/src/Menu.java
index 018e019..232b849 100644
--- a/tools/ahat/src/Menu.java
+++ b/tools/ahat/src/Menu.java
@@ -27,6 +27,8 @@
       .append(" - ")
       .appendLink(DocString.uri("sites"), DocString.text("allocations"))
       .append(" - ")
+      .appendLink(DocString.uri("native"), DocString.text("native"))
+      .append(" - ")
       .appendLink(DocString.uri("help"), DocString.text("help"));
 
   /**
diff --git a/tools/ahat/src/NativeAllocationsHandler.java b/tools/ahat/src/NativeAllocationsHandler.java
new file mode 100644
index 0000000..17407e1
--- /dev/null
+++ b/tools/ahat/src/NativeAllocationsHandler.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+class NativeAllocationsHandler implements AhatHandler {
+  private static final String ALLOCATIONS_ID = "allocations";
+
+  private AhatSnapshot mSnapshot;
+
+  public NativeAllocationsHandler(AhatSnapshot snapshot) {
+    mSnapshot = snapshot;
+  }
+
+  @Override
+  public void handle(Doc doc, Query query) throws IOException {
+    List<InstanceUtils.NativeAllocation> allocs = mSnapshot.getNativeAllocations();
+
+    doc.title("Registered Native Allocations");
+
+    doc.section("Overview");
+    long totalSize = 0;
+    for (InstanceUtils.NativeAllocation alloc : allocs) {
+      totalSize += alloc.size;
+    }
+    doc.descriptions();
+    doc.description(DocString.text("Number of Registered Native Allocations"),
+        DocString.format("%,14d", allocs.size()));
+    doc.description(DocString.text("Total Size of Registered Native Allocations"),
+        DocString.format("%,14d", totalSize));
+    doc.end();
+
+    doc.section("List of Allocations");
+    if (allocs.isEmpty()) {
+      doc.println(DocString.text("(none)"));
+    } else {
+      doc.table(
+          new Column("Size", Column.Align.RIGHT),
+          new Column("Heap"),
+          new Column("Native Pointer"),
+          new Column("Referent"));
+      Comparator<InstanceUtils.NativeAllocation> compare
+        = new Sort.WithPriority<InstanceUtils.NativeAllocation>(
+            new Sort.NativeAllocationByHeapName(),
+            new Sort.NativeAllocationBySize());
+      Collections.sort(allocs, compare);
+      SubsetSelector<InstanceUtils.NativeAllocation> selector
+        = new SubsetSelector<InstanceUtils.NativeAllocation>(query, ALLOCATIONS_ID, allocs);
+      for (InstanceUtils.NativeAllocation alloc : selector.selected()) {
+        doc.row(
+            DocString.format("%,14d", alloc.size),
+            DocString.text(alloc.heap.getName()),
+            DocString.format("0x%x", alloc.pointer),
+            Value.render(mSnapshot, alloc.referent));
+      }
+
+      // Print a summary of the remaining entries if there are any.
+      List<InstanceUtils.NativeAllocation> remaining = selector.remaining();
+      if (!remaining.isEmpty()) {
+        long total = 0;
+        for (InstanceUtils.NativeAllocation alloc : remaining) {
+          total += alloc.size;
+        }
+
+        doc.row(
+            DocString.format("%,14d", total),
+            DocString.text("..."),
+            DocString.text("..."),
+            DocString.text("..."));
+      }
+
+      doc.end();
+      selector.render(doc);
+    }
+  }
+}
+
diff --git a/tools/ahat/src/OverviewHandler.java b/tools/ahat/src/OverviewHandler.java
index 720fcb4..0dbad7e 100644
--- a/tools/ahat/src/OverviewHandler.java
+++ b/tools/ahat/src/OverviewHandler.java
@@ -48,6 +48,22 @@
 
     doc.section("Heap Sizes");
     printHeapSizes(doc, query);
+
+    List<InstanceUtils.NativeAllocation> allocs = mSnapshot.getNativeAllocations();
+    if (!allocs.isEmpty()) {
+      doc.section("Registered Native Allocations");
+      long totalSize = 0;
+      for (InstanceUtils.NativeAllocation alloc : allocs) {
+        totalSize += alloc.size;
+      }
+      doc.descriptions();
+      doc.description(DocString.text("Number of Registered Native Allocations"),
+          DocString.format("%,14d", allocs.size()));
+      doc.description(DocString.text("Total Size of Registered Native Allocations"),
+          DocString.format("%,14d", totalSize));
+      doc.end();
+    }
+
     doc.big(Menu.getMenu());
   }
 
diff --git a/tools/ahat/src/Sort.java b/tools/ahat/src/Sort.java
index 3b79166..c5f89c3 100644
--- a/tools/ahat/src/Sort.java
+++ b/tools/ahat/src/Sort.java
@@ -177,5 +177,31 @@
       return aName.compareTo(bName);
     }
   }
+
+  /**
+   * Compare InstanceUtils.NativeAllocation by heap name.
+   * Different allocations with the same heap name are considered equal for
+   * the purposes of comparison.
+   */
+  public static class NativeAllocationByHeapName
+      implements Comparator<InstanceUtils.NativeAllocation> {
+    @Override
+    public int compare(InstanceUtils.NativeAllocation a, InstanceUtils.NativeAllocation b) {
+      return a.heap.getName().compareTo(b.heap.getName());
+    }
+  }
+
+  /**
+   * Compare InstanceUtils.NativeAllocation by their size.
+   * Different allocations with the same size are considered equal for the
+   * purposes of comparison.
+   * This sorts allocations from larger size to smaller size.
+   */
+  public static class NativeAllocationBySize implements Comparator<InstanceUtils.NativeAllocation> {
+    @Override
+    public int compare(InstanceUtils.NativeAllocation a, InstanceUtils.NativeAllocation b) {
+      return Long.compare(b.size, a.size);
+    }
+  }
 }
 
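
For reference, these two comparators are meant to be composed, as
NativeAllocationsHandler does above: heap name establishes the primary order
and size (descending) breaks ties. A minimal usage sketch, assuming a
List<InstanceUtils.NativeAllocation> named allocs:

    // Group allocations by heap name, largest-first within each heap.
    Comparator<InstanceUtils.NativeAllocation> compare =
        new Sort.WithPriority<InstanceUtils.NativeAllocation>(
            new Sort.NativeAllocationByHeapName(),
            new Sort.NativeAllocationBySize());
    Collections.sort(allocs, compare);
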
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index 90cd7af..701d60e 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -19,6 +19,7 @@
 import java.lang.ref.PhantomReference;
 import java.lang.ref.ReferenceQueue;
 import java.lang.ref.WeakReference;
+import libcore.util.NativeAllocationRegistry;
 
 /**
  * Program used to create a heap dump for test purposes.
@@ -47,6 +48,9 @@
       for (int i = 0; i < N; i++) {
         bigArray[i] = (byte)((i*i) & 0xFF);
       }
+
+      NativeAllocationRegistry registry = new NativeAllocationRegistry(0x12345, 42);
+      registry.registerNativeAllocation(anObject, 0xABCDABCD);
     }
   }
 
diff --git a/tools/ahat/test/NativeAllocationTest.java b/tools/ahat/test/NativeAllocationTest.java
new file mode 100644
index 0000000..7ad4c1d
--- /dev/null
+++ b/tools/ahat/test/NativeAllocationTest.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.tools.perflib.heap.Instance;
+import java.io.IOException;
+import static org.junit.Assert.fail;
+import static org.junit.Assert.assertEquals;
+import org.junit.Test;
+
+public class NativeAllocationTest {
+
+  @Test
+  public void nativeAllocation() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatSnapshot snapshot = dump.getAhatSnapshot();
+    Instance referent = (Instance)dump.getDumpedThing("anObject");
+    for (InstanceUtils.NativeAllocation alloc : snapshot.getNativeAllocations()) {
+      if (alloc.referent == referent) {
+        assertEquals(42, alloc.size);
+        assertEquals(referent.getHeap(), alloc.heap);
+        assertEquals(0xABCDABCD, alloc.pointer);
+        return;
+      }
+    }
+    fail("No native allocation found with anObject as the referent");
+  }
+}
+
diff --git a/tools/ahat/test/Tests.java b/tools/ahat/test/Tests.java
index e8894e2..3291470 100644
--- a/tools/ahat/test/Tests.java
+++ b/tools/ahat/test/Tests.java
@@ -23,6 +23,7 @@
     if (args.length == 0) {
       args = new String[]{
         "com.android.ahat.InstanceUtilsTest",
+        "com.android.ahat.NativeAllocationTest",
         "com.android.ahat.PerformanceTest",
         "com.android.ahat.QueryTest",
         "com.android.ahat.SortTest",
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 351e99e..6d67f84 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -260,6 +260,7 @@
           "dalvik.system.JniTest#testPassingShorts",
           "dalvik.system.JniTest#testPassingThis",
           "libcore.util.NativeAllocationRegistryTest#testBadSize",
+          "libcore.util.NativeAllocationRegistryTest#testEarlyFree",
           "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndNoSharedRegistry",
           "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndSharedRegistry",
           "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndNoSharedRegistry",
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index 6ea83d2..95d1292 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -34,5 +34,13 @@
           "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries",
           "libcore.java.util.zip.ZipInputStreamTest#testLongMessage"],
   bug: 26507762
+},
+{
+  description: "TimeoutException on hammerhead-concurrent-collector",
+  result: EXEC_FAILED,
+  modes: [device],
+  names: ["libcore.icu.RelativeDateTimeFormatterTest#test_bug25821045",
+          "libcore.java.text.SimpleDateFormatTest#testLocales"],
+  bug: 26711853
 }
 ]