Merge "ART: Fix JIT profile saver"
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index df7df26..93e310e 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -57,6 +57,9 @@
 # Do you want optimizing compiler tests run?
 ART_TEST_OPTIMIZING ?= true
 
+# Do you want to test the optimizing compiler with graph coloring register allocation?
+ART_TEST_OPTIMIZING_GRAPH_COLOR ?= $(ART_TEST_FULL)
+
 # Do we want to test a PIC-compiled core image?
 ART_TEST_PIC_IMAGE ?= $(ART_TEST_FULL)
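A minimal usage sketch, assuming the ART build wires ART_TEST_OPTIMIZING_GRAPH_COLOR into the run-test rules the same way as its sibling ART_TEST_* switches (the make target below is illustrative):

    export ART_TEST_OPTIMIZING_GRAPH_COLOR=true
    make test-art-host-run-test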
 
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 1e2c4ef..c79205f 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -234,8 +234,6 @@
   runtime/interpreter/unstarted_runtime_test.cc \
   runtime/java_vm_ext_test.cc \
   runtime/jit/profile_compilation_info_test.cc \
-  runtime/lambda/closure_test.cc \
-  runtime/lambda/shorty_field_type_test.cc \
   runtime/leb128_test.cc \
   runtime/mem_map_test.cc \
   runtime/memory_region_test.cc \
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 7ded3bf..5809dcd 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -501,11 +501,6 @@
   EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kNone,
                             "-Xexperimental:none",
                             M::Experimental);
-
-  // Enabled explicitly
-  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kLambdas,
-                            "-Xexperimental:lambdas",
-                            M::Experimental);
 }
 
 // -Xverify:_
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index f05648c..1146f95 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -735,8 +735,6 @@
   Result ParseAndAppend(const std::string& option, ExperimentalFlags& existing) {
     if (option == "none") {
       existing = ExperimentalFlags::kNone;
-    } else if (option == "lambdas") {
-      existing = existing | ExperimentalFlags::kLambdas;
     } else {
       return Result::Failure(std::string("Unknown option '") + option + "'");
     }
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 8261a87..0ede30d 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -67,8 +67,9 @@
 	optimizing/parallel_move_resolver.cc \
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/reference_type_propagation.cc \
-	optimizing/register_allocator.cc \
 	optimizing/register_allocation_resolver.cc \
+	optimizing/register_allocator.cc \
+	optimizing/register_allocator_graph_color.cc \
 	optimizing/register_allocator_linear_scan.cc \
 	optimizing/select_generator.cc \
 	optimizing/sharpening.cc \
@@ -80,6 +81,7 @@
 	optimizing/x86_memory_gen.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
+	utils/jni_macro_assembler.cc \
 	utils/swap_space.cc \
 	compiler.cc \
 	elf_writer.cc \
@@ -96,6 +98,7 @@
 	utils/arm/assembler_arm.cc \
 	utils/arm/assembler_arm32.cc \
 	utils/arm/assembler_thumb2.cc \
+	utils/arm/jni_macro_assembler_arm.cc \
 	utils/arm/managed_register_arm.cc \
 
 # TODO We should really separate out those files that are actually needed for both variants of an
@@ -112,6 +115,7 @@
 	optimizing/instruction_simplifier_shared.cc \
 	optimizing/intrinsics_arm64.cc \
 	utils/arm64/assembler_arm64.cc \
+	utils/arm64/jni_macro_assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
 
 LIBART_COMPILER_SRC_FILES_mips := \
@@ -141,6 +145,7 @@
 	optimizing/intrinsics_x86.cc \
 	optimizing/pc_relative_fixups_x86.cc \
 	utils/x86/assembler_x86.cc \
+	utils/x86/jni_macro_assembler_x86.cc \
 	utils/x86/managed_register_x86.cc \
 
 LIBART_COMPILER_SRC_FILES_x86_64 := \
@@ -150,6 +155,7 @@
 	optimizing/intrinsics_x86_64.cc \
 	optimizing/code_generator_x86_64.cc \
 	utils/x86_64/assembler_x86_64.cc \
+	utils/x86_64/jni_macro_assembler_x86_64.cc \
 	utils/x86_64/managed_register_x86_64.cc \
 
 
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index f20dba3..30ba8c9 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -44,7 +44,9 @@
       init_failure_output_(nullptr),
       dump_cfg_file_name_(""),
       dump_cfg_append_(false),
-      force_determinism_(false) {
+      force_determinism_(false),
+      register_allocation_strategy_(RegisterAllocator::kRegisterAllocatorDefault),
+      passes_to_run_(nullptr) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -74,7 +76,9 @@
                                  bool abort_on_hard_verifier_failure,
                                  const std::string& dump_cfg_file_name,
                                  bool dump_cfg_append,
-                                 bool force_determinism
+                                 bool force_determinism,
+                                 RegisterAllocator::Strategy regalloc_strategy,
+                                 const std::vector<std::string>* passes_to_run
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -99,7 +103,9 @@
     init_failure_output_(init_failure_output),
     dump_cfg_file_name_(dump_cfg_file_name),
     dump_cfg_append_(dump_cfg_append),
-    force_determinism_(force_determinism) {
+    force_determinism_(force_determinism),
+    register_allocation_strategy_(regalloc_strategy),
+    passes_to_run_(passes_to_run) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
@@ -144,6 +150,19 @@
   }
 }
 
+void CompilerOptions::ParseRegisterAllocationStrategy(const StringPiece& option,
+                                                      UsageFn Usage) {
+  DCHECK(option.starts_with("--register-allocation-strategy="));
+  StringPiece choice = option.substr(strlen("--register-allocation-strategy=")).data();
+  if (choice == "linear-scan") {
+    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan;
+  } else if (choice == "graph-color") {
+    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorGraphColor;
+  } else {
+    Usage("Unrecognized register allocation strategy. Try linear-scan, or graph-color.");
+  }
+}
+
 bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usage) {
   if (option.starts_with("--compiler-filter=")) {
     const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
@@ -190,6 +209,8 @@
     dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
   } else if (option.starts_with("--dump-cfg-append")) {
     dump_cfg_append_ = true;
+  } else if (option.starts_with("--register-allocation-strategy=")) {
+    ParseRegisterAllocationStrategy(option, Usage);
   } else {
     // Option not recognized.
     return false;
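A minimal invocation sketch for the new flag, assuming a typical dex2oat command line; the surrounding arguments are illustrative, only --register-allocation-strategy= comes from this change:

    dex2oat --dex-file=app.dex --oat-file=app.oat \
        --register-allocation-strategy=graph-color

Any value other than linear-scan or graph-color falls through to the Usage() error above.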
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 60b700a..abc58d7 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -24,6 +24,7 @@
 #include "base/macros.h"
 #include "compiler_filter.h"
 #include "globals.h"
+#include "optimizing/register_allocator.h"
 #include "utils.h"
 
 namespace art {
@@ -74,7 +75,9 @@
                   bool abort_on_hard_verifier_failure,
                   const std::string& dump_cfg_file_name,
                   bool dump_cfg_append,
-                  bool force_determinism);
+                  bool force_determinism,
+                  RegisterAllocator::Strategy regalloc_strategy,
+                  const std::vector<std::string>* passes_to_run);
 
   CompilerFilter::Filter GetCompilerFilter() const {
     return compiler_filter_;
@@ -244,6 +247,14 @@
     return force_determinism_;
   }
 
+  RegisterAllocator::Strategy GetRegisterAllocationStrategy() const {
+    return register_allocation_strategy_;
+  }
+
+  const std::vector<std::string>* GetPassesToRun() const {
+    return passes_to_run_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
@@ -254,6 +265,7 @@
   void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage);
   void ParseLargeMethodMax(const StringPiece& option, UsageFn Usage);
   void ParseHugeMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseRegisterAllocationStrategy(const StringPiece& option, UsageFn Usage);
 
   CompilerFilter::Filter compiler_filter_;
   size_t huge_method_threshold_;
@@ -297,6 +309,16 @@
   // outcomes.
   bool force_determinism_;
 
+  RegisterAllocator::Strategy register_allocation_strategy_;
+
+  // If not null, specifies optimization passes which will be run instead of defaults.
+  // Note that passes_to_run_ is not checked for correctness and providing an incorrect
+  // list of passes can lead to unexpected compiler behavior. This is caused by dependencies
+  // between passes; failing to satisfy them can, for example, lead to compiler crashes.
+  // Passing pass names which are not recognized by the compiler will result in
+  // compiler-dependent behavior.
+  const std::vector<std::string>* passes_to_run_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index e223534..86f91c5 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -170,7 +170,7 @@
   Runtime* r = Runtime::Current();
   r->SetInstructionSet(kRuntimeISA);
   ArtMethod* save_method = r->CreateCalleeSaveMethod();
-  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAll);
+  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAllCalleeSaves);
   QuickMethodFrameInfo frame_info = r->GetRuntimeMethodFrameInfo(save_method);
 
   ASSERT_EQ(kStackAlignment, 16U);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 7a34683..efae4d0 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -39,6 +39,7 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "gc/accounting/space_bitmap-inl.h"
+#include "gc/collector/concurrent_copying.h"
 #include "gc/heap.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
@@ -1372,11 +1373,14 @@
   image_methods_[ImageHeader::kResolutionMethod] = runtime->GetResolutionMethod();
   image_methods_[ImageHeader::kImtConflictMethod] = runtime->GetImtConflictMethod();
   image_methods_[ImageHeader::kImtUnimplementedMethod] = runtime->GetImtUnimplementedMethod();
-  image_methods_[ImageHeader::kCalleeSaveMethod] = runtime->GetCalleeSaveMethod(Runtime::kSaveAll);
-  image_methods_[ImageHeader::kRefsOnlySaveMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  image_methods_[ImageHeader::kRefsAndArgsSaveMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+  image_methods_[ImageHeader::kSaveAllCalleeSavesMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves);
+  image_methods_[ImageHeader::kSaveRefsOnlyMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly);
+  image_methods_[ImageHeader::kSaveRefsAndArgsMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
+  image_methods_[ImageHeader::kSaveEverythingMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveEverything);
   // Visit image methods first to have the main runtime methods in the first image.
   for (auto* m : image_methods_) {
     CHECK(m != nullptr);
@@ -1823,6 +1827,11 @@
   const auto it = saved_hashcode_map_.find(obj);
   dst->SetLockWord(it != saved_hashcode_map_.end() ?
       LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false);
+  if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+    // Treat all of the objects in the image as marked to avoid unnecessary dirty pages. This is
+    // safe since we mark all of the objects that may reference non-immune objects as gray.
+    CHECK(dst->AtomicSetMarkBit(0, 1));
+  }
   FixupObject(obj, dst);
 }
 
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 626a975..7d13656 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -217,8 +217,7 @@
   // uint32 = typeof(lockword_)
   // Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK
   // failures due to invalid read barrier bits during object field reads.
-  static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits -
-      LockWord::kReadBarrierStateSize;
+  static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits - LockWord::kGCStateSize;
   // 111000.....0
   static const size_t kBinMask = ((static_cast<size_t>(1) << kBinBits) - 1) << kBinShift;
 
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 1785338..6f6a8f5 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -32,6 +32,7 @@
 #include "oat_file-inl.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
+#include "optimizing/register_allocator.h"
 #include "thread_list.h"
 
 namespace art {
@@ -110,7 +111,9 @@
       /* abort_on_hard_verifier_failure */ false,
       /* dump_cfg_file_name */ "",
       /* dump_cfg_append */ false,
-      /* force_determinism */ false));
+      /* force_determinism */ false,
+      RegisterAllocator::kRegisterAllocatorDefault,
+      /* passes_to_run */ nullptr));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 3526802..524ce4d 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -19,10 +19,12 @@
 
 #include "arch/instruction_set.h"
 #include "base/arena_allocator.h"
+#include "base/enums.h"
 #include "cfi_test.h"
 #include "gtest/gtest.h"
 #include "jni/quick/calling_convention.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 
 #include "jni/jni_cfi_test_expected.inc"
 
@@ -36,9 +38,23 @@
   // Enable this flag to generate the expected outputs.
   static constexpr bool kGenerateExpected = false;
 
-  void TestImpl(InstructionSet isa, const char* isa_str,
+  void TestImpl(InstructionSet isa,
+                const char* isa_str,
                 const std::vector<uint8_t>& expected_asm,
                 const std::vector<uint8_t>& expected_cfi) {
+    if (Is64BitInstructionSet(isa)) {
+      TestImplSized<PointerSize::k64>(isa, isa_str, expected_asm, expected_cfi);
+    } else {
+      TestImplSized<PointerSize::k32>(isa, isa_str, expected_asm, expected_cfi);
+    }
+  }
+
+ private:
+  template <PointerSize kPointerSize>
+  void TestImplSized(InstructionSet isa,
+                     const char* isa_str,
+                     const std::vector<uint8_t>& expected_asm,
+                     const std::vector<uint8_t>& expected_cfi) {
     // Description of simple method.
     const bool is_static = true;
     const bool is_synchronized = false;
@@ -55,7 +71,8 @@
     ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters();
 
     // Assemble the method.
-    std::unique_ptr<Assembler> jni_asm(Assembler::Create(&arena, isa));
+    std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm(
+        JNIMacroAssembler<kPointerSize>::Create(&arena, isa));
     jni_asm->cfi().SetEnabled(true);
     jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
                         callee_save_regs, mr_conv->EntrySpills());
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index 16b4386..da72c75 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -1,8 +1,7 @@
 static constexpr uint8_t expected_asm_kThumb2[] = {
     0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90,
-    0xCD, 0xF8, 0x84, 0x10, 0x8D, 0xED, 0x22, 0x0A, 0xCD, 0xF8, 0x8C, 0x20,
-    0xCD, 0xF8, 0x90, 0x30, 0x88, 0xB0, 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC,
-    0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
+    0x21, 0x91, 0x8D, 0xED, 0x22, 0x0A, 0x23, 0x92, 0x24, 0x93, 0x88, 0xB0,
+    0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
 };
 static constexpr uint8_t expected_cfi_kThumb2[] = {
     0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A,
@@ -11,7 +10,7 @@
     0x55, 0x12, 0x05, 0x56, 0x11, 0x05, 0x57, 0x10, 0x05, 0x58, 0x0F, 0x05,
     0x59, 0x0E, 0x05, 0x5A, 0x0D, 0x05, 0x5B, 0x0C, 0x05, 0x5C, 0x0B, 0x05,
     0x5D, 0x0A, 0x05, 0x5E, 0x09, 0x05, 0x5F, 0x08, 0x42, 0x0E, 0x80, 0x01,
-    0x54, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
+    0x4E, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
     0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06,
     0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06,
     0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44,
@@ -47,38 +46,38 @@
 // 0x00000008: sub sp, sp, #36
 // 0x0000000a: .cfi_def_cfa_offset: 128
 // 0x0000000a: str r0, [sp, #0]
-// 0x0000000c: str.w r1, [sp, #132]
-// 0x00000010: vstr.f32 s0, [sp, #136]
-// 0x00000014: str.w r2, [sp, #140]
-// 0x00000018: str.w r3, [sp, #144]
-// 0x0000001c: sub sp, sp, #32
-// 0x0000001e: .cfi_def_cfa_offset: 160
-// 0x0000001e: add sp, sp, #32
-// 0x00000020: .cfi_def_cfa_offset: 128
-// 0x00000020: .cfi_remember_state
-// 0x00000020: add sp, sp, #36
-// 0x00000022: .cfi_def_cfa_offset: 92
-// 0x00000022: vpop.f32 {s16-s31}
-// 0x00000026: .cfi_def_cfa_offset: 28
-// 0x00000026: .cfi_restore_extended: r80
-// 0x00000026: .cfi_restore_extended: r81
-// 0x00000026: .cfi_restore_extended: r82
-// 0x00000026: .cfi_restore_extended: r83
-// 0x00000026: .cfi_restore_extended: r84
-// 0x00000026: .cfi_restore_extended: r85
-// 0x00000026: .cfi_restore_extended: r86
-// 0x00000026: .cfi_restore_extended: r87
-// 0x00000026: .cfi_restore_extended: r88
-// 0x00000026: .cfi_restore_extended: r89
-// 0x00000026: .cfi_restore_extended: r90
-// 0x00000026: .cfi_restore_extended: r91
-// 0x00000026: .cfi_restore_extended: r92
-// 0x00000026: .cfi_restore_extended: r93
-// 0x00000026: .cfi_restore_extended: r94
-// 0x00000026: .cfi_restore_extended: r95
-// 0x00000026: pop {r5, r6, r7, r8, r10, r11, pc}
-// 0x0000002a: .cfi_restore_state
-// 0x0000002a: .cfi_def_cfa_offset: 128
+// 0x0000000c: str r1, [sp, #132]
+// 0x0000000e: vstr.f32 s0, [sp, #136]
+// 0x00000012: str r2, [sp, #140]
+// 0x00000014: str r3, [sp, #144]
+// 0x00000016: sub sp, sp, #32
+// 0x00000018: .cfi_def_cfa_offset: 160
+// 0x00000018: add sp, sp, #32
+// 0x0000001a: .cfi_def_cfa_offset: 128
+// 0x0000001a: .cfi_remember_state
+// 0x0000001a: add sp, sp, #36
+// 0x0000001c: .cfi_def_cfa_offset: 92
+// 0x0000001c: vpop.f32 {s16-s31}
+// 0x00000020: .cfi_def_cfa_offset: 28
+// 0x00000020: .cfi_restore_extended: r80
+// 0x00000020: .cfi_restore_extended: r81
+// 0x00000020: .cfi_restore_extended: r82
+// 0x00000020: .cfi_restore_extended: r83
+// 0x00000020: .cfi_restore_extended: r84
+// 0x00000020: .cfi_restore_extended: r85
+// 0x00000020: .cfi_restore_extended: r86
+// 0x00000020: .cfi_restore_extended: r87
+// 0x00000020: .cfi_restore_extended: r88
+// 0x00000020: .cfi_restore_extended: r89
+// 0x00000020: .cfi_restore_extended: r90
+// 0x00000020: .cfi_restore_extended: r91
+// 0x00000020: .cfi_restore_extended: r92
+// 0x00000020: .cfi_restore_extended: r93
+// 0x00000020: .cfi_restore_extended: r94
+// 0x00000020: .cfi_restore_extended: r95
+// 0x00000020: pop {r5, r6, r7, r8, r10, r11, pc}
+// 0x00000024: .cfi_restore_state
+// 0x00000024: .cfi_def_cfa_offset: 128
 
 static constexpr uint8_t expected_asm_kArm64[] = {
     0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9,
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 277b794..f99f6a8 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -26,6 +26,7 @@
 #include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "memory_region.h"
 #include "calling_convention.h"
 #include "class_linker.h"
 #include "compiled_method.h"
@@ -34,7 +35,9 @@
 #include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "jni_env_ext.h"
+#include "debug/dwarf/debug_frame_opcode_writer.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/managed_register.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/arm64/managed_register_arm64.h"
@@ -47,22 +50,32 @@
 
 namespace art {
 
-static void CopyParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv,
                           size_t frame_size, size_t out_arg_size);
-static void SetNativeParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                                JniCallingConvention* jni_conv,
                                ManagedRegister in_reg);
 
+template <PointerSize kPointerSize>
+static std::unique_ptr<JNIMacroAssembler<kPointerSize>> GetMacroAssembler(
+    ArenaAllocator* arena, InstructionSet isa, const InstructionSetFeatures* features) {
+  return JNIMacroAssembler<kPointerSize>::Create(arena, isa, features);
+}
+
 // Generate the JNI bridge for the given method, general contract:
 // - Arguments are in the managed runtime format, either on stack or in
 //   registers, a reference to the method object is supplied as part of this
 //   convention.
 //
-CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
-                                            uint32_t access_flags, uint32_t method_idx,
-                                            const DexFile& dex_file) {
+template <PointerSize kPointerSize>
+static CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
+                                                   uint32_t access_flags,
+                                                   uint32_t method_idx,
+                                                   const DexFile& dex_file) {
   const bool is_native = (access_flags & kAccNative) != 0;
   CHECK(is_native);
   const bool is_static = (access_flags & kAccStatic) != 0;
@@ -70,7 +83,6 @@
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
   InstructionSet instruction_set = driver->GetInstructionSet();
   const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures();
-  const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
 
   ArenaPool pool;
   ArenaAllocator arena(&pool);
@@ -101,8 +113,8 @@
       &arena, is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  std::unique_ptr<Assembler> jni_asm(
-      Assembler::Create(&arena, instruction_set, instruction_set_features));
+  std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm =
+      GetMacroAssembler<kPointerSize>(&arena, instruction_set, instruction_set_features);
   jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
 
   // Offsets into data structures
@@ -124,21 +136,12 @@
                            main_jni_conv->ReferenceCount(),
                            mr_conv->InterproceduralScratchRegister());
 
-  if (is_64_bit_target) {
-    __ CopyRawPtrFromThread64(main_jni_conv->HandleScopeLinkOffset(),
-                              Thread::TopHandleScopeOffset<PointerSize::k64>(),
+  __ CopyRawPtrFromThread(main_jni_conv->HandleScopeLinkOffset(),
+                          Thread::TopHandleScopeOffset<kPointerSize>(),
+                          mr_conv->InterproceduralScratchRegister());
+  __ StoreStackOffsetToThread(Thread::TopHandleScopeOffset<kPointerSize>(),
+                              main_jni_conv->HandleScopeOffset(),
                               mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread64(Thread::TopHandleScopeOffset<PointerSize::k64>(),
-                                  main_jni_conv->HandleScopeOffset(),
-                                  mr_conv->InterproceduralScratchRegister());
-  } else {
-    __ CopyRawPtrFromThread32(main_jni_conv->HandleScopeLinkOffset(),
-                              Thread::TopHandleScopeOffset<PointerSize::k32>(),
-                              mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread32(Thread::TopHandleScopeOffset<PointerSize::k32>(),
-                                  main_jni_conv->HandleScopeOffset(),
-                                  mr_conv->InterproceduralScratchRegister());
-  }
 
   // 3. Place incoming reference arguments into handle scope
   main_jni_conv->Next();  // Skip JNIEnv*
@@ -188,11 +191,7 @@
   }
 
   // 4. Write out the end of the quick frames.
-  if (is_64_bit_target) {
-    __ StoreStackPointerToThread64(Thread::TopOfManagedStackOffset<PointerSize::k64>());
-  } else {
-    __ StoreStackPointerToThread32(Thread::TopOfManagedStackOffset<PointerSize::k32>());
-  }
+  __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
 
   // 5. Move frame down to allow space for out going args.
   const size_t main_out_arg_size = main_jni_conv->OutArgSize();
@@ -202,10 +201,8 @@
   // Call the read barrier for the declaring class loaded from the method for a static call.
   // Note that we always have outgoing param space available for at least two params.
   if (kUseReadBarrier && is_static) {
-    ThreadOffset32 read_barrier32 =
-        QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pReadBarrierJni);
-    ThreadOffset64 read_barrier64 =
-        QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pReadBarrierJni);
+    ThreadOffset<kPointerSize> read_barrier = QUICK_ENTRYPOINT_OFFSET(kPointerSize,
+                                                                      pReadBarrierJni);
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
     main_jni_conv->Next();  // Skip JNIEnv.
     FrameOffset class_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
@@ -225,21 +222,13 @@
     // Pass the current thread as the second argument and call.
     if (main_jni_conv->IsCurrentParamInRegister()) {
       __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-      if (is_64_bit_target) {
-        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier64),
-                main_jni_conv->InterproceduralScratchRegister());
-      } else {
-        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier32),
-                main_jni_conv->InterproceduralScratchRegister());
-      }
+      __ Call(main_jni_conv->CurrentParamRegister(),
+              Offset(read_barrier),
+              main_jni_conv->InterproceduralScratchRegister());
     } else {
       __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                           main_jni_conv->InterproceduralScratchRegister());
-      if (is_64_bit_target) {
-        __ CallFromThread64(read_barrier64, main_jni_conv->InterproceduralScratchRegister());
-      } else {
-        __ CallFromThread32(read_barrier32, main_jni_conv->InterproceduralScratchRegister());
-      }
+      __ CallFromThread(read_barrier, main_jni_conv->InterproceduralScratchRegister());
     }
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));  // Reset.
   }
@@ -248,14 +237,10 @@
   //    can occur. The result is the saved JNI local state that is restored by the exit call. We
   //    abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
   //    arguments.
-  ThreadOffset32 jni_start32 =
+  ThreadOffset<kPointerSize> jni_start =
       is_synchronized
-          ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodStartSynchronized)
-          : QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodStart);
-  ThreadOffset64 jni_start64 =
-      is_synchronized
-          ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodStartSynchronized)
-          : QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodStart);
+          ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
+          : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart);
   main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
   FrameOffset locked_object_handle_scope_offset(0);
   if (is_synchronized) {
@@ -276,21 +261,13 @@
   }
   if (main_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-    if (is_64_bit_target) {
-      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start64),
-              main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start32),
-              main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ Call(main_jni_conv->CurrentParamRegister(),
+            Offset(jni_start),
+            main_jni_conv->InterproceduralScratchRegister());
   } else {
     __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                         main_jni_conv->InterproceduralScratchRegister());
-    if (is_64_bit_target) {
-      __ CallFromThread64(jni_start64, main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CallFromThread32(jni_start32, main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CallFromThread(jni_start, main_jni_conv->InterproceduralScratchRegister());
   }
   if (is_synchronized) {  // Check for exceptions from monitor enter.
     __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), main_out_arg_size);
@@ -352,20 +329,12 @@
   if (main_jni_conv->IsCurrentParamInRegister()) {
     ManagedRegister jni_env = main_jni_conv->CurrentParamRegister();
     DCHECK(!jni_env.Equals(main_jni_conv->InterproceduralScratchRegister()));
-    if (is_64_bit_target) {
-      __ LoadRawPtrFromThread64(jni_env, Thread::JniEnvOffset<PointerSize::k64>());
-    } else {
-      __ LoadRawPtrFromThread32(jni_env, Thread::JniEnvOffset<PointerSize::k32>());
-    }
+    __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset<kPointerSize>());
   } else {
     FrameOffset jni_env = main_jni_conv->CurrentParamStackOffset();
-    if (is_64_bit_target) {
-      __ CopyRawPtrFromThread64(jni_env, Thread::JniEnvOffset<PointerSize::k64>(),
-                                main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CopyRawPtrFromThread32(jni_env, Thread::JniEnvOffset<PointerSize::k32>(),
-                                main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CopyRawPtrFromThread(jni_env,
+                            Thread::JniEnvOffset<kPointerSize>(),
+                            main_jni_conv->InterproceduralScratchRegister());
   }
 
   // 9. Plant call to native code associated with method.
@@ -398,7 +367,9 @@
                                              + static_cast<size_t>(kMipsPointerSize));
     }
     CHECK_LT(return_save_location.Uint32Value(), frame_size + main_out_arg_size);
-    __ Store(return_save_location, main_jni_conv->ReturnRegister(), main_jni_conv->SizeOfReturnValue());
+    __ Store(return_save_location,
+             main_jni_conv->ReturnRegister(),
+             main_jni_conv->SizeOfReturnValue());
   }
 
   // Increase frame size for out args if needed by the end_jni_conv.
@@ -414,27 +385,18 @@
   }
   //     thread.
   end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
-  ThreadOffset32 jni_end32(-1);
-  ThreadOffset64 jni_end64(-1);
+  ThreadOffset<kPointerSize> jni_end(-1);
   if (reference_return) {
     // Pass result.
-    jni_end32 = is_synchronized
-        ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k32,
-                                  pJniMethodEndWithReferenceSynchronized)
-        : QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodEndWithReference);
-    jni_end64 = is_synchronized
-        ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k64,
-                                  pJniMethodEndWithReferenceSynchronized)
-        : QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodEndWithReference);
+    jni_end = is_synchronized
+                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
+                  : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
     SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
     end_jni_conv->Next();
   } else {
-    jni_end32 = is_synchronized
-        ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodEndSynchronized)
-        : QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodEnd);
-    jni_end64 = is_synchronized
-        ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodEndSynchronized)
-        : QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodEnd);
+    jni_end = is_synchronized
+                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
+                  : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd);
   }
   // Pass saved local reference state.
   if (end_jni_conv->IsCurrentParamOnStack()) {
@@ -461,23 +423,13 @@
   }
   if (end_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
-    if (is_64_bit_target) {
-      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end64),
-              end_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end32),
-              end_jni_conv->InterproceduralScratchRegister());
-    }
+    __ Call(end_jni_conv->CurrentParamRegister(),
+            Offset(jni_end),
+            end_jni_conv->InterproceduralScratchRegister());
   } else {
     __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset(),
                         end_jni_conv->InterproceduralScratchRegister());
-    if (is_64_bit_target) {
-      __ CallFromThread64(ThreadOffset64(jni_end64),
-                          end_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CallFromThread32(ThreadOffset32(jni_end32),
-                          end_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CallFromThread(jni_end, end_jni_conv->InterproceduralScratchRegister());
   }
 
   // 13. Reload return value
@@ -517,7 +469,8 @@
 }
 
 // Copy a single parameter from the managed to the JNI calling convention.
-static void CopyParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv,
                           size_t frame_size, size_t out_arg_size) {
@@ -606,7 +559,8 @@
   }
 }
 
-static void SetNativeParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                                JniCallingConvention* jni_conv,
                                ManagedRegister in_reg) {
   if (jni_conv->IsCurrentParamOnStack()) {
@@ -621,7 +575,13 @@
 
 CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler, uint32_t access_flags,
                                          uint32_t method_idx, const DexFile& dex_file) {
-  return ArtJniCompileMethodInternal(compiler, access_flags, method_idx, dex_file);
+  if (Is64BitInstructionSet(compiler->GetInstructionSet())) {
+    return ArtJniCompileMethodInternal<PointerSize::k64>(
+        compiler, access_flags, method_idx, dex_file);
+  } else {
+    return ArtJniCompileMethodInternal<PointerSize::k32>(
+        compiler, access_flags, method_idx, dex_file);
+  }
 }
 
 }  // namespace art
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index d4dd978..2471f79 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -31,10 +31,6 @@
 }
 
 uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
-  // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it
-  // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk
-  // of code. To avoid any alignment discrepancies for the final chunk, we always align the
-  // offset after reserving of writing any chunk.
   uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
   bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset,
                                                 MethodReference(nullptr, 0u),
@@ -46,7 +42,7 @@
     unprocessed_patches_.clear();
 
     thunk_locations_.push_back(aligned_offset);
-    offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
+    offset = aligned_offset + thunk_code_.size();
   }
   return offset;
 }
@@ -65,13 +61,7 @@
     if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) {
       return 0u;
     }
-    uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
-    // Align after writing chunk, see the ReserveSpace() above.
-    offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
-    aligned_code_delta = offset - thunk_end_offset;
-    if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) {
-      return 0u;
-    }
+    offset = aligned_offset + thunk_code_.size();
   }
   return offset;
 }
@@ -92,7 +82,7 @@
                                                       MethodReference method_ref,
                                                       uint32_t max_extra_space) {
   uint32_t quick_code_size = compiled_method->GetQuickCode().size();
-  uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+  uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
   uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
   // Adjust for extra space required by the subclass.
   next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space);
@@ -106,9 +96,9 @@
     if (needs_thunk) {
       // A single thunk will cover all pending patches.
       unprocessed_patches_.clear();
-      uint32_t thunk_location = compiled_method->AlignCode(offset);
+      uint32_t thunk_location = CompiledMethod::AlignCode(offset, instruction_set_);
       thunk_locations_.push_back(thunk_location);
-      offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
+      offset = thunk_location + thunk_code_.size();
     }
   }
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index a8078e3..eace3d4 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -48,18 +48,18 @@
                              const ArrayRef<const LinkerPatch>& method3_patches,
                              uint32_t distance_without_thunks) {
     CHECK_EQ(distance_without_thunks % kArmAlignment, 0u);
-    const uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader);
+    uint32_t method1_offset =
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
 
     // We want to put the method3 at a very precise offset.
     const uint32_t method3_offset = method1_offset + distance_without_thunks;
-    CHECK_ALIGNED(method3_offset - sizeof(OatQuickMethodHeader), kArmAlignment);
+    CHECK_ALIGNED(method3_offset, kArmAlignment);
 
     // Calculate size of method2 so that we put method3 at the correct place.
+    const uint32_t method1_end = method1_offset + method1_code.size();
     const uint32_t method2_offset =
-        CompiledCode::AlignCode(method1_offset + method1_code.size(), kThumb2) +
-        sizeof(OatQuickMethodHeader);
+        method1_end + CodeAlignmentSize(method1_end) + sizeof(OatQuickMethodHeader);
     const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset);
     std::vector<uint8_t> method2_raw_code(method2_size);
     ArrayRef<const uint8_t> method2_code(method2_raw_code);
@@ -78,8 +78,11 @@
     if (result3.second == method3_offset + 1 /* thumb mode */) {
       return false;  // No thunk.
     } else {
-      uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kThumb2);
-      CHECK_EQ(result3.second, method3_offset + aligned_thunk_size + 1 /* thumb mode */);
+      uint32_t thunk_end =
+          CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), kThumb2) +
+          ThunkSize();
+      uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
+      CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */);
       return true;   // Thunk present.
     }
   }
@@ -352,9 +355,12 @@
 
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t method3_offset = GetMethodOffset(3u);
+  ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset));
   uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader);
-  ASSERT_TRUE(IsAligned<kArmAlignment>(method3_header_offset));
-  uint32_t thunk_offset = method3_header_offset - CompiledCode::AlignCode(ThunkSize(), kThumb2);
+  uint32_t thunk_offset =
+      RoundDown(method3_header_offset - ThunkSize(), GetInstructionSetAlignment(kThumb2));
+  DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+            method3_header_offset);
   ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset));
   uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */);
   ASSERT_EQ(diff & 1u, 0u);
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index fdd14be..4c8788e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -83,7 +83,7 @@
 
   // Now that we have the actual offset where the code will be placed, locate the ADRP insns
   // that actually require the thunk.
-  uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+  uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
   ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
   uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size());
   DCHECK(compiled_method != nullptr);
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 09729fd..573de73 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -67,36 +67,39 @@
                                  const ArrayRef<const LinkerPatch>& last_method_patches,
                                  uint32_t distance_without_thunks) {
     CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u);
-    const uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+    uint32_t method1_offset =
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
-    const uint32_t gap_start =
-        CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64);
+    const uint32_t gap_start = method1_offset + method1_code.size();
 
     // We want to put the method3 at a very precise offset.
     const uint32_t last_method_offset = method1_offset + distance_without_thunks;
+    CHECK_ALIGNED(last_method_offset, kArm64Alignment);
     const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader);
-    CHECK_ALIGNED(gap_end, kArm64Alignment);
 
-    // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB).
+    // Fill the gap with intermediate methods in chunks of 2MiB and the first in [2MiB, 4MiB).
     // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB
-    // offsets by this test.)
+    // offsets by this test. Making the first chunk bigger makes it easy to give all intermediate
+    // methods the same alignment of the end, so the thunk insertion adds a predictable size as
+    // long as it's after the first chunk.)
     uint32_t method_idx = 2u;
     constexpr uint32_t kSmallChunkSize = 2 * MB;
     std::vector<uint8_t> gap_code;
-    size_t gap_size = gap_end - gap_start;
-    for (; gap_size >= 2u * kSmallChunkSize; gap_size -= kSmallChunkSize) {
-      uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader);
+    uint32_t gap_size = gap_end - gap_start;
+    uint32_t num_small_chunks = std::max(gap_size / kSmallChunkSize, 1u) - 1u;
+    uint32_t chunk_start = gap_start;
+    uint32_t chunk_size = gap_size - num_small_chunks * kSmallChunkSize;
+    for (uint32_t i = 0; i <= num_small_chunks; ++i) {  // num_small_chunks+1 iterations.
+      uint32_t chunk_code_size =
+          chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader);
       gap_code.resize(chunk_code_size, 0u);
       AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
                         ArrayRef<const LinkerPatch>());
       method_idx += 1u;
+      chunk_start += chunk_size;
+      chunk_size = kSmallChunkSize;  // For all but the first chunk.
+      DCHECK_EQ(CodeAlignmentSize(gap_end), CodeAlignmentSize(chunk_start));
     }
-    uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader);
-    gap_code.resize(chunk_code_size, 0u);
-    AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
-                      ArrayRef<const LinkerPatch>());
-    method_idx += 1u;
 
     // Add the last method and link
     AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches);
@@ -109,8 +112,9 @@
     // There may be a thunk before method2.
     if (last_result.second != last_method_offset) {
       // Thunk present. Check that there's only one.
-      uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kArm64);
-      CHECK_EQ(last_result.second, last_method_offset + aligned_thunk_size);
+      uint32_t thunk_end = CompiledCode::AlignCode(gap_end, kArm64) + ThunkSize();
+      uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
+      CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader));
     }
     return method_idx;
   }
@@ -341,7 +345,7 @@
                         uint32_t dex_cache_arrays_begin,
                         uint32_t element_offset) {
     uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     ASSERT_LT(method1_offset, adrp_offset);
     CHECK_ALIGNED(adrp_offset, 4u);
     uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
@@ -391,7 +395,7 @@
                         bool has_thunk,
                         uint32_t string_offset) {
     uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     ASSERT_LT(method1_offset, adrp_offset);
     CHECK_ALIGNED(adrp_offset, 4u);
     uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
@@ -614,10 +618,12 @@
 
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+  ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset));
   uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader);
-  ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_header_offset));
-  uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64);
-  ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset));
+  uint32_t thunk_offset =
+      RoundDown(last_method_header_offset - ThunkSize(), GetInstructionSetAlignment(kArm64));
+  DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+            last_method_header_offset);
   uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1);
   CHECK_ALIGNED(diff, 4u);
   ASSERT_LT(diff, 128 * MB);
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index ec69107..d21f33e 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -98,6 +98,14 @@
         patches));
   }
 
+  uint32_t CodeAlignmentSize(uint32_t header_offset_to_align) {
+    // We want to align the code rather than the preheader.
+    uint32_t unaligned_code_offset = header_offset_to_align + sizeof(OatQuickMethodHeader);
+    uint32_t aligned_code_offset =
+        CompiledMethod::AlignCode(unaligned_code_offset, instruction_set_);
+    return aligned_code_offset - unaligned_code_offset;
+  }
+
   void Link() {
     // Reserve space.
     static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset.");
@@ -106,9 +114,8 @@
     for (auto& compiled_method : compiled_methods_) {
       offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]);
 
-      uint32_t aligned_offset = compiled_method->AlignCode(offset);
-      uint32_t aligned_code_delta = aligned_offset - offset;
-      offset += aligned_code_delta;
+      uint32_t alignment_size = CodeAlignmentSize(offset);
+      offset += alignment_size;
 
       offset += sizeof(OatQuickMethodHeader);
       uint32_t quick_code_offset = offset + compiled_method->CodeDelta();
@@ -136,11 +143,10 @@
     for (auto& compiled_method : compiled_methods_) {
       offset = patcher_->WriteThunks(&out_, offset);
 
-      uint32_t aligned_offset = compiled_method->AlignCode(offset);
-      uint32_t aligned_code_delta = aligned_offset - offset;
-      CHECK_LE(aligned_code_delta, sizeof(kPadding));
-      out_.WriteFully(kPadding, aligned_code_delta);
-      offset += aligned_code_delta;
+      uint32_t alignment_size = CodeAlignmentSize(offset);
+      CHECK_LE(alignment_size, sizeof(kPadding));
+      out_.WriteFully(kPadding, alignment_size);
+      offset += alignment_size;
 
       out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader));
       offset += sizeof(OatQuickMethodHeader);
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index f20c715..8273b15 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -87,6 +87,13 @@
   OatHeader* const oat_header_;
 };
 
+inline uint32_t CodeAlignmentSize(uint32_t header_offset, const CompiledMethod& compiled_method) {
+  // We want to align the code rather than the preheader.
+  uint32_t unaligned_code_offset = header_offset + sizeof(OatQuickMethodHeader);
+  uint32_t aligned_code_offset = compiled_method.AlignCode(unaligned_code_offset);
+  return aligned_code_offset - unaligned_code_offset;
+}
+
 }  // anonymous namespace
 
 // Defines the location of the raw dex file to write.
@@ -817,8 +824,8 @@
                               uint32_t thumb_offset) {
     offset_ = writer_->relative_patcher_->ReserveSpace(
         offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex()));
-    offset_ = compiled_method->AlignCode(offset_);
-    DCHECK_ALIGNED_PARAM(offset_,
+    offset_ += CodeAlignmentSize(offset_, *compiled_method);
+    DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader),
                          GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
     return offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
   }
@@ -1011,17 +1018,16 @@
           ReportWriteFailure("relative call thunk", it);
           return false;
         }
-        uint32_t aligned_offset = compiled_method->AlignCode(offset_);
-        uint32_t aligned_code_delta = aligned_offset - offset_;
-        if (aligned_code_delta != 0) {
-          if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) {
+        uint32_t alignment_size = CodeAlignmentSize(offset_, *compiled_method);
+        if (alignment_size != 0) {
+          if (!writer_->WriteCodeAlignment(out, alignment_size)) {
             ReportWriteFailure("code alignment padding", it);
             return false;
           }
-          offset_ += aligned_code_delta;
+          offset_ += alignment_size;
           DCHECK_OFFSET_();
         }
-        DCHECK_ALIGNED_PARAM(offset_,
+        DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader),
                              GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
         DCHECK_EQ(method_offsets.code_offset_,
                   offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
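The CodeAlignmentSize() helper pads so that the code itself, rather than the OatQuickMethodHeader in front of it, lands on the instruction set alignment boundary, which is why the DCHECKs above now check offset_ + sizeof(OatQuickMethodHeader). A worked example with illustrative numbers (the 28-byte header and 16-byte alignment are assumptions, not values from this change):

    unaligned_code_offset = 104 + 28         = 132
    aligned_code_offset   = RoundUp(132, 16) = 144
    CodeAlignmentSize     = 144 - 132        = 12 bytes of padding written before the header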
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4a4b98c..5152075 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -765,16 +765,24 @@
   LocationSummary* locations = instruction->GetLocations();
 
   uint32_t register_mask = locations->GetRegisterMask();
-  if (locations->OnlyCallsOnSlowPath()) {
-    // In case of slow path, we currently set the location of caller-save registers
-    // to register (instead of their stack location when pushed before the slow-path
-    // call). Therefore register_mask contains both callee-save and caller-save
-    // registers that hold objects. We must remove the caller-save from the mask, since
-    // they will be overwritten by the callee.
-    register_mask &= core_callee_save_mask_;
+  if (instruction->IsSuspendCheck()) {
+    // Suspend check has special ABI that saves the caller-save registers in callee,
+    // so we want to emit stack maps containing the registers.
+    // TODO: Register allocator still reserves space for the caller-save registers.
+    // We should add slow-path-specific caller-save information into LocationSummary
+    // and refactor the code here as well as in the register allocator to use it.
+  } else {
+    if (locations->OnlyCallsOnSlowPath()) {
+      // In case of slow path, we currently set the location of caller-save registers
+      // to register (instead of their stack location when pushed before the slow-path
+      // call). Therefore register_mask contains both callee-save and caller-save
+      // registers that hold objects. We must remove the caller-save from the mask, since
+      // they will be overwritten by the callee.
+      register_mask &= core_callee_save_mask_;
+    }
+    // The register mask must be a subset of callee-save registers.
+    DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   }
-  // The register mask must be a subset of callee-save registers.
-  DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   stack_map_stream_.BeginStackMapEntry(outer_dex_pc,
                                        native_pc,
                                        register_mask,
@@ -1174,7 +1182,7 @@
         << "instruction->DebugName()=" << instruction->DebugName()
         << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString();
   } else {
-    DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal())
+    DCHECK(instruction->GetLocations()->CallsOnSlowPath() || slow_path->IsFatal())
         << "instruction->DebugName()=" << instruction->DebugName()
         << " slow_path->GetDescription()=" << slow_path->GetDescription();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ad02ecf..fd396c4 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -340,6 +340,9 @@
   bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
   bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
 
+  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
+  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
+
   // Helper that returns the pointer offset of an index in an object array.
   // Note: this method assumes we always have the same pointer size, regardless
   // of the architecture.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c18b793..ab85c12 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -119,11 +119,9 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ b(GetReturnLabel());
     } else {
@@ -434,6 +432,11 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
@@ -514,6 +517,11 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1289,6 +1297,44 @@
 void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
+void InstructionCodeGeneratorARM::GenerateVcmp(HInstruction* instruction) {
+  Primitive::Type type = instruction->InputAt(0)->GetType();
+  Location lhs_loc = instruction->GetLocations()->InAt(0);
+  Location rhs_loc = instruction->GetLocations()->InAt(1);
+  if (rhs_loc.IsConstant()) {
+    // 0.0 is the only immediate that can be encoded directly in
+    // a VCMP instruction.
+    //
+    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+    // specify that in a floating-point comparison, positive zero
+    // and negative zero are considered equal, so we can use the
+    // literal 0.0 for both cases here.
+    //
+    // Note however that some methods (Float.equals, Float.compare,
+    // Float.compareTo, Double.equals, Double.compare,
+    // Double.compareTo, Math.max, Math.min, StrictMath.max,
+    // StrictMath.min) consider 0.0 to be (strictly) greater than
+    // -0.0. So if we ever translate calls to these methods into a
+    // HCompare instruction, we must handle the -0.0 case with
+    // care here.
+    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+    if (type == Primitive::kPrimFloat) {
+      __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
+  } else {
+    if (type == Primitive::kPrimFloat) {
+      __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
+               FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
+  }
+}
+
 void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
                                                   Label* true_label,
                                                   Label* false_label ATTRIBUTE_UNUSED) {
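The +0.0/-0.0 remark in GenerateVcmp above can be verified outside the compiler: an ordered IEEE-754 comparison treats the two zeros as equal (which is why comparing against the single literal 0.0 is enough for VCMP), while their bit patterns differ, which is what Float.compare-style total ordering keys on. A small self-contained C++ check of both facts (not ART code):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    int main() {
      const float pos_zero = 0.0f;
      const float neg_zero = -0.0f;

      // Ordered comparison: +0.0 and -0.0 compare equal, so a compare against
      // the literal 0.0 covers both signs of zero.
      assert(pos_zero == neg_zero);

      // The representations differ (sign bit), which is what methods that treat
      // 0.0 as strictly greater than -0.0 distinguish.
      uint32_t pos_bits;
      uint32_t neg_bits;
      std::memcpy(&pos_bits, &pos_zero, sizeof(pos_bits));
      std::memcpy(&neg_bits, &neg_zero, sizeof(neg_bits));
      assert(pos_bits != neg_bits);
      assert(std::signbit(neg_zero) && !std::signbit(pos_zero));
      return 0;
    }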
@@ -1389,22 +1435,14 @@
   Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
   Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
 
-  LocationSummary* locations = condition->GetLocations();
-  Location left = locations->InAt(0);
-  Location right = locations->InAt(1);
-
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong:
       GenerateLongComparesAndJumps(condition, true_target, false_target);
       break;
     case Primitive::kPrimFloat:
-      __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
-      GenerateFPJumps(condition, true_target, false_target);
-      break;
     case Primitive::kPrimDouble:
-      __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
-               FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+      GenerateVcmp(condition);
       GenerateFPJumps(condition, true_target, false_target);
       break;
     default:
@@ -1585,7 +1623,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
       if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
@@ -1632,12 +1670,8 @@
       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
       break;
     case Primitive::kPrimFloat:
-      __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
-      GenerateFPJumps(cond, &true_label, &false_label);
-      break;
     case Primitive::kPrimDouble:
-      __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
-               FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+      GenerateVcmp(cond);
       GenerateFPJumps(cond, &true_label, &false_label);
       break;
   }
@@ -2497,7 +2531,7 @@
 
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -2534,13 +2568,18 @@
       break;
 
     case Primitive::kPrimLong: {
-      DCHECK(second.IsRegisterPair());
-      __ adds(out.AsRegisterPairLow<Register>(),
-              first.AsRegisterPairLow<Register>(),
-              ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ adc(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      if (second.IsConstant()) {
+        uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+        GenerateAddLongConst(out, first, value);
+      } else {
+        DCHECK(second.IsRegisterPair());
+        __ adds(out.AsRegisterPairLow<Register>(),
+                first.AsRegisterPairLow<Register>(),
+                ShifterOperand(second.AsRegisterPairLow<Register>()));
+        __ adc(out.AsRegisterPairHigh<Register>(),
+               first.AsRegisterPairHigh<Register>(),
+               ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      }
       break;
     }
 
@@ -2574,7 +2613,7 @@
 
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -2610,13 +2649,18 @@
     }
 
     case Primitive::kPrimLong: {
-      DCHECK(second.IsRegisterPair());
-      __ subs(out.AsRegisterPairLow<Register>(),
-              first.AsRegisterPairLow<Register>(),
-              ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ sbc(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      if (second.IsConstant()) {
+        uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+        GenerateAddLongConst(out, first, -value);
+      } else {
+        DCHECK(second.IsRegisterPair());
+        __ subs(out.AsRegisterPairLow<Register>(),
+                first.AsRegisterPairLow<Register>(),
+                ShifterOperand(second.AsRegisterPairLow<Register>()));
+        __ sbc(out.AsRegisterPairHigh<Register>(),
+               first.AsRegisterPairHigh<Register>(),
+               ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      }
       break;
     }
 
@@ -3654,7 +3698,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -3699,12 +3743,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       __ LoadImmediate(out, 0);
-      if (type == Primitive::kPrimFloat) {
-        __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
-      } else {
-        __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
-                 FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
-      }
+      GenerateVcmp(compare);
       __ vmstat();  // transfer FP status register to ARM APSR.
       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
       break;
@@ -3998,6 +4037,17 @@
   }
 }
 
+Location LocationsBuilderARM::ArithmeticZeroOrFpuRegister(HInstruction* input) {
+  DCHECK(input->GetType() == Primitive::kPrimDouble || input->GetType() == Primitive::kPrimFloat)
+      << input->GetType();
+  if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
+      (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
+    return Location::ConstantLocation(input->AsConstant());
+  } else {
+    return Location::RequiresFpuRegister();
+  }
+}
+
 Location LocationsBuilderARM::ArmEncodableConstantOrRegister(HInstruction* constant,
                                                              Opcode opcode) {
   DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
@@ -4012,31 +4062,51 @@
                                                        Opcode opcode) {
   uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
   if (Primitive::Is64BitType(input_cst->GetType())) {
-    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) &&
-        CanEncodeConstantAsImmediate(High32Bits(value), opcode);
+    Opcode high_opcode = opcode;
+    SetCc low_set_cc = kCcDontCare;
+    switch (opcode) {
+      case SUB:
+        // Flip the operation to an ADD.
+        value = -value;
+        opcode = ADD;
+        FALLTHROUGH_INTENDED;
+      case ADD:
+        if (Low32Bits(value) == 0u) {
+          return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare);
+        }
+        high_opcode = ADC;
+        low_set_cc = kCcSet;
+        break;
+      default:
+        break;
+    }
+    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) &&
+        CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare);
   } else {
     return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
   }
 }
 
-bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) {
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value,
+                                                       Opcode opcode,
+                                                       SetCc set_cc) {
   ShifterOperand so;
   ArmAssembler* assembler = codegen_->GetAssembler();
-  if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) {
+  if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, set_cc, &so)) {
     return true;
   }
   Opcode neg_opcode = kNoOperand;
   switch (opcode) {
-    case AND:
-      neg_opcode = BIC;
-      break;
-    case ORR:
-      neg_opcode = ORN;
-      break;
+    case AND: neg_opcode = BIC; value = ~value; break;
+    case ORR: neg_opcode = ORN; value = ~value; break;
+    case ADD: neg_opcode = SUB; value = -value; break;
+    case ADC: neg_opcode = SBC; value = ~value; break;
+    case SUB: neg_opcode = ADD; value = -value; break;
+    case SBC: neg_opcode = ADC; value = ~value; break;
     default:
       return false;
   }
-  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so);
+  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so);
 }
 
 void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
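The extended fallback in CanEncodeConstantAsImmediate rests on a simple identity: an operation whose immediate does not fit ARM's modified-immediate encoding can often be replaced by its complementary operation with the negated (ADD/SUB) or complemented (AND/BIC, ORR/ORN, ADC/SBC) value. A minimal sketch of the idea with a stand-in encodability predicate; the real check is ShifterOperandCanHold, and the cutoff used here is purely illustrative:

    #include <cassert>
    #include <cstdint>

    // Stand-in for the modified-immediate test: accept only small values so the
    // example stays self-contained. The real predicate is more permissive.
    static bool FitsImmediate(uint32_t value) { return value < 256u; }

    // An ADD immediate is usable either directly or by flipping to SUB with the
    // negated value, mirroring the ADD/SUB case in the switch above.
    static bool CanEncodeAddImmediate(uint32_t value) {
      if (FitsImmediate(value)) return true;   // ADD rd, rn, #value
      return FitsImmediate(-value);            // SUB rd, rn, #-value
    }

    int main() {
      assert(CanEncodeAddImmediate(3u));                         // fits directly
      assert(CanEncodeAddImmediate(static_cast<uint32_t>(-4)));  // fits as SUB #4
      assert(!CanEncodeAddImmediate(0x12345u));                  // fits neither way
      return 0;
    }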
@@ -4437,8 +4507,6 @@
   Primitive::Type type = instruction->GetType();
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -4473,6 +4541,11 @@
     }
 
     case Primitive::kPrimNot: {
+      // The read barrier instrumentation of object ArrayGet
+      // instructions does not support the HIntermediateAddress
+      // instruction.
+      DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
@@ -4615,8 +4688,6 @@
   Location value_loc = locations->InAt(2);
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
@@ -4881,8 +4952,6 @@
 }
 
 void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
 
@@ -4897,9 +4966,6 @@
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
 
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
-
   if (second.IsRegister()) {
     __ add(out.AsRegister<Register>(),
            first.AsRegister<Register>(),
@@ -6166,6 +6232,34 @@
   __ eor(out, first, ShifterOperand(value));
 }
 
+void InstructionCodeGeneratorARM::GenerateAddLongConst(Location out,
+                                                       Location first,
+                                                       uint64_t value) {
+  Register out_low = out.AsRegisterPairLow<Register>();
+  Register out_high = out.AsRegisterPairHigh<Register>();
+  Register first_low = first.AsRegisterPairLow<Register>();
+  Register first_high = first.AsRegisterPairHigh<Register>();
+  uint32_t value_low = Low32Bits(value);
+  uint32_t value_high = High32Bits(value);
+  if (value_low == 0u) {
+    if (out_low != first_low) {
+      __ mov(out_low, ShifterOperand(first_low));
+    }
+    __ AddConstant(out_high, first_high, value_high);
+    return;
+  }
+  __ AddConstantSetFlags(out_low, first_low, value_low);
+  ShifterOperand so;
+  if (__ ShifterOperandCanHold(out_high, first_high, ADC, value_high, kCcDontCare, &so)) {
+    __ adc(out_high, first_high, so);
+  } else if (__ ShifterOperandCanHold(out_low, first_low, SBC, ~value_high, kCcDontCare, &so)) {
+    __ sbc(out_high, first_high, so);
+  } else {
+    LOG(FATAL) << "Unexpected constant " << value_high;
+    UNREACHABLE();
+  }
+}
+
 void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Location first = locations->InAt(0);
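GenerateAddLongConst above tries ADC with the high word of the constant and falls back to SBC with its bitwise complement. The two are interchangeable because ARM's SBC computes rn - op2 - (1 - carry), and rn - ~imm - (1 - carry) reduces to rn + imm + carry modulo 2^32, which is exactly ADC. A quick C++ check of that identity over a few sample values (carry modeled as 0 or 1, flags ignored):

    #include <cassert>
    #include <cstdint>

    // ADC: rn + op2 + carry, modulo 2^32.
    static uint32_t Adc(uint32_t rn, uint32_t op2, uint32_t carry) {
      return rn + op2 + carry;
    }

    // SBC: rn - op2 - (1 - carry), modulo 2^32.
    static uint32_t Sbc(uint32_t rn, uint32_t op2, uint32_t carry) {
      return rn - op2 - (1u - carry);
    }

    int main() {
      const uint32_t samples[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu, 0x12345678u};
      for (uint32_t rn : samples) {
        for (uint32_t imm : samples) {
          for (uint32_t carry = 0u; carry <= 1u; ++carry) {
            // Adding imm with carry equals subtracting-with-carry of ~imm.
            assert(Adc(rn, imm, carry) == Sbc(rn, ~imm, carry));
          }
        }
      }
      return 0;
    }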
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index f9fcabd..5d9b2dc 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -180,9 +180,10 @@
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
+  Location ArithmeticZeroOrFpuRegister(HInstruction* input);
   Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
   bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
-  bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode);
+  bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare);
 
   CodeGeneratorARM* const codegen_;
   InvokeDexCallingConventionVisitorARM parameter_visitor_;
@@ -219,6 +220,7 @@
   void GenerateAndConst(Register out, Register first, uint32_t value);
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
+  void GenerateAddLongConst(Location out, Location first, uint64_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(LocationSummary* locations);
@@ -281,6 +283,7 @@
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
                                     Label* false_target);
+  void GenerateVcmp(HInstruction* instruction);
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 115cee6..9ceb310 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -398,11 +398,9 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ B(GetReturnLabel());
     } else {
@@ -600,6 +598,11 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
@@ -609,6 +612,8 @@
     DCHECK_NE(obj_.reg(), LR);
     DCHECK_NE(obj_.reg(), WSP);
     DCHECK_NE(obj_.reg(), WZR);
+    // WIP0 is used by the slow path as a temp; it cannot be the object register.
+    DCHECK_NE(obj_.reg(), IP0);
     DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
     // "Compact" slow path, saving two moves.
     //
@@ -680,7 +685,9 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
     DCHECK(!(instruction_->IsArrayGet() &&
              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
@@ -751,10 +758,7 @@
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
             << instruction_->AsInvoke()->GetIntrinsic();
         DCHECK_EQ(offset_, 0U);
-        DCHECK(index_.IsRegisterPair());
-        // UnsafeGet's offset location is a register pair, the low
-        // part contains the correct offset.
-        index = index_.ToLow();
+        DCHECK(index_.IsRegister());
       }
     }
 
@@ -1284,17 +1288,21 @@
       UseScratchRegisterScope temps(GetVIXLAssembler());
       HConstant* src_cst = source.GetConstant();
       CPURegister temp;
-      if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) {
-        temp = temps.AcquireW();
-      } else if (src_cst->IsLongConstant()) {
-        temp = temps.AcquireX();
-      } else if (src_cst->IsFloatConstant()) {
-        temp = temps.AcquireS();
+      if (src_cst->IsZeroBitPattern()) {
+        temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) ? xzr : wzr;
       } else {
-        DCHECK(src_cst->IsDoubleConstant());
-        temp = temps.AcquireD();
+        if (src_cst->IsIntConstant()) {
+          temp = temps.AcquireW();
+        } else if (src_cst->IsLongConstant()) {
+          temp = temps.AcquireX();
+        } else if (src_cst->IsFloatConstant()) {
+          temp = temps.AcquireS();
+        } else {
+          DCHECK(src_cst->IsDoubleConstant());
+          temp = temps.AcquireD();
+        }
+        MoveConstant(temp, src_cst);
       }
-      MoveConstant(temp, src_cst);
       __ Str(temp, StackOperandFrom(destination));
     } else {
       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
@@ -1982,8 +1990,6 @@
 }
 
 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -1991,10 +1997,7 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
-    HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
          Operand(InputOperandAt(instruction, 1)));
@@ -2090,11 +2093,15 @@
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
+  // The read barrier instrumentation of object ArrayGet instructions
+  // does not support the HIntermediateAddress instruction.
+  DCHECK(!((type == Primitive::kPrimNot) &&
+           instruction->GetArray()->IsIntermediateAddress() &&
+           kEmitCompilerReadBarrier));
+
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object ArrayGet with Baker's read barrier case.
     Register temp = temps.AcquireW();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
     codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2108,9 +2115,6 @@
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -2200,9 +2204,6 @@
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -2222,7 +2223,6 @@
     codegen_->Store(value_type, value, destination);
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   } else {
-    DCHECK(needs_write_barrier);
     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     vixl::aarch64::Label done;
     SlowPathCodeARM64* slow_path = nullptr;
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 240936c..1b5fa85 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -243,7 +243,7 @@
   }
 
   Arm64Assembler* GetAssembler() const { return assembler_; }
-  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
  private:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
@@ -364,7 +364,7 @@
  private:
   Arm64Assembler* GetAssembler() const;
   vixl::aarch64::MacroAssembler* GetVIXLAssembler() const {
-    return GetAssembler()->vixl_masm_;
+    return GetAssembler()->GetVIXLAssembler();
   }
 
   CodeGeneratorARM64* const codegen_;
@@ -413,7 +413,7 @@
   HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
   Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
   const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
-  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
   // Emit a write barrier.
   void MarkGCCard(vixl::aarch64::Register object,
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8dd82ef..59e103a 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -351,14 +351,12 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickTestSuspend));
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ B(GetReturnLabel());
     } else {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3472830..fe1fddc 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -300,13 +300,11 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ Bc(GetReturnLabel());
     } else {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a2fa245..ade2117 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -192,13 +192,11 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5d5fa85..eadb431 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -149,13 +149,11 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 0ce0ec1..58e700d 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -31,13 +31,11 @@
  public:
   HDeadCodeElimination(HGraph* graph,
                        OptimizingCompilerStats* stats = nullptr,
-                       const char* name = kInitialDeadCodeEliminationPassName)
+                       const char* name = kDeadCodeEliminationPassName)
       : HOptimization(graph, name, stats) {}
 
   void Run() OVERRIDE;
-
-  static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination";
-  static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final";
+  static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination";
 
  private:
   void MaybeRecordDeadBlock(HBasicBlock* block);
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
index 015f910..9142e29 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.h
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -26,7 +26,9 @@
 class DexCacheArrayFixups : public HOptimization {
  public:
   DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+      : HOptimization(graph, kDexCacheArrayFixupsArmPassName, stats) {}
+
+  static constexpr const char* kDexCacheArrayFixupsArmPassName = "dex_cache_array_fixups_arm";
 
   void Run() OVERRIDE;
 };
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
index 21056e1..861a199 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.h
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.h
@@ -29,9 +29,11 @@
 class DexCacheArrayFixups : public HOptimization {
  public:
   DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "dex_cache_array_fixups_mips", stats),
+      : HOptimization(graph, kDexCacheArrayFixupsMipsPassName, stats),
         codegen_(codegen) {}
 
+  static constexpr const char* kDexCacheArrayFixupsMipsPassName = "dex_cache_array_fixups_mips";
+
   void Run() OVERRIDE;
 
  private:
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 0b4c569..89d80cc 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -298,6 +298,12 @@
         stream << constant->AsIntConstant()->GetValue();
       } else if (constant->IsLongConstant()) {
         stream << constant->AsLongConstant()->GetValue();
+      } else if (constant->IsFloatConstant()) {
+        stream << constant->AsFloatConstant()->GetValue();
+      } else if (constant->IsDoubleConstant()) {
+        stream << constant->AsDoubleConstant()->GetValue();
+      } else if (constant->IsNullConstant()) {
+        stream << "null";
       }
     } else if (location.IsInvalid()) {
       stream << "invalid";
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index 7c74816..cd4c830 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -39,9 +39,9 @@
 
   void Run() OVERRIDE;
 
- private:
   static constexpr const char* kInductionPassName = "induction_var_analysis";
 
+ private:
   struct NodeInfo {
     explicit NodeInfo(uint32_t d) : depth(d), done(false) {}
     uint32_t depth;
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 3d297da..782110c 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -48,7 +48,9 @@
 class InstructionSimplifierArm : public HOptimization {
  public:
   InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, "instruction_simplifier_arm", stats) {}
+    : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
+
+  static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
 
   void Run() OVERRIDE {
     InstructionSimplifierArmVisitor visitor(graph_, stats_);
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 28648b3..f71684e 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -82,8 +82,9 @@
 class InstructionSimplifierArm64 : public HOptimization {
  public:
   InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, "instruction_simplifier_arm64", stats) {}
-
+    : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
+  static constexpr const char* kInstructionSimplifierArm64PassName =
+      "instruction_simplifier_arm64";
   void Run() OVERRIDE {
     InstructionSimplifierArm64Visitor visitor(graph_, stats_);
     visitor.VisitReversePostOrder();
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 8f7778f..6632cd9 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -231,15 +231,6 @@
                                   HInstruction* array,
                                   HInstruction* index,
                                   size_t data_offset) {
-  if (kEmitCompilerReadBarrier) {
-    // The read barrier instrumentation does not support the
-    // HIntermediateAddress instruction yet.
-    //
-    // TODO: Handle this case properly in the ARM64 and ARM code generator and
-    // re-enable this optimization; otherwise, remove this TODO.
-    // b/26601270
-    return false;
-  }
   if (index->IsConstant() ||
       (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
     // When the index is a constant all the addressing can be fitted in the
@@ -251,6 +242,13 @@
     // The access may require a runtime call or the original array pointer.
     return false;
   }
+  if (kEmitCompilerReadBarrier &&
+      access->IsArrayGet() &&
+      access->AsArrayGet()->GetType() == Primitive::kPrimNot) {
+    // For object arrays, the read barrier instrumentation requires
+    // the original array pointer.
+    return false;
+  }
 
   // Proceed to extract the base address computation.
   HGraph* graph = access->GetBlock()->GetGraph();
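For orientation, the HIntermediateAddress that TryExtractArrayAccessAddress introduces hoists the constant data offset into a separate base, so each element access becomes intermediate + (index << shift) rather than array + data_offset + (index << shift). With a read barrier, an object ArrayGet still needs the original array reference, which is why that case now returns early above. A tiny sketch of the address arithmetic only, with made-up offsets:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Illustrative values, not ART's real layout.
      const uintptr_t array = 0x1000u;     // address of the array object
      const uintptr_t data_offset = 12u;   // offset of the first element
      const unsigned shift = 2u;           // 32-bit elements
      const uintptr_t index = 5u;

      // Direct form: recompute array + data_offset at every access.
      const uintptr_t direct = array + data_offset + (index << shift);

      // With an intermediate address: hoist array + data_offset once.
      const uintptr_t intermediate = array + data_offset;
      const uintptr_t via_intermediate = intermediate + (index << shift);

      assert(direct == via_intermediate);
      return 0;
    }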
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index be061f5..27d9d48 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1212,7 +1212,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1232,7 +1232,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1250,7 +1250,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1311,7 +1311,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 06d1148..e7c40e6 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -26,7 +26,6 @@
 #include "mirror/string.h"
 #include "thread.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "utils/arm64/constants_arm64.h"
 
 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
@@ -62,14 +61,14 @@
 }  // namespace
 
 MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
-  return codegen_->GetAssembler()->vixl_masm_;
+  return codegen_->GetVIXLAssembler();
 }
 
 ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
   return codegen_->GetGraph()->GetArena();
 }
 
-#define __ codegen->GetAssembler()->vixl_masm_->
+#define __ codegen->GetVIXLAssembler()->
 
 static void MoveFromReturnRegister(Location trg,
                                    Primitive::Type type,
@@ -782,7 +781,7 @@
   DCHECK((type == Primitive::kPrimInt) ||
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
-  MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
   Location base_loc = locations->InAt(1);
   Register base = WRegisterFrom(base_loc);      // Object pointer.
   Location offset_loc = locations->InAt(2);
@@ -916,7 +915,7 @@
                          bool is_volatile,
                          bool is_ordered,
                          CodeGeneratorARM64* codegen) {
-  MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
 
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
@@ -1035,7 +1034,7 @@
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
 
   Register out = WRegisterFrom(locations->Out());                  // Boolean result.
 
@@ -1409,7 +1408,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1429,7 +1428,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1447,7 +1446,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1506,7 +1505,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 9449f79..55e1ab2 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2070,7 +2070,7 @@
 // int java.lang.String.indexOf(int ch)
 void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -2095,7 +2095,7 @@
 // int java.lang.String.indexOf(int ch, int fromIndex)
 void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -2121,7 +2121,7 @@
 // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2186,7 +2186,7 @@
 // java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8d4d3e5..1e18540 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1707,7 +1707,7 @@
 // int java.lang.String.indexOf(int ch)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1728,7 +1728,7 @@
 // int java.lang.String.indexOf(int ch, int fromIndex)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1748,7 +1748,7 @@
 // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1816,7 +1816,7 @@
 // java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 65f4def..dc409c9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -752,8 +752,6 @@
   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
 }
 
-// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
-// as it needs 64 bit instructions.
 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
   // See intrinsics.h.
   if (!kRoundIsPlusPointFive) {
@@ -762,10 +760,17 @@
 
   // Do we have instruction support?
   if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
+    HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+    DCHECK(static_or_direct != nullptr);
     LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                               LocationSummary::kNoCall,
                                                               kIntrinsified);
     locations->SetInAt(0, Location::RequiresFpuRegister());
+    if (static_or_direct->HasSpecialInput() &&
+        invoke->InputAt(
+            static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+      locations->SetInAt(1, Location::RequiresRegister());
+    }
     locations->SetOut(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
@@ -774,7 +779,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kCallOnMainOnly);
+                                                            LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(EAX));
@@ -784,47 +789,42 @@
 
 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  if (locations->WillCall()) {
+  if (locations->WillCall()) {  // TODO: can we reach this?
     InvokeOutOfLineIntrinsic(codegen_, invoke);
     return;
   }
 
-  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  Register constant_area = locations->InAt(1).AsRegister<Register>();
+  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
   Register out = locations->Out().AsRegister<Register>();
-  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-  NearLabel done, nan;
+  NearLabel skip_incr, done;
   X86Assembler* assembler = GetAssembler();
 
-  // Generate 0.5 into inPlusPointFive.
-  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
-  __ movd(inPlusPointFive, out);
+  // Since no direct x86 rounding instruction matches the required semantics,
+  // this intrinsic is implemented as follows:
+  //  result = floor(in);
+  //  if (in - result >= 0.5f)
+  //    result = result + 1.0f;
+  __ movss(t2, in);
+  __ roundss(t1, in, Immediate(1));
+  __ subss(t2, t1);
+  __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
+  __ j(kBelow, &skip_incr);
+  __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
+  __ Bind(&skip_incr);
 
-  // Add in the input.
-  __ addss(inPlusPointFive, in);
-
-  // And truncate to an integer.
-  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
+  // Final conversion to an integer. Unfortunately this also does not have a
+  // direct x86 instruction, since NaN should map to 0 and large positive
+  // values need to be clipped to the extreme value.
   __ movl(out, Immediate(kPrimIntMax));
-  // maxInt = int-to-float(out)
-  __ cvtsi2ss(maxInt, out);
-
-  // if inPlusPointFive >= maxInt goto done
-  __ comiss(inPlusPointFive, maxInt);
-  __ j(kAboveEqual, &done);
-
-  // if input == NaN goto nan
-  __ j(kUnordered, &nan);
-
-  // output = float-to-int-truncate(input)
-  __ cvttss2si(out, inPlusPointFive);
-  __ jmp(&done);
-  __ Bind(&nan);
-
-  //  output = 0
-  __ xorl(out, out);
+  __ cvtsi2ss(t2, out);
+  __ comiss(t1, t2);
+  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
+  __ movl(out, Immediate(0));  // does not change flags
+  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
+  __ cvttss2si(out, t1);
   __ Bind(&done);
 }
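The comment block in the new code spells out the rounding scheme; the same semantics can be written out directly in plain code: floor the input, add one when the remainder is at least 0.5, then convert with NaN mapped to 0 and out-of-range values clamped. The sketch below models the intended behavior, not the generated instruction sequence (the low-end clamp is added here only to keep the final cast well-defined):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    static int32_t RoundFloatHalfUp(float in) {
      if (std::isnan(in)) return 0;  // NaN maps to 0.
      float result = std::floor(in);
      if (in - result >= 0.5f) result += 1.0f;
      // Clamp to the int32 range before converting.
      if (result >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
        return std::numeric_limits<int32_t>::max();
      }
      if (result <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
        return std::numeric_limits<int32_t>::min();
      }
      return static_cast<int32_t>(result);
    }

    int main() {
      assert(RoundFloatHalfUp(2.5f) == 3);
      assert(RoundFloatHalfUp(-2.5f) == -2);  // halves round towards positive infinity
      assert(RoundFloatHalfUp(std::nanf("")) == 0);
      assert(RoundFloatHalfUp(1e20f) == std::numeric_limits<int32_t>::max());
      return 0;
    }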
 
@@ -1216,7 +1216,7 @@
 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1490,7 +1490,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1543,7 +1543,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7e0d729..7dfbfb0 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -583,6 +583,7 @@
     locations->SetInAt(0, Location::RequiresFpuRegister());
     locations->SetOut(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
+    locations->AddTemp(Location::RequiresFpuRegister());
     return;
   }
 
@@ -598,9 +599,10 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
   // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+  if (!kRoundIsPlusPointFive) {
+    return;
   }
+  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
@@ -610,47 +612,45 @@
     return;
   }
 
-  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  NearLabel done, nan;
+  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  NearLabel skip_incr, done;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Load 0.5 into inPlusPointFive.
-  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));
+  // Since no direct x86 rounding instruction matches the required semantics,
+  // this intrinsic is implemented as follows:
+  //  result = floor(in);
+  //  if (in - result >= 0.5f)
+  //    result = result + 1.0f;
+  __ movss(t2, in);
+  __ roundss(t1, in, Immediate(1));
+  __ subss(t2, t1);
+  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
+  __ j(kBelow, &skip_incr);
+  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
+  __ Bind(&skip_incr);
 
-  // Add in the input.
-  __ addss(inPlusPointFive, in);
-
-  // And truncate to an integer.
-  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
-  // Load maxInt into out.
-  codegen_->Load64BitValue(out, kPrimIntMax);
-
-  // if inPlusPointFive >= maxInt goto done
-  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
-  __ j(kAboveEqual, &done);
-
-  // if input == NaN goto nan
-  __ j(kUnordered, &nan);
-
-  // output = float-to-int-truncate(input)
-  __ cvttss2si(out, inPlusPointFive);
-  __ jmp(&done);
-  __ Bind(&nan);
-
-  //  output = 0
-  __ xorl(out, out);
+  // Final conversion to an integer. Unfortunately this also does not have a
+  // direct x86 instruction, since NaN should map to 0 and large positive
+  // values need to be clipped to the extreme value.
+  codegen_->Load32BitValue(out, kPrimIntMax);
+  __ cvtsi2ss(t2, out);
+  __ comiss(t1, t2);
+  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
+  __ movl(out, Immediate(0));  // does not change flags
+  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
+  __ cvttss2si(out, t1);
   __ Bind(&done);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
   // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+  if (!kRoundIsPlusPointFive) {
+    return;
   }
+  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
@@ -660,39 +660,36 @@
     return;
   }
 
-  // Implement RoundDouble as t1 = floor(input + 0.5);  convert to long.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  NearLabel done, nan;
+  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  NearLabel skip_incr, done;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Load 0.5 into inPlusPointFive.
-  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));
+  // Since no direct x86 rounding instruction matches the required semantics,
+  // this intrinsic is implemented as follows:
+  //  result = floor(in);
+  //  if (in - result >= 0.5)
+  //    result = result + 1.0;
+  __ movsd(t2, in);
+  __ roundsd(t1, in, Immediate(1));
+  __ subsd(t2, t1);
+  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
+  __ j(kBelow, &skip_incr);
+  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0));
+  __ Bind(&skip_incr);
 
-  // Add in the input.
-  __ addsd(inPlusPointFive, in);
-
-  // And truncate to an integer.
-  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
-
-  // Load maxLong into out.
+  // Final conversion to an integer. Unfortunately this also does not have a
+  // direct x86 instruction, since NaN should map to 0 and large positive
+  // values need to be clipped to the extreme value.
   codegen_->Load64BitValue(out, kPrimLongMax);
-
-  // if inPlusPointFive >= maxLong goto done
-  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
-  __ j(kAboveEqual, &done);
-
-  // if input == NaN goto nan
-  __ j(kUnordered, &nan);
-
-  // output = double-to-long-truncate(input)
-  __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
-  __ jmp(&done);
-  __ Bind(&nan);
-
-  //  output = 0
-  __ xorl(out, out);
+  __ cvtsi2sd(t2, out, /* is64bit */ true);
+  __ comisd(t1, t2);
+  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
+  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
+  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
+  __ cvttsd2si(out, t1, /* is64bit */ true);
   __ Bind(&done);
 }
 
@@ -1303,7 +1300,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1577,7 +1574,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1634,7 +1631,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 7a78bfd..5fdfb9b 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -376,6 +376,10 @@
     return PolicyField::Decode(GetPayload());
   }
 
+  bool RequiresRegisterKind() const {
+    return GetPolicy() == kRequiresRegister || GetPolicy() == kRequiresFpuRegister;
+  }
+
   uintptr_t GetEncoding() const {
     return GetPayload();
   }
@@ -480,6 +484,7 @@
  public:
   enum CallKind {
     kNoCall,
+    kCallOnMainAndSlowPath,
     kCallOnSlowPath,
     kCallOnMainOnly
   };
@@ -540,10 +545,29 @@
 
   Location Out() const { return output_; }
 
-  bool CanCall() const { return call_kind_ != kNoCall; }
-  bool WillCall() const { return call_kind_ == kCallOnMainOnly; }
-  bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
-  bool NeedsSafepoint() const { return CanCall(); }
+  bool CanCall() const {
+    return call_kind_ != kNoCall;
+  }
+
+  bool WillCall() const {
+    return call_kind_ == kCallOnMainOnly || call_kind_ == kCallOnMainAndSlowPath;
+  }
+
+  bool CallsOnSlowPath() const {
+    return call_kind_ == kCallOnSlowPath || call_kind_ == kCallOnMainAndSlowPath;
+  }
+
+  bool OnlyCallsOnSlowPath() const {
+    return call_kind_ == kCallOnSlowPath;
+  }
+
+  bool CallsOnMainAndSlowPath() const {
+    return call_kind_ == kCallOnMainAndSlowPath;
+  }
+
+  bool NeedsSafepoint() const {
+    return CanCall();
+  }
 
   void SetStackBit(uint32_t index) {
     stack_mask_->SetBit(index);
@@ -629,8 +653,7 @@
   // Whether these are locations for an intrinsified call.
   bool intrinsified_;
 
-  ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
-  ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
+  friend class RegisterAllocatorTest;
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
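For quick reference, the new kCallOnMainAndSlowPath kind is the one for which both WillCall() and CallsOnSlowPath() hold, while OnlyCallsOnSlowPath() stays false. A standalone sketch of the predicate behavior, using an illustrative copy of the enum rather than the real LocationSummary declarations:

#include <cassert>

// Illustrative copy of the call kinds and predicates (not the ART declarations).
enum CallKind { kNoCall, kCallOnMainAndSlowPath, kCallOnSlowPath, kCallOnMainOnly };

static bool WillCall(CallKind kind) {
  return kind == kCallOnMainOnly || kind == kCallOnMainAndSlowPath;
}
static bool CallsOnSlowPath(CallKind kind) {
  return kind == kCallOnSlowPath || kind == kCallOnMainAndSlowPath;
}
static bool OnlyCallsOnSlowPath(CallKind kind) {
  return kind == kCallOnSlowPath;
}

int main() {
  // The intrinsics switched to kCallOnMainAndSlowPath call on the main path and also
  // have a slow path, so both predicates hold for them.
  assert(WillCall(kCallOnMainAndSlowPath) && CallsOnSlowPath(kCallOnMainAndSlowPath));
  assert(!OnlyCallsOnSlowPath(kCallOnMainAndSlowPath));
  assert(WillCall(kCallOnMainOnly) && !CallsOnSlowPath(kCallOnMainOnly));
  return 0;
}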
 
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 2f59d4c..0819fb0 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -37,7 +37,10 @@
 
   virtual ~HOptimization() {}
 
-  // Return the name of the pass.
+  // Return the name of the pass. Pass names for a single HOptimization should be of the form
+  // <optimization_name> or <optimization_name>$<pass_name>, where <optimization_name> is a common
+  // prefix. Examples: 'instruction_simplifier', 'instruction_simplifier$after_bce',
+  // 'instruction_simplifier$before_codegen'.
   const char* GetPassName() const { return pass_name_; }
 
   // Perform the analysis itself.
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d5b0d77..f7c82d1 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -95,6 +95,8 @@
 
 static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB;
 
+static constexpr const char* kPassNameSeparator = "$";
+
 /**
  * Used by the code generator, to allocate the code in a vector.
  */
@@ -266,7 +268,7 @@
 class OptimizingCompiler FINAL : public Compiler {
  public:
   explicit OptimizingCompiler(CompilerDriver* driver);
-  ~OptimizingCompiler();
+  ~OptimizingCompiler() OVERRIDE;
 
   bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE;
 
@@ -305,17 +307,17 @@
       OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
- protected:
-  virtual void RunOptimizations(HGraph* graph,
-                                CodeGenerator* codegen,
-                                CompilerDriver* driver,
-                                const DexCompilationUnit& dex_compilation_unit,
-                                PassObserver* pass_observer,
-                                StackHandleScopeCollection* handles) const;
+ private:
+  void RunOptimizations(HGraph* graph,
+                        CodeGenerator* codegen,
+                        CompilerDriver* driver,
+                        const DexCompilationUnit& dex_compilation_unit,
+                        PassObserver* pass_observer,
+                        StackHandleScopeCollection* handles) const;
 
-  virtual void RunOptimizations(HOptimization* optimizations[],
-                                size_t length,
-                                PassObserver* pass_observer) const;
+  void RunOptimizations(HOptimization* optimizations[],
+                        size_t length,
+                        PassObserver* pass_observer) const;
 
  private:
   // Create a 'CompiledMethod' for an optimized graph.
@@ -420,6 +422,117 @@
       || instruction_set == kX86_64;
 }
 
+static HOptimization* BuildOptimization(
+    const std::string& opt_name,
+    ArenaAllocator* arena,
+    HGraph* graph,
+    OptimizingCompilerStats* stats,
+    CodeGenerator* codegen,
+    CompilerDriver* driver,
+    const DexCompilationUnit& dex_compilation_unit,
+    StackHandleScopeCollection* handles,
+    SideEffectsAnalysis* most_recent_side_effects,
+    HInductionVarAnalysis* most_recent_induction) {
+  if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
+    return new (arena) arm::InstructionSimplifierArm(graph, stats);
+  } else if (opt_name == arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName) {
+    return new (arena) arm64::InstructionSimplifierArm64(graph, stats);
+  } else if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
+    CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
+    return new (arena) BoundsCheckElimination(graph,
+                                              *most_recent_side_effects,
+                                              most_recent_induction);
+  } else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) {
+    CHECK(most_recent_side_effects != nullptr);
+    return new (arena) GVNOptimization(graph, *most_recent_side_effects);
+  } else if (opt_name == HConstantFolding::kConstantFoldingPassName) {
+    return new (arena) HConstantFolding(graph);
+  } else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) {
+    return new (arena) HDeadCodeElimination(graph, stats);
+  } else if (opt_name == HInliner::kInlinerPassName) {
+    size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
+    return new (arena) HInliner(graph,                   // outer_graph
+                                graph,                   // outermost_graph
+                                codegen,
+                                dex_compilation_unit,    // outer_compilation_unit
+                                dex_compilation_unit,    // outermost_compilation_unit
+                                driver,
+                                handles,
+                                stats,
+                                number_of_dex_registers,
+                                /* depth */ 0);
+  } else if (opt_name == HSharpening::kSharpeningPassName) {
+    return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
+  } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
+    return new (arena) HSelectGenerator(graph, stats);
+  } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+    return new (arena) HInductionVarAnalysis(graph);
+  } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
+    return new (arena) InstructionSimplifier(graph, stats);
+  } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
+    return new (arena) IntrinsicsRecognizer(graph, driver, stats);
+  } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
+    CHECK(most_recent_side_effects != nullptr);
+    return new (arena) LICM(graph, *most_recent_side_effects, stats);
+  } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
+    CHECK(most_recent_side_effects != nullptr);
+    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
+  } else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) {
+    return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
+  } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) {
+    return new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+  } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+    return new (arena) SideEffectsAnalysis(graph);
+  } else if (opt_name == x86::PcRelativeFixups::kPcRelativeFixupsX86PassName) {
+    return new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+  } else if (opt_name == x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName) {
+    return new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+  }
+  return nullptr;
+}
+
+static ArenaVector<HOptimization*> BuildOptimizations(
+    const std::vector<std::string>& pass_names,
+    ArenaAllocator* arena,
+    HGraph* graph,
+    OptimizingCompilerStats* stats,
+    CodeGenerator* codegen,
+    CompilerDriver* driver,
+    const DexCompilationUnit& dex_compilation_unit,
+    StackHandleScopeCollection* handles) {
+  // A few HOptimization constructors require a SideEffectsAnalysis or HInductionVarAnalysis
+  // instance. This method assumes that each such pass expects the nearest instance preceding it
+  // in the pass name list.
+  SideEffectsAnalysis* most_recent_side_effects = nullptr;
+  HInductionVarAnalysis* most_recent_induction = nullptr;
+  ArenaVector<HOptimization*> ret(arena->Adapter());
+  for (const std::string& pass_name : pass_names) {
+    size_t pos = pass_name.find(kPassNameSeparator);    // Strip suffix to get base pass name.
+    std::string opt_name = pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
+
+    HOptimization* opt = BuildOptimization(
+        opt_name,
+        arena,
+        graph,
+        stats,
+        codegen,
+        driver,
+        dex_compilation_unit,
+        handles,
+        most_recent_side_effects,
+        most_recent_induction);
+    CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
+    ret.push_back(opt);
+
+    if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+      most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
+    } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+      most_recent_induction = down_cast<HInductionVarAnalysis*>(opt);
+    }
+  }
+  return ret;
+}
+
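The separator handling above means only the prefix of a requested pass name is used to construct the optimization; the part after '$' merely distinguishes repeated runs of the same pass. A small sketch of that key extraction, using a hypothetical helper name that mirrors the find/substr logic in BuildOptimizations:

#include <string>

// Hypothetical helper mirroring the kPassNameSeparator stripping above (not ART code).
// "instruction_simplifier$after_bce" -> "instruction_simplifier"
// "instruction_simplifier"           -> "instruction_simplifier"
std::string ExtractOptimizationName(const std::string& pass_name) {
  size_t pos = pass_name.find('$');
  return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
}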
 void OptimizingCompiler::RunOptimizations(HOptimization* optimizations[],
                                           size_t length,
                                           PassObserver* pass_observer) const {
@@ -444,11 +557,11 @@
   }
   size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
   HInliner* inliner = new (graph->GetArena()) HInliner(
-      graph,
-      graph,
+      graph,                   // outer_graph
+      graph,                   // outermost_graph
       codegen,
-      dex_compilation_unit,
-      dex_compilation_unit,
+      dex_compilation_unit,    // outer_compilation_unit
+      dex_compilation_unit,    // outermost_compilation_unit
       driver,
       handles,
       stats,
@@ -473,7 +586,7 @@
       arm::InstructionSimplifierArm* simplifier =
           new (arena) arm::InstructionSimplifierArm(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
+      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       HOptimization* arm_optimizations[] = {
         simplifier,
         side_effects,
@@ -489,7 +602,7 @@
       arm64::InstructionSimplifierArm64* simplifier =
           new (arena) arm64::InstructionSimplifierArm64(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
+      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       HOptimization* arm64_optimizations[] = {
         simplifier,
         side_effects,
@@ -518,7 +631,7 @@
       x86::PcRelativeFixups* pc_relative_fixups =
           new (arena) x86::PcRelativeFixups(graph, codegen, stats);
       x86::X86MemoryOperandGeneration* memory_gen =
-          new(arena) x86::X86MemoryOperandGeneration(graph, stats, codegen);
+          new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
       HOptimization* x86_optimizations[] = {
           pc_relative_fixups,
           memory_gen
@@ -530,7 +643,7 @@
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64: {
       x86::X86MemoryOperandGeneration* memory_gen =
-          new(arena) x86::X86MemoryOperandGeneration(graph, stats, codegen);
+          new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
       HOptimization* x86_64_optimizations[] = {
           memory_gen
       };
@@ -546,7 +659,8 @@
 NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
 static void AllocateRegisters(HGraph* graph,
                               CodeGenerator* codegen,
-                              PassObserver* pass_observer) {
+                              PassObserver* pass_observer,
+                              RegisterAllocator::Strategy strategy) {
   {
     PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
                     pass_observer);
@@ -559,7 +673,7 @@
   }
   {
     PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
-    RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
+    RegisterAllocator::Create(graph->GetArena(), codegen, liveness, strategy)->AllocateRegisters();
   }
 }
 
@@ -571,15 +685,30 @@
                                           StackHandleScopeCollection* handles) const {
   OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
+  if (driver->GetCompilerOptions().GetPassesToRun() != nullptr) {
+    ArenaVector<HOptimization*> optimizations = BuildOptimizations(
+        *driver->GetCompilerOptions().GetPassesToRun(),
+        arena,
+        graph,
+        stats,
+        codegen,
+        driver,
+        dex_compilation_unit,
+        handles);
+    RunOptimizations(&optimizations[0], optimizations.size(), pass_observer);
+    return;
+  }
+
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
-      graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
+      graph, stats, "dead_code_elimination$initial");
   HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
-      graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
+      graph, stats, "dead_code_elimination$final");
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
-  HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
-  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
+  HConstantFolding* fold2 = new (arena) HConstantFolding(
+      graph, "constant_folding$after_inlining");
+  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
   LICM* licm = new (arena) LICM(graph, *side_effects, stats);
@@ -588,9 +717,9 @@
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier_after_bce");
+      graph, stats, "instruction_simplifier$after_bce");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier_before_codegen");
+      graph, stats, "instruction_simplifier$before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats);
 
   HOptimization* optimizations1[] = {
@@ -626,7 +755,6 @@
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
   RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer);
-  AllocateRegisters(graph, codegen, pass_observer);
 }
 
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
@@ -841,6 +969,10 @@
                      &pass_observer,
                      &handles);
 
+    RegisterAllocator::Strategy regalloc_strategy =
+      compiler_options.GetRegisterAllocationStrategy();
+    AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
+
     codegen->Compile(code_allocator);
     pass_observer.DumpDisassembly();
   }
diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h
index 1e8b071..5a7397b 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.h
+++ b/compiler/optimizing/pc_relative_fixups_mips.h
@@ -32,6 +32,8 @@
       : HOptimization(graph, "pc_relative_fixups_mips", stats),
         codegen_(codegen) {}
 
+  static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips";
+
   void Run() OVERRIDE;
 
  private:
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 921f3df..ad0921d 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -227,6 +227,7 @@
       case Intrinsics::kMathMaxFloatFloat:
       case Intrinsics::kMathMinDoubleDouble:
       case Intrinsics::kMathMinFloatFloat:
+      case Intrinsics::kMathRoundFloat:
         if (!base_added) {
           DCHECK(invoke_static_or_direct != nullptr);
           DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index 03de2fc..72fa71e 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -29,9 +29,11 @@
 class PcRelativeFixups : public HOptimization {
  public:
   PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "pc_relative_fixups_x86", stats),
+      : HOptimization(graph, kPcRelativeFixupsX86PassName, stats),
         codegen_(codegen) {}
 
+  static constexpr const char* kPcRelativeFixupsX86PassName = "pc_relative_fixups_x86";
+
   void Run() OVERRIDE;
 
  private:
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 2367ce1..5b768d5 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -21,6 +21,7 @@
 
 #include "base/bit_vector-inl.h"
 #include "code_generator.h"
+#include "register_allocator_graph_color.h"
 #include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 
@@ -41,6 +42,8 @@
   switch (strategy) {
     case kRegisterAllocatorLinearScan:
       return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis);
+    case kRegisterAllocatorGraphColor:
+      return new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis);
     default:
       LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
       UNREACHABLE();
@@ -163,6 +166,19 @@
               } else {
                 codegen.DumpFloatingPointRegister(message, current->GetRegister());
               }
+              for (LiveInterval* interval : intervals) {
+                if (interval->HasRegister()
+                    && interval->GetRegister() == current->GetRegister()
+                    && interval->CoversSlow(j)) {
+                  message << std::endl;
+                  if (interval->GetDefinedBy() != nullptr) {
+                    message << interval->GetDefinedBy()->GetKind() << " ";
+                  } else {
+                    message << "physical ";
+                  }
+                  interval->Dump(message);
+                }
+              }
               LOG(FATAL) << message.str();
             } else {
               return false;
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 729eede..7e1fff8 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -40,7 +40,8 @@
 class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> {
  public:
   enum Strategy {
-    kRegisterAllocatorLinearScan
+    kRegisterAllocatorLinearScan,
+    kRegisterAllocatorGraphColor
   };
 
   static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan;
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
new file mode 100644
index 0000000..79ca5a0
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -0,0 +1,1012 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_graph_color.h"
+
+#include "code_generator.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+#include "thread-inl.h"
+
+namespace art {
+
+// Highest number of registers that we support for any platform. This can be used for std::bitset,
+// for example, which needs to know its size at compile time.
+static constexpr size_t kMaxNumRegs = 32;
+
+// The maximum number of graph coloring attempts before triggering a DCHECK.
+// This is meant to catch changes to the graph coloring algorithm that undermine its forward
+// progress guarantees. Forward progress for the algorithm means splitting live intervals on
+// every graph coloring attempt so that eventually the interference graph will be sparse enough
+// to color. The main threat to forward progress is trying to split short intervals which cannot be
+// split further; this could cause infinite looping because the interference graph would never
+// change. This is avoided by prioritizing short intervals before long ones, so that long
+// intervals are split when coloring fails.
+static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
+
+// Interference nodes make up the interference graph, which is the primary data structure in
+// graph coloring register allocation. Each node represents a single live interval, and contains
+// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
+// pre-colored nodes never contain outgoing edges (only incoming ones).
+//
+// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
+// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
+//
+// Note that a pair interval is represented by a single node in the interference graph, which
+// essentially requires two colors. One consequence of this is that the degree of a node is not
+// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
+// number of colors with which a node could interfere. We model this by giving edges different
+// weights (1 or 2) to control how much it increases the degree of adjacent nodes.
+// For example, the edge between two single nodes will have weight 1. On the other hand,
+// the edge between a single node and a pair node will have weight 2. This is because the pair
+// node could block up to two colors for the single node, and because the single node could
+// block an entire two-register aligned slot for the pair node.
+// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
+// and thus whether it is safe to prune it from the interference graph early on.
+class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
+ public:
+  InterferenceNode(ArenaAllocator* allocator, LiveInterval* interval, size_t id)
+        : interval_(interval),
+          adjacent_nodes_(CmpPtr, allocator->Adapter(kArenaAllocRegisterAllocator)),
+          out_degree_(0),
+          id_(id) {}
+
+  // Used to maintain determinism when storing InterferenceNode pointers in sets.
+  static bool CmpPtr(const InterferenceNode* lhs, const InterferenceNode* rhs) {
+    return lhs->id_ < rhs->id_;
+  }
+
+  void AddInterference(InterferenceNode* other) {
+    if (adjacent_nodes_.insert(other).second) {
+      out_degree_ += EdgeWeightWith(other);
+    }
+  }
+
+  void RemoveInterference(InterferenceNode* other) {
+    if (adjacent_nodes_.erase(other) > 0) {
+      out_degree_ -= EdgeWeightWith(other);
+    }
+  }
+
+  bool ContainsInterference(InterferenceNode* other) const {
+    return adjacent_nodes_.count(other) > 0;
+  }
+
+  LiveInterval* GetInterval() const {
+    return interval_;
+  }
+
+  const ArenaSet<InterferenceNode*, decltype(&CmpPtr)>& GetAdjacentNodes() const {
+    return adjacent_nodes_;
+  }
+
+  size_t GetOutDegree() const {
+    return out_degree_;
+  }
+
+  size_t GetId() const {
+    return id_;
+  }
+
+ private:
+  // We give extra weight to edges adjacent to pair nodes. See the general comment on the
+  // interference graph above.
+  size_t EdgeWeightWith(InterferenceNode* other) const {
+    return (interval_->HasHighInterval() || other->interval_->HasHighInterval()) ? 2 : 1;
+  }
+
+  // The live interval that this node represents.
+  LiveInterval* const interval_;
+
+  // All nodes interfering with this one.
+  // TODO: There is potential to use a cheaper data structure here, especially since
+  //       adjacency sets will usually be small.
+  ArenaSet<InterferenceNode*, decltype(&CmpPtr)> adjacent_nodes_;
+
+  // The maximum number of colors with which this node could interfere. This could be more than
+  // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
+  // We use "out" degree because incoming edges come from nodes already pruned from the graph,
+  // and do not affect the coloring of this node.
+  size_t out_degree_;
+
+  // A unique identifier for this node, used to maintain determinism when storing
+  // interference nodes in sets.
+  const size_t id_;
+
+  // TODO: We could cache the result of interval_->RequiresRegister(), since it
+  //       will not change for the lifetime of this node. (Currently, RequiresRegister() requires
+  //       iterating through all uses of a live interval.)
+
+  DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
+};
+
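A small worked example of the edge-weight rule described above: suppose a pair node P interferes with two single nodes A and B, and three registers are available. Then P has degree 4 and is not trivially colorable, while A and B each have degree 2 and can safely be pruned early. The sketch below uses an illustrative standalone helper, not the ART API:

#include <cassert>

// Illustrative edge weight: an edge touching a pair interval costs 2, otherwise 1.
static int EdgeWeight(bool lhs_is_pair, bool rhs_is_pair) {
  return (lhs_is_pair || rhs_is_pair) ? 2 : 1;
}

int main() {
  const int num_registers = 3;
  int degree_p = EdgeWeight(true, false) + EdgeWeight(true, false);  // pair P vs singles A and B
  int degree_a = EdgeWeight(false, true);                            // single A vs pair P
  assert(degree_p == 4);                    // >= num_registers, so P is not guaranteed a color
  assert(degree_a < num_registers);         // A is guaranteed a color and can be pruned early
  return 0;
}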
+static bool IsCoreInterval(LiveInterval* interval) {
+  return interval->GetType() != Primitive::kPrimFloat
+      && interval->GetType() != Primitive::kPrimDouble;
+}
+
+static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
+  return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
+}
+
+RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+                                                         CodeGenerator* codegen,
+                                                         const SsaLivenessAnalysis& liveness)
+      : RegisterAllocator(allocator, codegen, liveness),
+        core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        int_spill_slot_counter_(0),
+        double_spill_slot_counter_(0),
+        float_spill_slot_counter_(0),
+        long_spill_slot_counter_(0),
+        catch_phi_spill_slot_counter_(0),
+        reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
+        reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()),
+        number_of_globally_blocked_core_regs_(0),
+        number_of_globally_blocked_fp_regs_(0),
+        max_safepoint_live_core_regs_(0),
+        max_safepoint_live_fp_regs_(0),
+        coloring_attempt_allocator_(nullptr) {
+  // Before we ask for blocked registers, set them up in the code generator.
+  codegen->SetupBlockedRegisters();
+
+  // Initialize physical core register live intervals and blocked registers.
+  // This includes globally blocked registers, such as the stack pointer.
+  physical_core_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
+  for (size_t i = 0; i < codegen->GetNumberOfCoreRegisters(); ++i) {
+    LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt);
+    physical_core_intervals_[i] = interval;
+    core_intervals_.push_back(interval);
+    if (codegen_->IsBlockedCoreRegister(i)) {
+      ++number_of_globally_blocked_core_regs_;
+      interval->AddRange(0, liveness.GetMaxLifetimePosition());
+    }
+  }
+  // Initialize physical floating point register live intervals and blocked registers.
+  physical_fp_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
+  for (size_t i = 0; i < codegen->GetNumberOfFloatingPointRegisters(); ++i) {
+    LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat);
+    physical_fp_intervals_[i] = interval;
+    fp_intervals_.push_back(interval);
+    if (codegen_->IsBlockedFloatingPointRegister(i)) {
+      ++number_of_globally_blocked_fp_regs_;
+      interval->AddRange(0, liveness.GetMaxLifetimePosition());
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::AllocateRegisters() {
+  // (1) Collect and prepare live intervals.
+  ProcessInstructions();
+
+  for (bool processing_core_regs : {true, false}) {
+    ArenaVector<LiveInterval*>& intervals = processing_core_regs
+        ? core_intervals_
+        : fp_intervals_;
+    size_t num_registers = processing_core_regs
+        ? codegen_->GetNumberOfCoreRegisters()
+        : codegen_->GetNumberOfFloatingPointRegisters();
+
+    size_t attempt = 0;
+    while (true) {
+      ++attempt;
+      DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
+          << "Exceeded debug max graph coloring register allocation attempts. "
+          << "This could indicate that the register allocator is not making forward progress, "
+          << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
+          << "should be prioritized over long ones, because they cannot be split further.)";
+
+      // Reset the allocator for the next coloring attempt.
+      ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool());
+      coloring_attempt_allocator_ = &coloring_attempt_allocator;
+
+      // (2) Build the interference graph.
+      ArenaVector<InterferenceNode*> prunable_nodes(
+          coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator));
+      ArenaVector<InterferenceNode*> safepoints(
+          coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator));
+      BuildInterferenceGraph(intervals, &prunable_nodes, &safepoints);
+
+      // (3) Prune all uncolored nodes from interference graph.
+      ArenaStdStack<InterferenceNode*> pruned_nodes(
+          coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator));
+      PruneInterferenceGraph(prunable_nodes, num_registers, &pruned_nodes);
+
+      // (4) Color pruned nodes based on interferences.
+      bool successful = ColorInterferenceGraph(&pruned_nodes, num_registers);
+
+      if (successful) {
+        // Compute the maximum number of live registers across safepoints.
+        // Notice that we do not count globally blocked registers, such as the stack pointer.
+        if (safepoints.size() > 0) {
+          size_t max_safepoint_live_regs = ComputeMaxSafepointLiveRegisters(safepoints);
+          if (processing_core_regs) {
+            max_safepoint_live_core_regs_ =
+                max_safepoint_live_regs - number_of_globally_blocked_core_regs_;
+          } else {
+            max_safepoint_live_fp_regs_ =
+                max_safepoint_live_regs - number_of_globally_blocked_fp_regs_;
+          }
+        }
+
+        // Tell the code generator which registers were allocated.
+        // We only look at prunable_nodes because we already told the code generator about
+        // fixed intervals while processing instructions. We also ignore the fixed intervals
+        // placed at the top of catch blocks.
+        for (InterferenceNode* node : prunable_nodes) {
+          LiveInterval* interval = node->GetInterval();
+          if (interval->HasRegister()) {
+            Location low_reg = processing_core_regs
+                ? Location::RegisterLocation(interval->GetRegister())
+                : Location::FpuRegisterLocation(interval->GetRegister());
+            codegen_->AddAllocatedRegister(low_reg);
+            if (interval->HasHighInterval()) {
+              LiveInterval* high = interval->GetHighInterval();
+              DCHECK(high->HasRegister());
+              Location high_reg = processing_core_regs
+                  ? Location::RegisterLocation(high->GetRegister())
+                  : Location::FpuRegisterLocation(high->GetRegister());
+              codegen_->AddAllocatedRegister(high_reg);
+            }
+          } else {
+            DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
+          }
+        }
+
+        break;
+      }
+    }  // while unsuccessful
+  }  // for processing_core_regs
+
+  // (5) Resolve locations and deconstruct SSA form.
+  RegisterAllocationResolver(allocator_, codegen_, liveness_)
+      .Resolve(max_safepoint_live_core_regs_,
+               max_safepoint_live_fp_regs_,
+               reserved_art_method_slots_ + reserved_out_slots_,
+               int_spill_slot_counter_,
+               long_spill_slot_counter_,
+               float_spill_slot_counter_,
+               double_spill_slot_counter_,
+               catch_phi_spill_slot_counter_,
+               temp_intervals_);
+
+  if (kIsDebugBuild) {
+    Validate(/*log_fatal_on_failure*/ true);
+  }
+}
+
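For intuition, steps (3) and (4) of AllocateRegisters() follow the classic prune-then-color scheme. Below is a toy, self-contained sketch over a plain symmetric adjacency list with unit edge weights; it deliberately omits pair intervals, precolored nodes, spill handling, and the split-and-retry loop that the real allocator performs when coloring fails:

#include <cstddef>
#include <vector>

// Toy prune-and-color sketch (illustrative only, not ART code). 'adj' must be symmetric.
// Returns true and fills 'colors' (values in [0, num_colors)) if every node got a color.
bool ColorGraph(const std::vector<std::vector<size_t>>& adj,
                size_t num_colors,
                std::vector<size_t>* colors) {
  const size_t n = adj.size();
  std::vector<size_t> degree(n);
  std::vector<bool> pruned(n, false);
  std::vector<size_t> stack;
  for (size_t i = 0; i < n; ++i) degree[i] = adj[i].size();

  // Prune: repeatedly remove the unpruned node of least degree. A node with degree below
  // num_colors is guaranteed a color no matter how its remaining neighbors get colored.
  for (size_t iteration = 0; iteration < n; ++iteration) {
    size_t pick = n;
    for (size_t i = 0; i < n; ++i) {
      if (!pruned[i] && (pick == n || degree[i] < degree[pick])) pick = i;
    }
    pruned[pick] = true;
    stack.push_back(pick);
    for (size_t neighbor : adj[pick]) {
      if (!pruned[neighbor]) --degree[neighbor];
    }
  }

  // Color: pop nodes in reverse prune order and take the lowest color unused by neighbors.
  colors->assign(n, num_colors);  // num_colors doubles as the "uncolored" sentinel
  while (!stack.empty()) {
    size_t node = stack.back();
    stack.pop_back();
    std::vector<bool> used(num_colors, false);
    for (size_t neighbor : adj[node]) {
      if ((*colors)[neighbor] < num_colors) used[(*colors)[neighbor]] = true;
    }
    size_t color = 0;
    while (color < num_colors && used[color]) ++color;
    if (color == num_colors) {
      return false;  // the real allocator would split intervals here and retry
    }
    (*colors)[node] = color;
  }
  return true;
}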
+bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
+  for (bool processing_core_regs : {true, false}) {
+    ArenaVector<LiveInterval*> intervals(
+        allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+    for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+      HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+      LiveInterval* interval = instruction->GetLiveInterval();
+      if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
+        intervals.push_back(instruction->GetLiveInterval());
+      }
+    }
+
+    ArenaVector<LiveInterval*>& physical_intervals = processing_core_regs
+        ? physical_core_intervals_
+        : physical_fp_intervals_;
+    for (LiveInterval* fixed : physical_intervals) {
+      if (fixed->GetFirstRange() != nullptr) {
+        // Ideally we would check fixed ranges as well, but currently there are times when
+        // two fixed intervals for the same register will overlap. For example, a fixed input
+        // and a fixed output may sometimes share the same register, in which case there will be
+        // two fixed intervals for the same place.
+      }
+    }
+
+    for (LiveInterval* temp : temp_intervals_) {
+      if (IsCoreInterval(temp) == processing_core_regs) {
+        intervals.push_back(temp);
+      }
+    }
+
+    size_t spill_slots = int_spill_slot_counter_
+                       + long_spill_slot_counter_
+                       + float_spill_slot_counter_
+                       + double_spill_slot_counter_
+                       + catch_phi_spill_slot_counter_;
+    bool ok = ValidateIntervals(intervals,
+                                spill_slots,
+                                reserved_art_method_slots_ + reserved_out_slots_,
+                                *codegen_,
+                                allocator_,
+                                processing_core_regs,
+                                log_fatal_on_failure);
+    if (!ok) {
+      return false;
+    }
+  }  // for processing_core_regs
+
+  return true;
+}
+
+void RegisterAllocatorGraphColor::ProcessInstructions() {
+  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+
+    // Note that we currently depend on this ordering, since some helper
+    // code is designed for linear scan register allocation.
+    for (HBackwardInstructionIterator instr_it(block->GetInstructions());
+          !instr_it.Done();
+          instr_it.Advance()) {
+      ProcessInstruction(instr_it.Current());
+    }
+
+    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+      ProcessInstruction(phi_it.Current());
+    }
+
+    if (block->IsCatchBlock() ||
+        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // By blocking all registers at the top of each catch block or irreducible loop, we force
+      // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in register.
+      size_t position = block->GetLifetimeStart();
+      BlockRegisters(position, position + 1);
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (locations == nullptr) {
+    return;
+  }
+  if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
+    // We do this here because we do not want the suspend check to artificially
+    // create live registers.
+    DCHECK(instruction->IsSuspendCheckEntry());
+    DCHECK_EQ(locations->GetTempCount(), 0u);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+    return;
+  }
+
+  CheckForTempLiveIntervals(instruction);
+  CheckForSafepoint(instruction);
+  if (locations->WillCall()) {
+    // If a call will happen, create fixed intervals for caller-save registers.
+    // TODO: Note that it may be beneficial to later split intervals at this point,
+    //       so that we allow last-minute moves from a caller-save register
+    //       to a callee-save register.
+    BlockRegisters(instruction->GetLifetimePosition(),
+                   instruction->GetLifetimePosition() + 1,
+                   /*caller_save_only*/ true);
+  }
+  CheckForFixedInputs(instruction);
+
+  LiveInterval* interval = instruction->GetLiveInterval();
+  if (interval == nullptr) {
+    // Instructions lacking a valid output location do not have a live interval.
+    DCHECK(!locations->Out().IsValid());
+    return;
+  }
+
+  // Low intervals act as representatives for their corresponding high interval.
+  DCHECK(!interval->IsHighInterval());
+  if (codegen_->NeedsTwoRegisters(interval->GetType())) {
+    interval->AddHighInterval();
+  }
+  AddSafepointsFor(instruction);
+  CheckForFixedOutput(instruction);
+  AllocateSpillSlotForCatchPhi(instruction);
+
+  ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
+      ? core_intervals_
+      : fp_intervals_;
+  if (interval->HasSpillSlot() || instruction->IsConstant()) {
+    // Note that if an interval already has a spill slot, then its value currently resides
+    // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
+    // register use. This is also true for constants, which can be materialized at any point.
+    size_t first_register_use = interval->FirstRegisterUse();
+    if (first_register_use != kNoLifetime) {
+      LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
+      intervals.push_back(split);
+    } else {
+      // We won't allocate a register for this value.
+    }
+  } else {
+    intervals.push_back(interval);
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
+  // We simply block physical registers where necessary.
+  // TODO: Ideally we would coalesce the physical register with the register
+  //       allocated to the input value, but this can be tricky if, e.g., there
+  //       could be multiple physical register uses of the same value at the
+  //       same instruction. Need to think about it more.
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+    Location input = locations->InAt(i);
+    if (input.IsRegister() || input.IsFpuRegister()) {
+      BlockRegister(input, position, position + 1);
+      codegen_->AddAllocatedRegister(input);
+    } else if (input.IsPair()) {
+      BlockRegister(input.ToLow(), position, position + 1);
+      BlockRegister(input.ToHigh(), position, position + 1);
+      codegen_->AddAllocatedRegister(input.ToLow());
+      codegen_->AddAllocatedRegister(input.ToHigh());
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
+  // If an instruction has a fixed output location, we give the live interval a register and then
+  // proactively split it just after the definition point to avoid creating too many interferences
+  // with a fixed node.
+  LiveInterval* interval = instruction->GetLiveInterval();
+  Location out = interval->GetDefinedBy()->GetLocations()->Out();
+  size_t position = instruction->GetLifetimePosition();
+  DCHECK_GE(interval->GetEnd() - position, 2u);
+
+  if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+    out = instruction->GetLocations()->InAt(0);
+  }
+
+  if (out.IsRegister() || out.IsFpuRegister()) {
+    interval->SetRegister(out.reg());
+    codegen_->AddAllocatedRegister(out);
+    Split(interval, position + 1);
+  } else if (out.IsPair()) {
+    interval->SetRegister(out.low());
+    interval->GetHighInterval()->SetRegister(out.high());
+    codegen_->AddAllocatedRegister(out.ToLow());
+    codegen_->AddAllocatedRegister(out.ToHigh());
+    Split(interval, position + 1);
+  } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
+    interval->SetSpillSlot(out.GetStackIndex());
+  } else {
+    DCHECK(out.IsUnallocated() || out.IsConstant());
+  }
+}
+
+void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
+  LiveInterval* interval = instruction->GetLiveInterval();
+  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+    size_t safepoint_position = safepoint->GetLifetimePosition();
+
+    // Test that safepoints_ are ordered by decreasing lifetime position.
+    DCHECK(safepoint_index == safepoints_.size() ||
+           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+    if (safepoint_position == interval->GetStart()) {
+      // The safepoint is for this instruction, so the location of the instruction
+      // does not need to be saved.
+      DCHECK_EQ(safepoint_index, safepoints_.size());
+      DCHECK_EQ(safepoint, instruction);
+      continue;
+    } else if (interval->IsDeadAt(safepoint_position)) {
+      break;
+    } else if (!interval->Covers(safepoint_position)) {
+      // Hole in the interval.
+      continue;
+    }
+    interval->AddSafepoint(safepoint);
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+    Location temp = locations->GetTemp(i);
+    if (temp.IsRegister() || temp.IsFpuRegister()) {
+      BlockRegister(temp, position, position + 1);
+      codegen_->AddAllocatedRegister(temp);
+    } else {
+      DCHECK(temp.IsUnallocated());
+      switch (temp.GetPolicy()) {
+        case Location::kRequiresRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+          interval->AddTempUse(instruction, i);
+          core_intervals_.push_back(interval);
+          temp_intervals_.push_back(interval);
+          break;
+        }
+
+        case Location::kRequiresFpuRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+          interval->AddTempUse(instruction, i);
+          fp_intervals_.push_back(interval);
+          temp_intervals_.push_back(interval);
+          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+            interval->AddHighInterval(/*is_temp*/ true);
+            temp_intervals_.push_back(interval->GetHighInterval());
+          }
+          break;
+        }
+
+        default:
+          LOG(FATAL) << "Unexpected policy for temporary location "
+                     << temp.GetPolicy();
+      }
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+
+  if (locations->NeedsSafepoint()) {
+    safepoints_.push_back(instruction);
+    if (locations->OnlyCallsOnSlowPath()) {
+      // We add a synthesized range at this position to record the live registers
+      // at this position. Ideally, we could just update the safepoints when locations
+      // are updated, but we currently need to know the full stack size before updating
+      // locations (because of parameters and the fact that we don't have a frame pointer).
+      // And knowing the full stack size requires to know the maximum number of live
+      // registers at calls in slow paths.
+      // By adding the following interval in the algorithm, we can compute this
+      // maximum before updating locations.
+      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+      interval->AddRange(position, position + 1);
+      core_intervals_.push_back(interval);
+      fp_intervals_.push_back(interval);
+    }
+  }
+}
+
+LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
+  if (interval->GetStart() < position && position < interval->GetEnd()) {
+    return Split(interval, position);
+  } else {
+    return interval;
+  }
+}
+
+void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
+  DCHECK(!interval->IsHighInterval());
+
+  // Split just after a register definition.
+  if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
+    interval = TrySplit(interval, interval->GetStart() + 1);
+  }
+
+  UsePosition* use = interval->GetFirstUse();
+  while (use != nullptr && use->GetPosition() < interval->GetStart()) {
+    use = use->GetNext();
+  }
+
+  // Split around register uses.
+  size_t end = interval->GetEnd();
+  while (use != nullptr && use->GetPosition() <= end) {
+    if (use->RequiresRegister()) {
+      size_t position = use->GetPosition();
+      interval = TrySplit(interval, position - 1);
+      if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
+        // If we are at the very end of a basic block, we cannot split right
+        // at the use. Split just after instead.
+        interval = TrySplit(interval, position + 1);
+      } else {
+        interval = TrySplit(interval, position);
+      }
+    }
+    use = use->GetNext();
+  }
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
+  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+    HPhi* phi = instruction->AsPhi();
+    LiveInterval* interval = phi->GetLiveInterval();
+
+    HInstruction* previous_phi = phi->GetPrevious();
+    DCHECK(previous_phi == nullptr ||
+           previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+        << "Phis expected to be sorted by vreg number, "
+        << "so that equivalent phis are adjacent.";
+
+    if (phi->IsVRegEquivalentOf(previous_phi)) {
+      // Assign the same spill slot.
+      DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+      interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+    } else {
+      interval->SetSpillSlot(catch_phi_spill_slot_counter_);
+      catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::BlockRegister(Location location,
+                                                size_t start,
+                                                size_t end) {
+  DCHECK(location.IsRegister() || location.IsFpuRegister());
+  int reg = location.reg();
+  LiveInterval* interval = location.IsRegister()
+      ? physical_core_intervals_[reg]
+      : physical_fp_intervals_[reg];
+  DCHECK(interval->GetRegister() == reg);
+  bool blocked_by_codegen = location.IsRegister()
+      ? codegen_->IsBlockedCoreRegister(reg)
+      : codegen_->IsBlockedFloatingPointRegister(reg);
+  if (blocked_by_codegen) {
+    // We've already blocked this register for the entire method. (And adding a
+    // range inside another range violates the preconditions of AddRange).
+  } else {
+    interval->AddRange(start, end);
+  }
+}
+
+void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+      BlockRegister(Location::RegisterLocation(i), start, end);
+    }
+  }
+  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+      BlockRegister(Location::FpuRegisterLocation(i), start, end);
+    }
+  }
+}
+
+// Add an interference edge, but only if necessary.
+static void AddPotentialInterference(InterferenceNode* from, InterferenceNode* to) {
+  if (from->GetInterval()->HasRegister()) {
+    // We save space by ignoring outgoing edges from fixed nodes.
+  } else if (to->GetInterval()->IsSlowPathSafepoint()) {
+    // Safepoint intervals are only there to count max live registers,
+    // so no need to give them incoming interference edges.
+    // This is also necessary for correctness, because we don't want nodes
+    // to remove themselves from safepoint adjacency sets when they're pruned.
+  } else {
+    from->AddInterference(to);
+  }
+}
+
+// TODO: See locations->OutputCanOverlapWithInputs(); we may want to consider
+//       this when building the interference graph.
+void RegisterAllocatorGraphColor::BuildInterferenceGraph(
+    const ArenaVector<LiveInterval*>& intervals,
+    ArenaVector<InterferenceNode*>* prunable_nodes,
+    ArenaVector<InterferenceNode*>* safepoints) {
+  size_t interval_id_counter = 0;
+
+  // Build the interference graph efficiently by ordering range endpoints
+  // by position and doing a linear sweep to find interferences. (That is, we
+  // jump from endpoint to endpoint, maintaining a set of intervals live at each
+  // point. If two nodes are ever in the live set at the same time, then they
+  // interfere with each other.)
+  //
+  // We order by both position and (secondarily) by whether the endpoint
+  // begins or ends a range; we want to process range endings before range
+  // beginnings at the same position because they should not conflict.
+  //
+  // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
+  // Tuple contents: (position, is_range_beginning, node).
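+  // For example, ranges [2,6) and [4,8) yield endpoints (2,begin), (4,begin), (6,end), (8,end);
+  // when the beginning at position 4 is processed, the first range is still in the live set,
+  // so the two nodes interfere.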
+  ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
+      coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator));
+  for (LiveInterval* parent : intervals) {
+    for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
+      LiveRange* range = sibling->GetFirstRange();
+      if (range != nullptr) {
+        InterferenceNode* node = new (coloring_attempt_allocator_) InterferenceNode(
+            coloring_attempt_allocator_, sibling, interval_id_counter++);
+        if (sibling->HasRegister()) {
+          // Fixed nodes will never be pruned, so no need to keep track of them.
+        } else if (sibling->IsSlowPathSafepoint()) {
+          // Safepoint intervals are synthesized to count max live registers.
+          // They will be processed separately after coloring.
+          safepoints->push_back(node);
+        } else {
+          prunable_nodes->push_back(node);
+        }
+
+        while (range != nullptr) {
+          range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
+          range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
+          range = range->GetNext();
+        }
+      }
+    }
+  }
+
+  // Sort the endpoints.
+  std::sort(range_endpoints.begin(), range_endpoints.end());
+
+  // Nodes live at the current position in the linear sweep.
+  ArenaSet<InterferenceNode*, decltype(&InterferenceNode::CmpPtr)> live(
+      InterferenceNode::CmpPtr, coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
+  // live set. When we encounter the end of a range, we remove the corresponding node
+  // from the live set. Nodes interfere if they are in the live set at the same time.
+  for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
+    bool is_range_beginning;
+    InterferenceNode* node;
+    // Extract information from the tuple, including the node this tuple represents.
+    std::tie(std::ignore, is_range_beginning, node) = *it;
+
+    if (is_range_beginning) {
+      for (InterferenceNode* conflicting : live) {
+        DCHECK_NE(node, conflicting);
+        AddPotentialInterference(node, conflicting);
+        AddPotentialInterference(conflicting, node);
+      }
+      DCHECK_EQ(live.count(node), 0u);
+      live.insert(node);
+    } else {
+      // End of range.
+      DCHECK_EQ(live.count(node), 1u);
+      live.erase(node);
+    }
+  }
+  DCHECK(live.empty());
+}
+
+// The order in which we color nodes is vital to both correctness (forward
+// progress) and code quality. Specifically, we must prioritize intervals
+// that require registers, and after that we must prioritize short intervals.
+// That way, if we fail to color a node, it either won't require a register,
+// or it will be a long interval that can be split in order to make the
+// interference graph sparser.
+// TODO: May also want to consider:
+// - Loop depth
+// - Constants (since they can be rematerialized)
+// - Allocated spill slots
+static bool GreaterNodePriority(const InterferenceNode* lhs,
+                                const InterferenceNode* rhs) {
+  LiveInterval* lhs_interval = lhs->GetInterval();
+  LiveInterval* rhs_interval = rhs->GetInterval();
+
+  // (1) Choose the interval that requires a register.
+  if (lhs_interval->RequiresRegister() != rhs_interval->RequiresRegister()) {
+    return lhs_interval->RequiresRegister();
+  }
+
+  // (2) Choose the interval that has a shorter life span.
+  if (lhs_interval->GetLength() != rhs_interval->GetLength()) {
+    return lhs_interval->GetLength() < rhs_interval->GetLength();
+  }
+
+  // (3) Just choose the interval based on a deterministic ordering.
+  return InterferenceNode::CmpPtr(lhs, rhs);
+}
+
+void RegisterAllocatorGraphColor::PruneInterferenceGraph(
+      const ArenaVector<InterferenceNode*>& prunable_nodes,
+      size_t num_regs,
+      ArenaStdStack<InterferenceNode*>* pruned_nodes) {
+  // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
+  // and all others as high degree nodes. The distinction is important: low degree nodes are
+  // guaranteed a color, while high degree nodes are not.
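+  // For example, with num_regs = 4, a node with at most 3 neighbors can always receive a color
+  // left over after all of its neighbors are colored, no matter how they are colored.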
+
+  // Low-degree nodes are guaranteed a color, so worklist order does not matter.
+  ArenaDeque<InterferenceNode*> low_degree_worklist(
+      coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // If we have to prune from the high-degree worklist, we cannot guarantee
+  // the pruned node a color. So, we order the worklist by priority.
+  ArenaSet<InterferenceNode*, decltype(&GreaterNodePriority)> high_degree_worklist(
+      GreaterNodePriority, coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // Build worklists.
+  for (InterferenceNode* node : prunable_nodes) {
+    DCHECK(!node->GetInterval()->HasRegister())
+        << "Fixed nodes should never be pruned";
+    DCHECK(!node->GetInterval()->IsSlowPathSafepoint())
+        << "Safepoint nodes should never be pruned";
+    if (node->GetOutDegree() < num_regs) {
+      low_degree_worklist.push_back(node);
+    } else {
+      high_degree_worklist.insert(node);
+    }
+  }
+
+  // Helper function to prune an interval from the interference graph,
+  // which includes updating the worklists.
+  auto prune_node = [this,
+                     num_regs,
+                     &pruned_nodes,
+                     &low_degree_worklist,
+                     &high_degree_worklist] (InterferenceNode* node) {
+    DCHECK(!node->GetInterval()->HasRegister());
+    pruned_nodes->push(node);
+    for (InterferenceNode* adjacent : node->GetAdjacentNodes()) {
+      DCHECK(!adjacent->GetInterval()->IsSlowPathSafepoint())
+          << "Nodes should never interfere with synthesized safepoint nodes";
+      if (adjacent->GetInterval()->HasRegister()) {
+        // No effect on pre-colored nodes; they're never pruned.
+      } else {
+        bool was_high_degree = adjacent->GetOutDegree() >= num_regs;
+        DCHECK(adjacent->ContainsInterference(node))
+            << "Missing incoming interference edge from non-fixed node";
+        adjacent->RemoveInterference(node);
+        if (was_high_degree && adjacent->GetOutDegree() < num_regs) {
+          // This is a transition from high degree to low degree.
+          DCHECK_EQ(high_degree_worklist.count(adjacent), 1u);
+          high_degree_worklist.erase(adjacent);
+          low_degree_worklist.push_back(adjacent);
+        }
+      }
+    }
+  };
+
+  // Prune graph.
+  while (!low_degree_worklist.empty() || !high_degree_worklist.empty()) {
+    while (!low_degree_worklist.empty()) {
+      InterferenceNode* node = low_degree_worklist.front();
+      // TODO: pop_back() should work as well, but it doesn't; we get a
+      //       failed check while pruning. We should look into this.
+      low_degree_worklist.pop_front();
+      prune_node(node);
+    }
+    if (!high_degree_worklist.empty()) {
+      // We prune the lowest-priority node, because pruning a node earlier
+      // gives it a higher chance of being spilled.
+      InterferenceNode* node = *high_degree_worklist.rbegin();
+      high_degree_worklist.erase(node);
+      prune_node(node);
+    }
+  }
+}
+
+// Build a mask with a bit set for each register assigned to some
+// interval in `intervals`.
+template <typename Container>
+static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
+  std::bitset<kMaxNumRegs> conflict_mask;
+  for (InterferenceNode* adjacent : intervals) {
+    LiveInterval* conflicting = adjacent->GetInterval();
+    if (conflicting->HasRegister()) {
+      conflict_mask.set(conflicting->GetRegister());
+      if (conflicting->HasHighInterval()) {
+        DCHECK(conflicting->GetHighInterval()->HasRegister());
+        conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
+      }
+    } else {
+      DCHECK(!conflicting->HasHighInterval()
+          || !conflicting->GetHighInterval()->HasRegister());
+    }
+  }
+  return conflict_mask;
+}
+
+bool RegisterAllocatorGraphColor::ColorInterferenceGraph(
+      ArenaStdStack<InterferenceNode*>* pruned_nodes,
+      size_t num_regs) {
+  DCHECK_LE(num_regs, kMaxNumRegs) << "kMaxNumRegs is too small";
+  ArenaVector<LiveInterval*> colored_intervals(
+      coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator));
+  bool successful = true;
+
+  while (!pruned_nodes->empty()) {
+    InterferenceNode* node = pruned_nodes->top();
+    pruned_nodes->pop();
+    LiveInterval* interval = node->GetInterval();
+
+    // Search for free register(s).
+    // Note that the graph coloring allocator assumes that pair intervals are aligned here,
+    // excluding pre-colored pair intervals (which can currently be unaligned on x86).
+    std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
+    size_t reg = 0;
+    if (interval->HasHighInterval()) {
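+      // Step through even-numbered registers only, so the low/high pair stays aligned.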
+      while (reg < num_regs - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
+        reg += 2;
+      }
+    } else {
+      // We use CTZ (count trailing zeros) to quickly find the lowest available register.
+      // Note that CTZ is undefined for 0, so we special-case it.
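+      // For example, with conflict_mask = 0b0101, ~conflict_mask ends in ...1010,
+      // so CTZ returns 1: the lowest register not in conflict.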
+      reg = conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
+    }
+
+    if (reg < (interval->HasHighInterval() ? num_regs - 1 : num_regs)) {
+      // Assign register.
+      DCHECK(!interval->HasRegister());
+      interval->SetRegister(reg);
+      colored_intervals.push_back(interval);
+      if (interval->HasHighInterval()) {
+        DCHECK(!interval->GetHighInterval()->HasRegister());
+        interval->GetHighInterval()->SetRegister(reg + 1);
+        colored_intervals.push_back(interval->GetHighInterval());
+      }
+    } else if (interval->RequiresRegister()) {
+      // The interference graph is too dense to color. Make it sparser by
+      // splitting this live interval.
+      successful = false;
+      SplitAtRegisterUses(interval);
+      // We continue coloring, because there may be additional intervals that cannot
+      // be colored, and that we should split.
+    } else {
+      // Spill.
+      AllocateSpillSlotFor(interval);
+    }
+  }
+
+  // If unsuccessful, reset all register assignments.
+  if (!successful) {
+    for (LiveInterval* interval : colored_intervals) {
+      interval->ClearRegister();
+    }
+  }
+
+  return successful;
+}
+
+size_t RegisterAllocatorGraphColor::ComputeMaxSafepointLiveRegisters(
+    const ArenaVector<InterferenceNode*>& safepoints) {
+  size_t max_safepoint_live_regs = 0;
+  for (InterferenceNode* safepoint : safepoints) {
+    DCHECK(safepoint->GetInterval()->IsSlowPathSafepoint());
+    std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(safepoint->GetAdjacentNodes());
+    size_t live_regs = conflict_mask.count();
+    max_safepoint_live_regs = std::max(max_safepoint_live_regs, live_regs);
+  }
+  return max_safepoint_live_regs;
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlotFor(LiveInterval* interval) {
+  LiveInterval* parent = interval->GetParent();
+  HInstruction* defined_by = parent->GetDefinedBy();
+  if (parent->HasSpillSlot()) {
+    // We already have a spill slot for this value that we can reuse.
+  } else if (defined_by->IsParameterValue()) {
+    // Parameters already have a stack slot.
+    parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+  } else if (defined_by->IsCurrentMethod()) {
+    // The current method is always at spill slot 0.
+    parent->SetSpillSlot(0);
+  } else if (defined_by->IsConstant()) {
+    // Constants don't need a spill slot.
+  } else {
+    // Allocate a spill slot based on type.
+    size_t* spill_slot_counter;
+    switch (interval->GetType()) {
+      case Primitive::kPrimDouble:
+        spill_slot_counter = &double_spill_slot_counter_;
+        break;
+      case Primitive::kPrimLong:
+        spill_slot_counter = &long_spill_slot_counter_;
+        break;
+      case Primitive::kPrimFloat:
+        spill_slot_counter = &float_spill_slot_counter_;
+        break;
+      case Primitive::kPrimNot:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimShort:
+        spill_slot_counter = &int_spill_slot_counter_;
+        break;
+      case Primitive::kPrimVoid:
+        LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
+        UNREACHABLE();
+    }
+
+    parent->SetSpillSlot(*spill_slot_counter);
+    *spill_slot_counter += parent->NeedsTwoSpillSlots() ? 2 : 1;
+    // TODO: Could color stack slots if we wanted to, even if
+    //       it's just a trivial coloring. See the linear scan implementation,
+    //       which simply reuses spill slots for values whose live intervals
+    //       have already ended.
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
new file mode 100644
index 0000000..0b5af96
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class Location;
+class SsaLivenessAnalysis;
+class InterferenceNode;
+
+/**
+ * A graph coloring register allocator.
+ *
+ * The algorithm proceeds as follows:
+ * (1) Build an interference graph, where nodes represent live intervals, and edges represent
+ *     interferences between two intervals. Coloring this graph with k colors is isomorphic to
+ *     finding a valid register assignment with k registers.
+ * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are
+ *     guaranteed a color. (No matter how we color their adjacent nodes, we can give them a
+ *     different color.) As we prune nodes from the graph, more nodes may drop below degree k,
+ *     enabling further pruning. The key is to maintain the pruning order in a stack, so that we
+ *     can color the nodes in the reverse order.
+ *     When there are no more nodes with degree less than k, we start pruning alternate nodes based
+ *     on heuristics. Since these nodes are not guaranteed a color, we are careful to
+ *     prioritize nodes that require a register. We also prioritize short intervals, because
+ *     short intervals cannot be split very much if coloring fails (see below). "Prioritizing"
+ *     a node amounts to pruning it later, since it will have fewer interferences if we prune other
+ *     nodes first.
+ * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign
+ *     a node a color, we do one of two things:
+ *     - If the node requires a register, we consider the current coloring attempt a failure.
+ *       However, we split the node's live interval in order to make the interference graph
+ *       sparser, so that future coloring attempts may succeed.
+ *     - If the node does not require a register, we simply assign it a location on the stack.
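+ *
+ * For example, with k = 2 and intervals A, B, C, where A and C each interfere only with B,
+ * we prune A and C first (both have degree 1) and then B. Coloring in reverse order pops B
+ * first (say it gets r0); A and C then each conflict only with r0 and can both take r1.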
+ *
+ * A good reference for graph coloring register allocation is
+ * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition).
+ */
+class RegisterAllocatorGraphColor : public RegisterAllocator {
+ public:
+  RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+                              CodeGenerator* codegen,
+                              const SsaLivenessAnalysis& analysis);
+  ~RegisterAllocatorGraphColor() OVERRIDE {}
+
+  void AllocateRegisters() OVERRIDE;
+
+  bool Validate(bool log_fatal_on_failure);
+
+ private:
+  // Collect all intervals and prepare for register allocation.
+  void ProcessInstructions();
+  void ProcessInstruction(HInstruction* instruction);
+
+  // If any inputs require specific registers, block those registers
+  // at the position of this instruction.
+  void CheckForFixedInputs(HInstruction* instruction);
+
+  // If the output of an instruction requires a specific register, split
+  // the interval and assign the register to the first part.
+  void CheckForFixedOutput(HInstruction* instruction);
+
+  // Add all applicable safepoints to a live interval.
+  // Currently depends on instruction processing order.
+  void AddSafepointsFor(HInstruction* instruction);
+
+  // Collect all live intervals associated with the temporary locations
+  // needed by an instruction.
+  void CheckForTempLiveIntervals(HInstruction* instruction);
+
+  // If a safepoint is needed, add a synthesized interval to later record
+  // the number of live registers at this point.
+  void CheckForSafepoint(HInstruction* instruction);
+
+  // Split an interval, but only if `position` is inside of `interval`.
+  // Return either the new interval, or the original interval if not split.
+  static LiveInterval* TrySplit(LiveInterval* interval, size_t position);
+
+  // To ensure every graph can be colored, split live intervals
+  // at their register defs and uses. This creates short intervals with low
+  // degree in the interference graph, which are prioritized during graph
+  // coloring.
+  void SplitAtRegisterUses(LiveInterval* interval);
+
+  // If the given instruction is a catch phi, give it a spill slot.
+  void AllocateSpillSlotForCatchPhi(HInstruction* instruction);
+
+  // Ensure that the given register cannot be allocated for a given range.
+  void BlockRegister(Location location, size_t start, size_t end);
+  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+  // Use the intervals collected from instructions to construct an
+  // interference graph mapping intervals to adjacency lists.
+  // Also, collect synthesized safepoint nodes, used to keep
+  // track of live intervals across safepoints.
+  void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
+                              ArenaVector<InterferenceNode*>* prunable_nodes,
+                              ArenaVector<InterferenceNode*>* safepoints);
+
+  // Prune nodes from the interference graph to be colored later. Build
+  // a stack (pruned_nodes) containing these nodes in an order determined
+  // by various heuristics.
+  void PruneInterferenceGraph(const ArenaVector<InterferenceNode*>& prunable_nodes,
+                              size_t num_registers,
+                              ArenaStdStack<InterferenceNode*>* pruned_nodes);
+
+  // Process pruned_nodes to color the interference graph, spilling when
+  // necessary. Return true if successful. Else, split some intervals to make
+  // the interference graph sparser.
+  bool ColorInterferenceGraph(ArenaStdStack<InterferenceNode*>* pruned_nodes,
+                              size_t num_registers);
+
+  // Return the maximum number of registers live at safepoints,
+  // based on the outgoing interference edges of safepoint nodes.
+  size_t ComputeMaxSafepointLiveRegisters(const ArenaVector<InterferenceNode*>& safepoints);
+
+  // If necessary, allocate a spill slot for the given interval's parent,
+  // so that it is ready to be spilled to the stack.
+  void AllocateSpillSlotFor(LiveInterval* interval);
+
+  // Live intervals, split by kind (core and floating point).
+  // These should not contain high intervals, as those are represented by
+  // the corresponding low interval throughout register allocation.
+  ArenaVector<LiveInterval*> core_intervals_;
+  ArenaVector<LiveInterval*> fp_intervals_;
+
+  // Intervals for temporaries, saved for special handling in the resolution phase.
+  ArenaVector<LiveInterval*> temp_intervals_;
+
+  // Safepoints, saved for special handling while processing instructions.
+  ArenaVector<HInstruction*> safepoints_;
+
+  // Live intervals for specific registers. These become pre-colored nodes
+  // in the interference graph.
+  ArenaVector<LiveInterval*> physical_core_intervals_;
+  ArenaVector<LiveInterval*> physical_fp_intervals_;
+
+  // Allocated stack slot counters.
+  size_t int_spill_slot_counter_;
+  size_t double_spill_slot_counter_;
+  size_t float_spill_slot_counter_;
+  size_t long_spill_slot_counter_;
+  size_t catch_phi_spill_slot_counter_;
+
+  // Number of stack slots needed for the pointer to the current method.
+  // This is 1 for 32-bit architectures, and 2 for 64-bit architectures.
+  const size_t reserved_art_method_slots_;
+
+  // Number of stack slots needed for outgoing arguments.
+  const size_t reserved_out_slots_;
+
+  // The number of globally blocked core and floating point registers, such as the stack pointer.
+  size_t number_of_globally_blocked_core_regs_;
+  size_t number_of_globally_blocked_fp_regs_;
+
+  // The maximum number of registers live at safe points. Needed by the code generator.
+  size_t max_safepoint_live_core_regs_;
+  size_t max_safepoint_live_fp_regs_;
+
+  // An arena allocator used for a single graph coloring attempt.
+  // Many data structures are cleared between graph coloring attempts, so we reduce
+  // total memory usage by using a new arena allocator for each attempt.
+  ArenaAllocator* coloring_attempt_allocator_;
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
index b6e4f92..1a643a0 100644
--- a/compiler/optimizing/register_allocator_linear_scan.h
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -43,6 +43,7 @@
   RegisterAllocatorLinearScan(ArenaAllocator* allocator,
                               CodeGenerator* codegen,
                               const SsaLivenessAnalysis& analysis);
+  ~RegisterAllocatorLinearScan() OVERRIDE {}
 
   void AllocateRegisters() OVERRIDE;
 
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index cbb7b2f..55ea99e 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -31,12 +31,29 @@
 
 namespace art {
 
+using Strategy = RegisterAllocator::Strategy;
+
 // Note: the register allocator tests rely on the fact that constants have live
 // intervals and registers get allocated to them.
 
-class RegisterAllocatorTest : public CommonCompilerTest {};
+class RegisterAllocatorTest : public CommonCompilerTest {
+ protected:
+  // These functions need to access private variables of LocationSummary, so we declare them
+  // as members of RegisterAllocatorTest, which we make a friend class.
+  static void SameAsFirstInputHint(Strategy strategy);
+  static void ExpectedInRegisterHint(Strategy strategy);
+};
 
-static bool Check(const uint16_t* data) {
+// This macro should include all register allocation strategies that should be tested.
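+// For example, TEST_ALL_STRATEGIES(CFG1) expands into the CFG1_LinearScan and CFG1_GraphColor tests.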
+#define TEST_ALL_STRATEGIES(test_name)\
+TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\
+  test_name(Strategy::kRegisterAllocatorLinearScan);\
+}\
+TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
+  test_name(Strategy::kRegisterAllocatorGraphColor);\
+}
+
+static bool Check(const uint16_t* data, Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = CreateCFG(&allocator, data);
@@ -45,7 +62,8 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
   register_allocator->AllocateRegisters();
   return register_allocator->Validate(false);
 }
@@ -143,7 +161,7 @@
   }
 }
 
-TEST_F(RegisterAllocatorTest, CFG1) {
+static void CFG1(Strategy strategy) {
   /*
    * Test the following snippet:
    *  return 0;
@@ -160,10 +178,12 @@
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST_F(RegisterAllocatorTest, Loop1) {
+TEST_ALL_STRATEGIES(CFG1);
+
+static void Loop1(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -199,10 +219,12 @@
     Instruction::CONST_4 | 5 << 12 | 1 << 8,
     Instruction::RETURN | 1 << 8);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST_F(RegisterAllocatorTest, Loop2) {
+TEST_ALL_STRATEGIES(Loop1);
+
+static void Loop2(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -248,10 +270,12 @@
     Instruction::ADD_INT, 1 << 8 | 0,
     Instruction::RETURN | 1 << 8);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST_F(RegisterAllocatorTest, Loop3) {
+TEST_ALL_STRATEGIES(Loop2);
+
+static void Loop3(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0
@@ -296,7 +320,8 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
   register_allocator->AllocateRegisters();
   ASSERT_TRUE(register_allocator->Validate(false));
 
@@ -314,6 +339,8 @@
   ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
 }
 
+TEST_ALL_STRATEGIES(Loop3);
+
 TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -354,7 +381,7 @@
   ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
 }
 
-TEST_F(RegisterAllocatorTest, DeadPhi) {
+static void DeadPhi(Strategy strategy) {
   /* Test for a dead loop phi taking as back-edge input a phi that also has
    * this loop phi as input. Walking backwards in SsaDeadPhiElimination
    * does not solve the problem because the loop phi will be visited last.
@@ -385,15 +412,19 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
   register_allocator->AllocateRegisters();
   ASSERT_TRUE(register_allocator->Validate(false));
 }
 
+TEST_ALL_STRATEGIES(DeadPhi);
+
 /**
  * Test that the TryAllocateFreeReg method works in the presence of inactive intervals
  * that share the same register. It should split the interval it is currently
  * allocating for at the minimum lifetime position between the two inactive intervals.
+ * This test only applies to the linear scan allocator.
  */
 TEST_F(RegisterAllocatorTest, FreeUntil) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
@@ -507,15 +538,15 @@
                                               graph->GetDexFile(),
                                               dex_cache,
                                               0);
-*input2 = new (allocator) HInstanceFieldGet(parameter,
-                                            Primitive::kPrimInt,
-                                            MemberOffset(42),
-                                            false,
-                                            kUnknownFieldIndex,
-                                            kUnknownClassDefIndex,
-                                            graph->GetDexFile(),
-                                            dex_cache,
-                                            0);
+  *input2 = new (allocator) HInstanceFieldGet(parameter,
+                                              Primitive::kPrimInt,
+                                              MemberOffset(42),
+                                              false,
+                                              kUnknownFieldIndex,
+                                              kUnknownClassDefIndex,
+                                              graph->GetDexFile(),
+                                              dex_cache,
+                                              0);
   then->AddInstruction(*input1);
   else_->AddInstruction(*input2);
   join->AddInstruction(new (allocator) HExit());
@@ -527,7 +558,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, PhiHint) {
+static void PhiHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HPhi *phi;
@@ -543,7 +574,7 @@
 
     // Check that the register allocator is deterministic.
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
@@ -563,7 +594,7 @@
     // the same register.
     phi->GetLocations()->UpdateOut(Location::RegisterLocation(2));
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
@@ -583,7 +614,7 @@
     // the same register.
     input1->GetLocations()->UpdateOut(Location::RegisterLocation(2));
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
@@ -603,7 +634,7 @@
     // the same register.
     input2->GetLocations()->UpdateOut(Location::RegisterLocation(2));
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
@@ -612,6 +643,12 @@
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, PhiHint_LinearScan) {
+  PhiHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
                                 HInstruction** field,
                                 HInstruction** ret) {
@@ -650,7 +687,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
+void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *field, *ret;
@@ -664,7 +701,7 @@
     liveness.Analyze();
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
@@ -684,13 +721,19 @@
     ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint_LinearScan) {
+  ExpectedInRegisterHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
                             HInstruction** first_sub,
                             HInstruction** second_sub) {
@@ -720,7 +763,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
+void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *first_sub, *second_sub;
@@ -734,7 +777,7 @@
     liveness.Analyze();
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the registers are the same.
@@ -757,7 +800,7 @@
     ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
@@ -765,6 +808,12 @@
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint_LinearScan) {
+  SameAsFirstInputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildDiv(ArenaAllocator* allocator,
                         HInstruction** div) {
   HGraph* graph = CreateGraph(allocator);
@@ -791,7 +840,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
+static void ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *div;
@@ -805,7 +854,7 @@
     liveness.Analyze();
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     // div on x86 requires its first input in eax and the output be the same as the first input.
@@ -813,9 +862,16 @@
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint_LinearScan) {
+  ExpectedExactInRegisterAndSameOutputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 // Test a bug in the register allocator, where allocating a blocked
 // register would lead to spilling an inactive interval at the wrong
 // position.
+// This test only applies to the linear scan allocator.
 TEST_F(RegisterAllocatorTest, SpillInactive) {
   ArenaPool pool;
 
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 7af4302..a01e107 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -368,6 +368,27 @@
   return live_in->UnionIfNotIn(live_out, kill);
 }
 
+void LiveInterval::DumpWithContext(std::ostream& stream,
+                                   const CodeGenerator& codegen) const {
+  Dump(stream);
+  if (IsFixed()) {
+    stream << ", register:" << GetRegister() << "(";
+    if (IsFloatingPoint()) {
+      codegen.DumpFloatingPointRegister(stream, GetRegister());
+    } else {
+      codegen.DumpCoreRegister(stream, GetRegister());
+    }
+    stream << ")";
+  } else {
+    stream << ", spill slot:" << GetSpillSlot();
+  }
+  stream << ", requires_register:" << (GetDefinedBy() != nullptr && RequiresRegister());
+  if (GetParent()->GetDefinedBy() != nullptr) {
+    stream << ", defined_by:" << GetParent()->GetDefinedBy()->GetKind();
+    stream << "(" << GetParent()->GetDefinedBy()->GetLifetimePosition() << ")";
+  }
+}
+
 static int RegisterOrLowRegister(Location location) {
   return location.IsPair() ? location.low() : location.reg();
 }
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index dc98864..346753b 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -150,9 +150,7 @@
     if (GetIsEnvironment()) return false;
     if (IsSynthesized()) return false;
     Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
-    return location.IsUnallocated()
-        && (location.GetPolicy() == Location::kRequiresRegister
-            || location.GetPolicy() == Location::kRequiresFpuRegister);
+    return location.IsUnallocated() && location.RequiresRegisterKind();
   }
 
  private:
@@ -481,6 +479,10 @@
     return last_range_->GetEnd();
   }
 
+  size_t GetLength() const {
+    return GetEnd() - GetStart();
+  }
+
   size_t FirstRegisterUseAfter(size_t position) const {
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
@@ -504,10 +506,16 @@
     return kNoLifetime;
   }
 
+  // Returns the lifetime position of the first register use for this live interval,
+  // including a register definition if applicable.
   size_t FirstRegisterUse() const {
     return FirstRegisterUseAfter(GetStart());
   }
 
+  // Whether the interval requires a register rather than a stack location.
+  // If needed for performance, this could be cached.
+  bool RequiresRegister() const { return FirstRegisterUse() != kNoLifetime; }
+
   size_t FirstUseAfter(size_t position) const {
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
@@ -693,6 +701,10 @@
     stream << " is_high: " << IsHighInterval();
   }
 
+  // Same as Dump, but adds context such as the instruction defining this interval, and
+  // the register currently assigned to this interval.
+  void DumpWithContext(std::ostream& stream, const CodeGenerator& codegen) const;
+
   LiveInterval* GetNextSibling() const { return next_sibling_; }
   LiveInterval* GetLastSibling() {
     LiveInterval* result = this;
@@ -871,6 +883,33 @@
     range_search_start_ = first_range_;
   }
 
+  bool DefinitionRequiresRegister() const {
+    DCHECK(IsParent());
+    LocationSummary* locations = defined_by_->GetLocations();
+    Location location = locations->Out();
+    // This interval is the first interval of the instruction. If the output
+    // of the instruction requires a register, we return the position of that instruction
+    // as the first register use.
+    if (location.IsUnallocated()) {
+      if ((location.GetPolicy() == Location::kRequiresRegister)
+           || (location.GetPolicy() == Location::kSameAsFirstInput
+               && (locations->InAt(0).IsRegister()
+                   || locations->InAt(0).IsRegisterPair()
+                   || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+        return true;
+      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+                 || (location.GetPolicy() == Location::kSameAsFirstInput
+                     && (locations->InAt(0).IsFpuRegister()
+                         || locations->InAt(0).IsFpuRegisterPair()
+                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+        return true;
+      }
+    } else if (location.IsRegister() || location.IsRegisterPair()) {
+      return true;
+    }
+    return false;
+  }
+
  private:
   LiveInterval(ArenaAllocator* allocator,
                Primitive::Type type,
@@ -925,33 +964,6 @@
     return range;
   }
 
-  bool DefinitionRequiresRegister() const {
-    DCHECK(IsParent());
-    LocationSummary* locations = defined_by_->GetLocations();
-    Location location = locations->Out();
-    // This interval is the first interval of the instruction. If the output
-    // of the instruction requires a register, we return the position of that instruction
-    // as the first register use.
-    if (location.IsUnallocated()) {
-      if ((location.GetPolicy() == Location::kRequiresRegister)
-           || (location.GetPolicy() == Location::kSameAsFirstInput
-               && (locations->InAt(0).IsRegister()
-                   || locations->InAt(0).IsRegisterPair()
-                   || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
-        return true;
-      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
-                 || (location.GetPolicy() == Location::kSameAsFirstInput
-                     && (locations->InAt(0).IsFpuRegister()
-                         || locations->InAt(0).IsFpuRegisterPair()
-                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
-        return true;
-      }
-    } else if (location.IsRegister() || location.IsRegisterPair()) {
-      return true;
-    }
-    return false;
-  }
-
   bool IsDefiningPosition(size_t position) const {
     return IsParent() && (position == GetStart());
   }
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
index 195159f..8aa315a 100644
--- a/compiler/optimizing/x86_memory_gen.cc
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -69,8 +69,8 @@
 };
 
 X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph,
-                                                       OptimizingCompilerStats* stats,
-                                                       CodeGenerator* codegen)
+                                                       CodeGenerator* codegen,
+                                                       OptimizingCompilerStats* stats)
     : HOptimization(graph, kX86MemoryOperandGenerationPassName, stats),
       do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) {
 }
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
index 7e88681..5f15d9f 100644
--- a/compiler/optimizing/x86_memory_gen.h
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -28,8 +28,8 @@
 class X86MemoryOperandGeneration : public HOptimization {
  public:
   X86MemoryOperandGeneration(HGraph* graph,
-                             OptimizingCompilerStats* stats,
-                             CodeGenerator* codegen);
+                             CodeGenerator* codegen,
+                             OptimizingCompilerStats* stats);
 
   void Run() OVERRIDE;
 
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 1796b39..d5cd59d 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -376,508 +376,6 @@
   }
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::ArmCore(static_cast<int>(reg));
-}
-
-static dwarf::Reg DWARFReg(SRegister reg) {
-  return dwarf::Reg::ArmFp(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);
-
-void ArmAssembler::BuildFrame(size_t frame_size,
-                              ManagedRegister method_reg,
-                              ArrayRef<const ManagedRegister> callee_save_regs,
-                              const ManagedRegisterEntrySpills& entry_spills) {
-  CHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
-
-  // Push callee saves and link register.
-  RegList core_spill_mask = 1 << LR;
-  uint32_t fp_spill_mask = 0;
-  for (const ManagedRegister& reg : callee_save_regs) {
-    if (reg.AsArm().IsCoreRegister()) {
-      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
-    } else {
-      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
-    }
-  }
-  PushList(core_spill_mask);
-  cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
-  cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize);
-  if (fp_spill_mask != 0) {
-    vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
-    cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
-    cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize);
-  }
-
-  // Increase frame to required size.
-  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  CHECK_GT(frame_size, pushed_values * kFramePointerSize);  // Must at least have space for Method*.
-  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.
-
-  // Write out Method*.
-  StoreToOffset(kStoreWord, R0, SP, 0);
-
-  // Write out entry spills.
-  int32_t offset = frame_size + kFramePointerSize;
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ArmManagedRegister reg = entry_spills.at(i).AsArm();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      ManagedRegisterSpill spill = entry_spills.at(i);
-      offset += spill.getSize();
-    } else if (reg.IsCoreRegister()) {
-      StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsSRegister()) {
-      StoreSToOffset(reg.AsSRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
-    }
-  }
-}
-
-void ArmAssembler::RemoveFrame(size_t frame_size,
-                               ArrayRef<const ManagedRegister> callee_save_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-
-  // Compute callee saves to pop and PC.
-  RegList core_spill_mask = 1 << PC;
-  uint32_t fp_spill_mask = 0;
-  for (const ManagedRegister& reg : callee_save_regs) {
-    if (reg.AsArm().IsCoreRegister()) {
-      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
-    } else {
-      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
-    }
-  }
-
-  // Decrease frame to start of callee saves.
-  int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  CHECK_GT(frame_size, pop_values * kFramePointerSize);
-  DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
-
-  if (fp_spill_mask != 0) {
-    vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
-    cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
-    cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask);
-  }
-
-  // Pop callee saves and PC.
-  PopList(core_spill_mask);
-
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void ArmAssembler::IncreaseFrameSize(size_t adjust) {
-  AddConstant(SP, -adjust);
-  cfi_.AdjustCFAOffset(adjust);
-}
-
-void ArmAssembler::DecreaseFrameSize(size_t adjust) {
-  AddConstant(SP, adjust);
-  cfi_.AdjustCFAOffset(-adjust);
-}
-
-void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
-  ArmManagedRegister src = msrc.AsArm();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsCoreRegister()) {
-    CHECK_EQ(4u, size);
-    StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value());
-    StoreToOffset(kStoreWord, src.AsRegisterPairHigh(),
-                  SP, dest.Int32Value() + 4);
-  } else if (src.IsSRegister()) {
-    StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value());
-  } else {
-    CHECK(src.IsDRegister()) << src;
-    StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value());
-  }
-}
-
-void ArmAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
-  ArmManagedRegister src = msrc.AsArm();
-  CHECK(src.IsCoreRegister()) << src;
-  StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
-  ArmManagedRegister src = msrc.AsArm();
-  CHECK(src.IsCoreRegister()) << src;
-  StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::StoreSpanning(FrameOffset dest, ManagedRegister msrc,
-                              FrameOffset in_off, ManagedRegister mscratch) {
-  ArmManagedRegister src = msrc.AsArm();
-  ArmManagedRegister scratch = mscratch.AsArm();
-  StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
-}
-
-void ArmAssembler::CopyRef(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                           bool unpoison_reference) {
-  ArmManagedRegister dst = mdest.AsArm();
-  CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(),
-                 base.AsArm().AsCoreRegister(), offs.Int32Value());
-  if (unpoison_reference) {
-    MaybeUnpoisonHeapReference(dst.AsCoreRegister());
-  }
-}
-
-void ArmAssembler::LoadRef(ManagedRegister mdest, FrameOffset  src) {
-  ArmManagedRegister dst = mdest.AsArm();
-  CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value());
-}
-
-void ArmAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                           Offset offs) {
-  ArmManagedRegister dst = mdest.AsArm();
-  CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(),
-                 base.AsArm().AsCoreRegister(), offs.Int32Value());
-}
-
-void ArmAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                      ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::StoreImmediateToThread32(ThreadOffset32 dest,
-                                            uint32_t imm,
-                                            ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, dest.Int32Value());
-}
-
-static void EmitLoad(ArmAssembler* assembler, ManagedRegister m_dst,
-                     Register src_register, int32_t src_offset, size_t size) {
-  ArmManagedRegister dst = m_dst.AsArm();
-  if (dst.IsNoRegister()) {
-    CHECK_EQ(0u, size) << dst;
-  } else if (dst.IsCoreRegister()) {
-    CHECK_EQ(4u, size) << dst;
-    assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
-  } else if (dst.IsRegisterPair()) {
-    CHECK_EQ(8u, size) << dst;
-    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset);
-    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4);
-  } else if (dst.IsSRegister()) {
-    assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset);
-  } else {
-    CHECK(dst.IsDRegister()) << dst;
-    assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset);
-  }
-}
-
-void ArmAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
-  return EmitLoad(this, m_dst, SP, src.Int32Value(), size);
-}
-
-void ArmAssembler::LoadFromThread32(ManagedRegister m_dst, ThreadOffset32 src, size_t size) {
-  return EmitLoad(this, m_dst, TR, src.Int32Value(), size);
-}
-
-void ArmAssembler::LoadRawPtrFromThread32(ManagedRegister m_dst, ThreadOffset32 offs) {
-  ArmManagedRegister dst = m_dst.AsArm();
-  CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value());
-}
-
-void ArmAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                          ThreadOffset32 thr_offs,
-                                          ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 TR, thr_offs.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                SP, fr_offs.Int32Value());
-}
-
-void ArmAssembler::CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                                        FrameOffset fr_offs,
-                                        ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 SP, fr_offs.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                TR, thr_offs.Int32Value());
-}
-
-void ArmAssembler::StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                              FrameOffset fr_offs,
-                                              ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                TR, thr_offs.Int32Value());
-}
-
-void ArmAssembler::StoreStackPointerToThread32(ThreadOffset32 thr_offs) {
-  StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value());
-}
-
-void ArmAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
-}
-
-void ArmAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
-}
-
-void ArmAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) {
-  ArmManagedRegister dst = m_dst.AsArm();
-  ArmManagedRegister src = m_src.AsArm();
-  if (!dst.Equals(src)) {
-    if (dst.IsCoreRegister()) {
-      CHECK(src.IsCoreRegister()) << src;
-      mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister()));
-    } else if (dst.IsDRegister()) {
-      CHECK(src.IsDRegister()) << src;
-      vmovd(dst.AsDRegister(), src.AsDRegister());
-    } else if (dst.IsSRegister()) {
-      CHECK(src.IsSRegister()) << src;
-      vmovs(dst.AsSRegister(), src.AsSRegister());
-    } else {
-      CHECK(dst.IsRegisterPair()) << dst;
-      CHECK(src.IsRegisterPair()) << src;
-      // Ensure that the first move doesn't clobber the input of the second.
-      if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
-        mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
-        mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
-      } else {
-        mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
-        mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
-      }
-    }
-  }
-}
-
-void ArmAssembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4);
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
-  }
-}
-
-void ArmAssembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsArm().AsCoreRegister();
-  CHECK_EQ(size, 4u);
-  LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value());
-  StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
-}
-
-void ArmAssembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsArm().AsCoreRegister();
-  CHECK_EQ(size, 4u);
-  LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value());
-  StoreToOffset(kStoreWord, scratch, dest_base.AsArm().AsCoreRegister(), dest_offset.Int32Value());
-}
-
-void ArmAssembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                        ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void ArmAssembler::Copy(ManagedRegister dest, Offset dest_offset,
-                        ManagedRegister src, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  CHECK_EQ(size, 4u);
-  Register scratch = mscratch.AsArm().AsCoreRegister();
-  LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value());
-  StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value());
-}
-
-void ArmAssembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset /*src_offset*/,
-                        ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void ArmAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister min_reg, bool null_allowed) {
-  ArmManagedRegister out_reg = mout_reg.AsArm();
-  ArmManagedRegister in_reg = min_reg.AsArm();
-  CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
-  CHECK(out_reg.IsCoreRegister()) << out_reg;
-  if (null_allowed) {
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
-    if (in_reg.IsNoRegister()) {
-      LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
-                     SP, handle_scope_offset.Int32Value());
-      in_reg = out_reg;
-    }
-    cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
-    if (!out_reg.Equals(in_reg)) {
-      it(EQ, kItElse);
-      LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
-    } else {
-      it(NE);
-    }
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
-  } else {
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
-  }
-}
-
-void ArmAssembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister mscratch,
-                                   bool null_allowed) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  if (null_allowed) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP,
-                   handle_scope_offset.Int32Value());
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    cmp(scratch.AsCoreRegister(), ShifterOperand(0));
-    it(NE);
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
-  } else {
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
-  }
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
-}
-
-void ArmAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                         ManagedRegister min_reg) {
-  ArmManagedRegister out_reg = mout_reg.AsArm();
-  ArmManagedRegister in_reg = min_reg.AsArm();
-  CHECK(out_reg.IsCoreRegister()) << out_reg;
-  CHECK(in_reg.IsCoreRegister()) << in_reg;
-  Label null_arg;
-  if (!out_reg.Equals(in_reg)) {
-    LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);     // TODO: why EQ?
-  }
-  cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
-  it(NE);
-  LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
-                 in_reg.AsCoreRegister(), 0, NE);
-}
-
-void ArmAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void ArmAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void ArmAssembler::Call(ManagedRegister mbase, Offset offset,
-                        ManagedRegister mscratch) {
-  ArmManagedRegister base = mbase.AsArm();
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(base.IsCoreRegister()) << base;
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 base.AsCoreRegister(), offset.Int32Value());
-  blx(scratch.AsCoreRegister());
-  // TODO: place reference map on call.
-}
-
-void ArmAssembler::Call(FrameOffset base, Offset offset,
-                        ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  // Call *(*(SP + base) + offset)
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 SP, base.Int32Value());
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 scratch.AsCoreRegister(), offset.Int32Value());
-  blx(scratch.AsCoreRegister());
-  // TODO: place reference map on call
-}
-
-void ArmAssembler::CallFromThread32(ThreadOffset32 offset ATTRIBUTE_UNUSED,
-                                    ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void ArmAssembler::GetCurrentThread(ManagedRegister tr) {
-  mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR));
-}
-
-void ArmAssembler::GetCurrentThread(FrameOffset offset,
-                                    ManagedRegister /*scratch*/) {
-  StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL);
-}
-
-void ArmAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  ArmExceptionSlowPath* slow = new (GetArena()) ArmExceptionSlowPath(scratch, stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  LoadFromOffset(kLoadWord,
-                 scratch.AsCoreRegister(),
-                 TR,
-                 Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
-  cmp(scratch.AsCoreRegister(), ShifterOperand(0));
-  b(slow->Entry(), NE);
-}
-
-void ArmExceptionSlowPath::Emit(Assembler* sasm) {
-  ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
-  }
-  // Pass exception object as argument.
-  // Don't care about preserving R0 as this call won't return.
-  __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
-  // Set up call to Thread::Current()->pDeliverException.
-  __ LoadFromOffset(kLoadWord,
-                    R12,
-                    TR,
-                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value());
-  __ blx(R12);
-#undef __
-}
-
-
 static int LeadingZeros(uint32_t val) {
   uint32_t alt;
   int32_t n;
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 2b7414d..ff0bbaf 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -23,12 +23,14 @@
 #include "base/arena_allocator.h"
 #include "base/arena_containers.h"
 #include "base/bit_utils.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/value_object.h"
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "offsets.h"
 
 namespace art {
@@ -880,122 +882,6 @@
   virtual void CompareAndBranchIfZero(Register r, Label* label) = 0;
   virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0;
 
-  //
-  // Overridden common assembler high-level functionality
-  //
-
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size,
-                  ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
-    OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-
-  void StoreImmediateToThread32(ThreadOffset32 dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-
-  void StoreStackOffsetToThread32(ThreadOffset32 thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset32 thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-
-  void LoadFromThread32(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-
-  void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
-
-  // Copying routines
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset32 thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  // Sign extension
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
   static uint32_t ModifiedImmediate(uint32_t value);
 
   static bool IsLowRegister(Register r) {
@@ -1073,18 +959,6 @@
   ArenaVector<Label*> tracked_labels_;
 };
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class ArmExceptionSlowPath FINAL : public SlowPath {
- public:
-  ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {
-  }
-  void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const ArmManagedRegister scratch_;
-  const size_t stack_adjust_;
-};
-
 }  // namespace arm
 }  // namespace art
 
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index c95dfa8..6f9d5f3 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1664,12 +1664,6 @@
 }
 
 
-void Arm32Assembler::MemoryBarrier(ManagedRegister mscratch) {
-  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
-  dmb(SY);
-}
-
-
 void Arm32Assembler::dmb(DmbOptions flavor) {
   int32_t encoding = 0xf57ff05f;  // dmb
   Emit(encoding | flavor);
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 554dd23..044eaa1 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -316,8 +316,6 @@
   void Emit(int32_t value);
   void Bind(Label* label) OVERRIDE;
 
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
   JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
   void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 353c729..ee69698 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2325,7 +2325,7 @@
   }
 
   Register rn = ad.GetRegister();
-  if (IsHighRegister(rn) && rn != SP && rn != PC) {
+  if (IsHighRegister(rn) && (byte || half || (rn != SP && rn != PC))) {
     must_be_32bit = true;
   }
 
@@ -2337,24 +2337,24 @@
     // Immediate offset
     int32_t offset = ad.GetOffset();
 
-    // The 16 bit SP relative instruction can only have a 10 bit offset.
-    if (rn == SP && offset >= (1 << 10)) {
-      must_be_32bit = true;
-    }
-
     if (byte) {
       // 5 bit offset, no shift.
-      if (offset >= (1 << 5)) {
+      if ((offset & ~0x1f) != 0) {
         must_be_32bit = true;
       }
     } else if (half) {
-      // 6 bit offset, shifted by 1.
-      if (offset >= (1 << 6)) {
+      // 5 bit offset, shifted by 1.
+      if ((offset & ~(0x1f << 1)) != 0) {
+        must_be_32bit = true;
+      }
+    } else if (rn == SP || rn == PC) {
+      // The 16 bit SP/PC relative instruction can only have an (imm8 << 2) offset.
+      if ((offset & ~(0xff << 2)) != 0) {
         must_be_32bit = true;
       }
     } else {
-      // 7 bit offset, shifted by 2.
-      if (offset >= (1 << 7)) {
+      // 5 bit offset, shifted by 2.
+      if ((offset & ~(0x1f << 2)) != 0) {
         must_be_32bit = true;
       }
     }
@@ -2370,7 +2370,7 @@
     } else {
       // 16 bit thumb1.
       uint8_t opA = 0;
-      bool sp_relative = false;
+      bool sp_or_pc_relative = false;
 
       if (byte) {
         opA = 7U /* 0b0111 */;
@@ -2379,7 +2379,10 @@
       } else {
         if (rn == SP) {
           opA = 9U /* 0b1001 */;
-          sp_relative = true;
+          sp_or_pc_relative = true;
+        } else if (rn == PC) {
+          opA = 4U;
+          sp_or_pc_relative = true;
         } else {
           opA = 6U /* 0b0110 */;
         }
@@ -2388,7 +2391,7 @@
           (load ? B11 : 0);
 
       CHECK_GE(offset, 0);
-      if (sp_relative) {
+      if (sp_or_pc_relative) {
         // SP relative, 10 bit offset.
         CHECK_LT(offset, (1 << 10));
         CHECK_ALIGNED(offset, 4);
@@ -3860,12 +3863,6 @@
 }
 
 
-void Thumb2Assembler::MemoryBarrier(ManagedRegister mscratch) {
-  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
-  dmb(SY);
-}
-
-
 void Thumb2Assembler::dmb(DmbOptions flavor) {
   int32_t encoding = 0xf3bf8f50;  // dmb in T1 encoding.
   Emit32(encoding | flavor);
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 4ee23c0..1c1c98b 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -368,8 +368,6 @@
   void Emit16(int16_t value);     // Emit a 16 bit instruction in little endian format.
   void Bind(Label* label) OVERRIDE;
 
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
   // Force the assembler to generate 32 bit instructions.
   void Force32Bit() {
     force_32bit_ = true;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index abb09f7..3ca3714 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -279,6 +279,148 @@
   DriverStr(expected, "smull");
 }
 
+TEST_F(AssemblerThumb2Test, LoadByteFromThumbOffset) {
+  arm::LoadOperandType type = arm::kLoadUnsignedByte;
+
+  __ LoadFromOffset(type, arm::R0, arm::R7, 0);
+  __ LoadFromOffset(type, arm::R1, arm::R7, 31);
+  __ LoadFromOffset(type, arm::R2, arm::R7, 32);
+  __ LoadFromOffset(type, arm::R3, arm::R7, 4095);
+  __ LoadFromOffset(type, arm::R4, arm::SP, 0);
+
+  const char* expected =
+      "ldrb r0, [r7, #0]\n"
+      "ldrb r1, [r7, #31]\n"
+      "ldrb.w r2, [r7, #32]\n"
+      "ldrb.w r3, [r7, #4095]\n"
+      "ldrb.w r4, [sp, #0]\n";
+  DriverStr(expected, "LoadByteFromThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreByteToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreByte;
+
+  __ StoreToOffset(type, arm::R0, arm::R7, 0);
+  __ StoreToOffset(type, arm::R1, arm::R7, 31);
+  __ StoreToOffset(type, arm::R2, arm::R7, 32);
+  __ StoreToOffset(type, arm::R3, arm::R7, 4095);
+  __ StoreToOffset(type, arm::R4, arm::SP, 0);
+
+  const char* expected =
+      "strb r0, [r7, #0]\n"
+      "strb r1, [r7, #31]\n"
+      "strb.w r2, [r7, #32]\n"
+      "strb.w r3, [r7, #4095]\n"
+      "strb.w r4, [sp, #0]\n";
+  DriverStr(expected, "StoreByteToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadHalfFromThumbOffset) {
+  arm::LoadOperandType type = arm::kLoadUnsignedHalfword;
+
+  __ LoadFromOffset(type, arm::R0, arm::R7, 0);
+  __ LoadFromOffset(type, arm::R1, arm::R7, 62);
+  __ LoadFromOffset(type, arm::R2, arm::R7, 64);
+  __ LoadFromOffset(type, arm::R3, arm::R7, 4094);
+  __ LoadFromOffset(type, arm::R4, arm::SP, 0);
+  __ LoadFromOffset(type, arm::R5, arm::R7, 1);  // Unaligned
+
+  const char* expected =
+      "ldrh r0, [r7, #0]\n"
+      "ldrh r1, [r7, #62]\n"
+      "ldrh.w r2, [r7, #64]\n"
+      "ldrh.w r3, [r7, #4094]\n"
+      "ldrh.w r4, [sp, #0]\n"
+      "ldrh.w r5, [r7, #1]\n";
+  DriverStr(expected, "LoadHalfFromThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreHalfToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreHalfword;
+
+  __ StoreToOffset(type, arm::R0, arm::R7, 0);
+  __ StoreToOffset(type, arm::R1, arm::R7, 62);
+  __ StoreToOffset(type, arm::R2, arm::R7, 64);
+  __ StoreToOffset(type, arm::R3, arm::R7, 4094);
+  __ StoreToOffset(type, arm::R4, arm::SP, 0);
+  __ StoreToOffset(type, arm::R5, arm::R7, 1);  // Unaligned
+
+  const char* expected =
+      "strh r0, [r7, #0]\n"
+      "strh r1, [r7, #62]\n"
+      "strh.w r2, [r7, #64]\n"
+      "strh.w r3, [r7, #4094]\n"
+      "strh.w r4, [sp, #0]\n"
+      "strh.w r5, [r7, #1]\n";
+  DriverStr(expected, "StoreHalfToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadWordFromSpPlusOffset) {
+  arm::LoadOperandType type = arm::kLoadWord;
+
+  __ LoadFromOffset(type, arm::R0, arm::SP, 0);
+  __ LoadFromOffset(type, arm::R1, arm::SP, 124);
+  __ LoadFromOffset(type, arm::R2, arm::SP, 128);
+  __ LoadFromOffset(type, arm::R3, arm::SP, 1020);
+  __ LoadFromOffset(type, arm::R4, arm::SP, 1024);
+  __ LoadFromOffset(type, arm::R5, arm::SP, 4092);
+  __ LoadFromOffset(type, arm::R6, arm::SP, 1);  // Unaligned
+
+  const char* expected =
+      "ldr r0, [sp, #0]\n"
+      "ldr r1, [sp, #124]\n"
+      "ldr r2, [sp, #128]\n"
+      "ldr r3, [sp, #1020]\n"
+      "ldr.w r4, [sp, #1024]\n"
+      "ldr.w r5, [sp, #4092]\n"
+      "ldr.w r6, [sp, #1]\n";
+  DriverStr(expected, "LoadWordFromSpPlusOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordToSpPlusOffset) {
+  arm::StoreOperandType type = arm::kStoreWord;
+
+  __ StoreToOffset(type, arm::R0, arm::SP, 0);
+  __ StoreToOffset(type, arm::R1, arm::SP, 124);
+  __ StoreToOffset(type, arm::R2, arm::SP, 128);
+  __ StoreToOffset(type, arm::R3, arm::SP, 1020);
+  __ StoreToOffset(type, arm::R4, arm::SP, 1024);
+  __ StoreToOffset(type, arm::R5, arm::SP, 4092);
+  __ StoreToOffset(type, arm::R6, arm::SP, 1);  // Unaligned
+
+  const char* expected =
+      "str r0, [sp, #0]\n"
+      "str r1, [sp, #124]\n"
+      "str r2, [sp, #128]\n"
+      "str r3, [sp, #1020]\n"
+      "str.w r4, [sp, #1024]\n"
+      "str.w r5, [sp, #4092]\n"
+      "str.w r6, [sp, #1]\n";
+  DriverStr(expected, "StoreWordToSpPlusOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadWordFromPcPlusOffset) {
+  arm::LoadOperandType type = arm::kLoadWord;
+
+  __ LoadFromOffset(type, arm::R0, arm::PC, 0);
+  __ LoadFromOffset(type, arm::R1, arm::PC, 124);
+  __ LoadFromOffset(type, arm::R2, arm::PC, 128);
+  __ LoadFromOffset(type, arm::R3, arm::PC, 1020);
+  __ LoadFromOffset(type, arm::R4, arm::PC, 1024);
+  __ LoadFromOffset(type, arm::R5, arm::PC, 4092);
+  __ LoadFromOffset(type, arm::R6, arm::PC, 1);  // Unaligned
+
+  const char* expected =
+      "ldr r0, [pc, #0]\n"
+      "ldr r1, [pc, #124]\n"
+      "ldr r2, [pc, #128]\n"
+      "ldr r3, [pc, #1020]\n"
+      "ldr.w r4, [pc, #1024]\n"
+      "ldr.w r5, [pc, #4092]\n"
+      "ldr.w r6, [pc, #1]\n";
+  DriverStr(expected, "LoadWordFromPcPlusOffset");
+}
+
 TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
   arm::StoreOperandType type = arm::kStoreWord;
   int32_t offset = 4092;
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.cc b/compiler/utils/arm/jni_macro_assembler_arm.cc
new file mode 100644
index 0000000..c039816
--- /dev/null
+++ b/compiler/utils/arm/jni_macro_assembler_arm.cc
@@ -0,0 +1,612 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_arm.h"
+
+#include <algorithm>
+
+#include "assembler_arm32.h"
+#include "assembler_thumb2.h"
+#include "base/arena_allocator.h"
+#include "base/bit_utils.h"
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "offsets.h"
+#include "thread.h"
+
+namespace art {
+namespace arm {
+
+constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);
+
+// Slowpath entered when Thread::Current()->_exception is non-null
+class ArmExceptionSlowPath FINAL : public SlowPath {
+ public:
+  ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {
+  }
+  void Emit(Assembler *sp_asm) OVERRIDE;
+ private:
+  const ArmManagedRegister scratch_;
+  const size_t stack_adjust_;
+};
+
+ArmJNIMacroAssembler::ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      asm_.reset(new (arena) Arm32Assembler(arena));
+      break;
+
+    case kThumb2:
+      asm_.reset(new (arena) Thumb2Assembler(arena));
+      break;
+
+    default:
+      LOG(FATAL) << isa;
+      UNREACHABLE();
+  }
+}
+
+ArmJNIMacroAssembler::~ArmJNIMacroAssembler() {
+}
+
+size_t ArmJNIMacroAssembler::CodeSize() const {
+  return asm_->CodeSize();
+}
+
+DebugFrameOpCodeWriterForAssembler& ArmJNIMacroAssembler::cfi() {
+  return asm_->cfi();
+}
+
+void ArmJNIMacroAssembler::FinalizeCode() {
+  asm_->FinalizeCode();
+}
+
+void ArmJNIMacroAssembler::FinalizeInstructions(const MemoryRegion& region) {
+  asm_->FinalizeInstructions(region);
+}
+
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::ArmCore(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(SRegister reg) {
+  return dwarf::Reg::ArmFp(static_cast<int>(reg));
+}
+
+#define __ asm_->
+
+void ArmJNIMacroAssembler::BuildFrame(size_t frame_size,
+                                      ManagedRegister method_reg,
+                                      ArrayRef<const ManagedRegister> callee_save_regs,
+                                      const ManagedRegisterEntrySpills& entry_spills) {
+  CHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
+
+  // Push callee saves and link register.
+  RegList core_spill_mask = 1 << LR;
+  uint32_t fp_spill_mask = 0;
+  for (const ManagedRegister& reg : callee_save_regs) {
+    if (reg.AsArm().IsCoreRegister()) {
+      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
+    } else {
+      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
+    }
+  }
+  __ PushList(core_spill_mask);
+  cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
+  cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize);
+  if (fp_spill_mask != 0) {
+    __ vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+    cfi().AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
+    cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize);
+  }
+
+  // Increase frame to required size.
+  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
+  CHECK_GT(frame_size, pushed_values * kFramePointerSize);  // Must at least have space for Method*.
+  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.
+
+  // Write out Method*.
+  __ StoreToOffset(kStoreWord, R0, SP, 0);
+
+  // Write out entry spills.
+  int32_t offset = frame_size + kFramePointerSize;
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ArmManagedRegister reg = entry_spills.at(i).AsArm();
+    if (reg.IsNoRegister()) {
+      // only increment stack offset.
+      ManagedRegisterSpill spill = entry_spills.at(i);
+      offset += spill.getSize();
+    } else if (reg.IsCoreRegister()) {
+      __ StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsSRegister()) {
+      __ StoreSToOffset(reg.AsSRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsDRegister()) {
+      __ StoreDToOffset(reg.AsDRegister(), SP, offset);
+      offset += 8;
+    }
+  }
+}
+
+void ArmJNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                       ArrayRef<const ManagedRegister> callee_save_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi().RememberState();
+
+  // Compute callee saves to pop and PC.
+  RegList core_spill_mask = 1 << PC;
+  uint32_t fp_spill_mask = 0;
+  for (const ManagedRegister& reg : callee_save_regs) {
+    if (reg.AsArm().IsCoreRegister()) {
+      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
+    } else {
+      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
+    }
+  }
+
+  // Decrease frame to start of callee saves.
+  int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
+  CHECK_GT(frame_size, pop_values * kFramePointerSize);
+  DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
+
+  if (fp_spill_mask != 0) {
+    __ vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+    cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
+    cfi().RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask);
+  }
+
+  // Pop callee saves and PC.
+  __ PopList(core_spill_mask);
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+void ArmJNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  __ AddConstant(SP, -adjust);
+  cfi().AdjustCFAOffset(adjust);
+}
+
+static void DecreaseFrameSizeImpl(ArmAssembler* assembler, size_t adjust) {
+  assembler->AddConstant(SP, adjust);
+  assembler->cfi().AdjustCFAOffset(-adjust);
+}
+
+void ArmJNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  DecreaseFrameSizeImpl(asm_.get(), adjust);
+}
+
+void ArmJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
+  ArmManagedRegister src = msrc.AsArm();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCoreRegister()) {
+    CHECK_EQ(4u, size);
+    __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value());
+    __ StoreToOffset(kStoreWord, src.AsRegisterPairHigh(), SP, dest.Int32Value() + 4);
+  } else if (src.IsSRegister()) {
+    __ StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value());
+  } else {
+    CHECK(src.IsDRegister()) << src;
+    __ StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value());
+  }
+}
+
+void ArmJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  ArmManagedRegister src = msrc.AsArm();
+  CHECK(src.IsCoreRegister()) << src;
+  __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  ArmManagedRegister src = msrc.AsArm();
+  CHECK(src.IsCoreRegister()) << src;
+  __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreSpanning(FrameOffset dest,
+                                         ManagedRegister msrc,
+                                         FrameOffset in_off,
+                                         ManagedRegister mscratch) {
+  ArmManagedRegister src = msrc.AsArm();
+  ArmManagedRegister scratch = mscratch.AsArm();
+  __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + sizeof(uint32_t));
+}
+
+void ArmJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest,
+                                   ManagedRegister base,
+                                   MemberOffset offs,
+                                   bool unpoison_reference) {
+  ArmManagedRegister dst = mdest.AsArm();
+  CHECK(dst.IsCoreRegister() && base.AsArm().IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord,
+                    dst.AsCoreRegister(),
+                    base.AsArm().AsCoreRegister(),
+                    offs.Int32Value());
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(dst.AsCoreRegister());
+  }
+}
+
+void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  ArmManagedRegister dst = mdest.AsArm();
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value());
+}
+
+void ArmJNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
+                           Offset offs) {
+  ArmManagedRegister dst = mdest.AsArm();
+  CHECK(dst.IsCoreRegister() && base.AsArm().IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord,
+                    dst.AsCoreRegister(),
+                    base.AsArm().AsCoreRegister(),
+                    offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
+                                                 uint32_t imm,
+                                                 ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadImmediate(scratch.AsCoreRegister(), imm);
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+static void EmitLoad(ArmAssembler* assembler,
+                     ManagedRegister m_dst,
+                     Register src_register,
+                     int32_t src_offset,
+                     size_t size) {
+  ArmManagedRegister dst = m_dst.AsArm();
+  if (dst.IsNoRegister()) {
+    CHECK_EQ(0u, size) << dst;
+  } else if (dst.IsCoreRegister()) {
+    CHECK_EQ(4u, size) << dst;
+    assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
+  } else if (dst.IsRegisterPair()) {
+    CHECK_EQ(8u, size) << dst;
+    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset);
+    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4);
+  } else if (dst.IsSRegister()) {
+    assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset);
+  } else {
+    CHECK(dst.IsDRegister()) << dst;
+    assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset);
+  }
+}
+
+void ArmJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
+  EmitLoad(asm_.get(), m_dst, SP, src.Int32Value(), size);
+}
+
+void ArmJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, ThreadOffset32 src, size_t size) {
+  EmitLoad(asm_.get(), m_dst, TR, src.Int32Value(), size);
+}
+
+void ArmJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) {
+  ArmManagedRegister dst = m_dst.AsArm();
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                ThreadOffset32 thr_offs,
+                                                ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                              FrameOffset fr_offs,
+                                              ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                                    FrameOffset fr_offs,
+                                                    ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL);
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
+  __ StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
+}
+
+void ArmJNIMacroAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
+}
+
+void ArmJNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) {
+  ArmManagedRegister dst = m_dst.AsArm();
+  ArmManagedRegister src = m_src.AsArm();
+  if (!dst.Equals(src)) {
+    if (dst.IsCoreRegister()) {
+      CHECK(src.IsCoreRegister()) << src;
+      __ mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister()));
+    } else if (dst.IsDRegister()) {
+      CHECK(src.IsDRegister()) << src;
+      __ vmovd(dst.AsDRegister(), src.AsDRegister());
+    } else if (dst.IsSRegister()) {
+      CHECK(src.IsSRegister()) << src;
+      __ vmovs(dst.AsSRegister(), src.AsSRegister());
+    } else {
+      CHECK(dst.IsRegisterPair()) << dst;
+      CHECK(src.IsRegisterPair()) << src;
+      // Ensure that the first move doesn't clobber the input of the second.
+      if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
+        __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
+        __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
+      } else {
+        __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
+        __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
+      }
+    }
+  }
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset dest,
+                                FrameOffset src,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+    __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+    __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4);
+    __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
+  }
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset dest,
+                                ManagedRegister src_base,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  CHECK_EQ(size, 4u);
+  __ LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::Copy(ManagedRegister dest_base,
+                                Offset dest_offset,
+                                FrameOffset src,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  CHECK_EQ(size, 4u);
+  __ LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value());
+  __ StoreToOffset(kStoreWord,
+                   scratch,
+                   dest_base.AsArm().AsCoreRegister(),
+                   dest_offset.Int32Value());
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                FrameOffset /*src_base*/,
+                                Offset /*src_offset*/,
+                                ManagedRegister /*mscratch*/,
+                                size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmJNIMacroAssembler::Copy(ManagedRegister dest,
+                                Offset dest_offset,
+                                ManagedRegister src,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  CHECK_EQ(size, 4u);
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  __ LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value());
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                Offset /*dest_offset*/,
+                                FrameOffset /*src*/,
+                                Offset /*src_offset*/,
+                                ManagedRegister /*scratch*/,
+                                size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister min_reg,
+                                                  bool null_allowed) {
+  ArmManagedRegister out_reg = mout_reg.AsArm();
+  ArmManagedRegister in_reg = min_reg.AsArm();
+  CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
+  CHECK(out_reg.IsCoreRegister()) << out_reg;
+  if (null_allowed) {
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    if (in_reg.IsNoRegister()) {
+      __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+      in_reg = out_reg;
+    }
+    __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
+    if (!out_reg.Equals(in_reg)) {
+      __ it(EQ, kItElse);
+      __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
+    } else {
+      __ it(NE);
+    }
+    __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
+  } else {
+    __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
+  }
+}
+
+void ArmJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister mscratch,
+                                                  bool null_allowed) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  if (null_allowed) {
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
+    __ cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+    __ it(NE);
+    __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
+  } else {
+    __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
+  }
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
+}
+
+void ArmJNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                        ManagedRegister min_reg) {
+  ArmManagedRegister out_reg = mout_reg.AsArm();
+  ArmManagedRegister in_reg = min_reg.AsArm();
+  CHECK(out_reg.IsCoreRegister()) << out_reg;
+  CHECK(in_reg.IsCoreRegister()) << in_reg;
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);     // TODO: why EQ?
+  }
+  __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
+  __ it(NE);
+  __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), in_reg.AsCoreRegister(), 0, NE);
+}
+
+void ArmJNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void ArmJNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void ArmJNIMacroAssembler::Call(ManagedRegister mbase, Offset offset,
+                        ManagedRegister mscratch) {
+  ArmManagedRegister base = mbase.AsArm();
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(base.IsCoreRegister()) << base;
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadFromOffset(kLoadWord,
+                    scratch.AsCoreRegister(),
+                    base.AsCoreRegister(),
+                    offset.Int32Value());
+  __ blx(scratch.AsCoreRegister());
+  // TODO: place reference map on call.
+}
+
+void ArmJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  // Call *(*(SP + base) + offset)
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, base.Int32Value());
+  __ LoadFromOffset(kLoadWord,
+                    scratch.AsCoreRegister(),
+                    scratch.AsCoreRegister(),
+                    offset.Int32Value());
+  __ blx(scratch.AsCoreRegister());
+  // TODO: place reference map on call
+}
+
+void ArmJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
+                                          ManagedRegister scratch ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmJNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  __ mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR));
+}
+
+void ArmJNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /*scratch*/) {
+  __ StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL);
+}
+
+void ArmJNIMacroAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  ArmExceptionSlowPath* slow = new (__ GetArena()) ArmExceptionSlowPath(scratch, stack_adjust);
+  __ GetBuffer()->EnqueueSlowPath(slow);
+  __ LoadFromOffset(kLoadWord,
+                    scratch.AsCoreRegister(),
+                    TR,
+                    Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
+  __ cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+  __ b(slow->Entry(), NE);
+}
+
+#undef __
+
+void ArmExceptionSlowPath::Emit(Assembler* sasm) {
+  ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSizeImpl(sp_asm, stack_adjust_);
+  }
+  // Pass exception object as argument.
+  // Don't care about preserving R0 as this call won't return.
+  __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
+  // Set up call to Thread::Current()->pDeliverException.
+  __ LoadFromOffset(kLoadWord,
+                    R12,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value());
+  __ blx(R12);
+#undef __
+}
+
+void ArmJNIMacroAssembler::MemoryBarrier(ManagedRegister mscratch) {
+  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
+  asm_->dmb(SY);
+}
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.h b/compiler/utils/arm/jni_macro_assembler_arm.h
new file mode 100644
index 0000000..4471906
--- /dev/null
+++ b/compiler/utils/arm/jni_macro_assembler_arm.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_
+#define ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_
+
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "utils/jni_macro_assembler.h"
+#include "offsets.h"
+
+namespace art {
+namespace arm {
+
+class ArmAssembler;
+
+class ArmJNIMacroAssembler : public JNIMacroAssembler<PointerSize::k32> {
+ public:
+  ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa);
+  virtual ~ArmJNIMacroAssembler();
+
+  size_t CodeSize() const OVERRIDE;
+  DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE;
+  void FinalizeCode() OVERRIDE;
+  void FinalizeInstructions(const MemoryRegion& region) OVERRIDE;
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+    OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed is true. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
+                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed is true.
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
+                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load it into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+
+ private:
+  std::unique_ptr<ArmAssembler> asm_;
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_
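
For context, the interface declared above is what the JNI stub generator drives: build the frame, spill incoming arguments, set up handle-scope entries, call the native code, poll for exceptions, and tear the frame down. A rough, hedged sketch of that call sequence against a trimmed-down stand-in interface; JniAsm, GenerateStub, the offsets, and the frame size are illustrative only and do not reflect the real jni_compiler code.

    #include <cstddef>
    #include <iostream>

    // Trimmed-down stand-in for the JNIMacroAssembler interface declared above.
    struct JniAsm {
      void BuildFrame(size_t size)         { std::cout << "build frame " << size << "\n"; }
      void StoreRef(int off)               { std::cout << "spill ref @" << off << "\n"; }
      void CreateHandleScopeEntry(int off) { std::cout << "handle-scope entry @" << off << "\n"; }
      void Call(int off)                   { std::cout << "call [method + " << off << "]\n"; }
      void ExceptionPoll()                 { std::cout << "exception poll\n"; }
      void RemoveFrame(size_t size)        { std::cout << "remove frame " << size << "\n"; }
      void FinalizeCode()                  { std::cout << "finalize\n"; }
    };

    // Heavily simplified shape of a JNI stub; the real generator has many more
    // steps (thread-state transitions, locking for synchronized methods, etc.).
    void GenerateStub(JniAsm& a) {
      const size_t kFrameSize = 96;            // illustrative only
      a.BuildFrame(kFrameSize);
      a.StoreRef(/*off=*/8);                   // spill the jobject argument
      a.CreateHandleScopeEntry(/*off=*/8);     // convert it to a handle-scope entry
      a.Call(/*off=*/32);                      // invoke the native method via ArtMethod
      a.ExceptionPoll();                       // deliver a pending exception, if any
      a.RemoveFrame(kFrameSize);
      a.FinalizeCode();
    }

    int main() {
      JniAsm a;
      GenerateStub(a);
      return 0;
    }
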
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index d82caf5..22221e7 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -28,620 +28,49 @@
 #ifdef ___
 #error "ARM64 Assembler macro already defined."
 #else
-#define ___   vixl_masm_->
+#define ___   vixl_masm_.
 #endif
 
 void Arm64Assembler::FinalizeCode() {
-  for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) {
-    EmitExceptionPoll(exception.get());
-  }
   ___ FinalizeCode();
 }
 
 size_t Arm64Assembler::CodeSize() const {
-  return vixl_masm_->GetBufferCapacity() - vixl_masm_->GetRemainingBufferSpace();
+  return vixl_masm_.GetBufferCapacity() - vixl_masm_.GetRemainingBufferSpace();
 }
 
 const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const {
-  return vixl_masm_->GetStartAddress<uint8_t*>();
+  return vixl_masm_.GetStartAddress<uint8_t*>();
 }
 
 void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) {
   // Copy the instructions from the buffer.
-  MemoryRegion from(vixl_masm_->GetStartAddress<void*>(), CodeSize());
+  MemoryRegion from(vixl_masm_.GetStartAddress<void*>(), CodeSize());
   region.CopyFrom(0, from);
 }
 
-void Arm64Assembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
-}
-
-void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
-  StoreToOffset(TR, SP, offset.Int32Value());
-}
-
-// See Arm64 PCS Section 5.2.2.1.
-void Arm64Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, -adjust);
-  cfi().AdjustCFAOffset(adjust);
-}
-
-// See Arm64 PCS Section 5.2.2.1.
-void Arm64Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, adjust);
-  cfi().AdjustCFAOffset(-adjust);
-}
-
-void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
-  AddConstant(rd, rd, value, cond);
-}
-
-void Arm64Assembler::AddConstant(XRegister rd, XRegister rn, int32_t value,
-                                 Condition cond) {
-  if ((cond == al) || (cond == nv)) {
-    // VIXL macro-assembler handles all variants.
-    ___ Add(reg_x(rd), reg_x(rn), value);
-  } else {
-    // temp = rd + value
-    // rd = cond ? temp : rn
-    UseScratchRegisterScope temps(vixl_masm_);
-    temps.Exclude(reg_x(rd), reg_x(rn));
-    Register temp = temps.AcquireX();
-    ___ Add(temp, reg_x(rn), value);
-    ___ Csel(reg_x(rd), temp, reg_x(rd), cond);
-  }
-}
-
-void Arm64Assembler::StoreWToOffset(StoreOperandType type, WRegister source,
-                                    XRegister base, int32_t offset) {
-  switch (type) {
-    case kStoreByte:
-      ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset));
-      break;
-    case kStoreHalfword:
-      ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset));
-      break;
-    case kStoreWord:
-      ___ Str(reg_w(source), MEM_OP(reg_x(base), offset));
-      break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
-  }
-}
-
-void Arm64Assembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) {
-  CHECK_NE(source, SP);
-  ___ Str(reg_x(source), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) {
-  ___ Str(reg_s(source), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) {
-  ___ Str(reg_d(source), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) {
-  Arm64ManagedRegister src = m_src.AsArm64();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsWRegister()) {
-    CHECK_EQ(4u, size);
-    StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value());
-  } else if (src.IsXRegister()) {
-    CHECK_EQ(8u, size);
-    StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
-  } else if (src.IsSRegister()) {
-    StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value());
-  } else {
-    CHECK(src.IsDRegister()) << src;
-    StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value());
-  }
-}
-
-void Arm64Assembler::StoreRef(FrameOffset offs, ManagedRegister m_src) {
-  Arm64ManagedRegister src = m_src.AsArm64();
-  CHECK(src.IsXRegister()) << src;
-  StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP,
-                 offs.Int32Value());
-}
-
-void Arm64Assembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) {
-  Arm64ManagedRegister src = m_src.AsArm64();
-  CHECK(src.IsXRegister()) << src;
-  StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
-}
-
-void Arm64Assembler::StoreImmediateToFrame(FrameOffset offs, uint32_t imm,
-                                           ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadImmediate(scratch.AsXRegister(), imm);
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP,
-                 offs.Int32Value());
-}
-
-void Arm64Assembler::StoreImmediateToThread64(ThreadOffset64 offs,
-                                              uint32_t imm,
-                                              ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadImmediate(scratch.AsXRegister(), imm);
-  StoreToOffset(scratch.AsXRegister(), TR, offs.Int32Value());
-}
-
-void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset64 tr_offs,
-                                                FrameOffset fr_offs,
-                                                ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-}
-
-void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset64 tr_offs) {
-  UseScratchRegisterScope temps(vixl_masm_);
-  Register temp = temps.AcquireX();
-  ___ Mov(temp, reg_x(SP));
-  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
-}
-
-void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source,
-                                   FrameOffset in_off, ManagedRegister m_scratch) {
-  Arm64ManagedRegister source = m_source.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8);
-}
-
-// Load routines.
-void Arm64Assembler::LoadImmediate(XRegister dest, int32_t value,
-                                   Condition cond) {
-  if ((cond == al) || (cond == nv)) {
-    ___ Mov(reg_x(dest), value);
-  } else {
-    // temp = value
-    // rd = cond ? temp : rd
-    if (value != 0) {
-      UseScratchRegisterScope temps(vixl_masm_);
-      temps.Exclude(reg_x(dest));
-      Register temp = temps.AcquireX();
-      ___ Mov(temp, value);
-      ___ Csel(reg_x(dest), temp, reg_x(dest), cond);
-    } else {
-      ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond);
-    }
-  }
-}
-
-void Arm64Assembler::LoadWFromOffset(LoadOperandType type, WRegister dest,
-                                     XRegister base, int32_t offset) {
-  switch (type) {
-    case kLoadSignedByte:
-      ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadSignedHalfword:
-      ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadUnsignedByte:
-      ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadUnsignedHalfword:
-      ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadWord:
-      ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    default:
-        LOG(FATAL) << "UNREACHABLE";
-  }
-}
-
-// Note: We can extend this member by adding load type info - see
-// sign extended A64 load variants.
-void Arm64Assembler::LoadFromOffset(XRegister dest, XRegister base,
-                                    int32_t offset) {
-  CHECK_NE(dest, SP);
-  ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::LoadSFromOffset(SRegister dest, XRegister base,
-                                     int32_t offset) {
-  ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::LoadDFromOffset(DRegister dest, XRegister base,
-                                     int32_t offset) {
-  ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::Load(Arm64ManagedRegister dest, XRegister base,
-                          int32_t offset, size_t size) {
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size) << dest;
-  } else if (dest.IsWRegister()) {
-    CHECK_EQ(4u, size) << dest;
-    ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset));
-  } else if (dest.IsXRegister()) {
-    CHECK_NE(dest.AsXRegister(), SP) << dest;
-    if (size == 4u) {
-      ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset));
-    } else {
-      CHECK_EQ(8u, size) << dest;
-      ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset));
-    }
-  } else if (dest.IsSRegister()) {
-    ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset));
-  } else {
-    CHECK(dest.IsDRegister()) << dest;
-    ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset));
-  }
-}
-
-void Arm64Assembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
-  return Load(m_dst.AsArm64(), SP, src.Int32Value(), size);
-}
-
-void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset64 src, size_t size) {
-  return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
-}
-
-void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  CHECK(dst.IsXRegister()) << dst;
-  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value());
-}
-
-void Arm64Assembler::LoadRef(ManagedRegister m_dst, ManagedRegister m_base, MemberOffset offs,
-                             bool unpoison_reference) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  Arm64ManagedRegister base = m_base.AsArm64();
-  CHECK(dst.IsXRegister() && base.IsXRegister());
-  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(),
-                  offs.Int32Value());
-  if (unpoison_reference) {
-    WRegister ref_reg = dst.AsOverlappingWRegister();
-    MaybeUnpoisonHeapReference(reg_w(ref_reg));
-  }
-}
-
 void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, Offset offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   Arm64ManagedRegister base = m_base.AsArm64();
   CHECK(dst.IsXRegister() && base.IsXRegister());
   // Remove dst and base from the temp list - higher level API uses IP1, IP0.
-  UseScratchRegisterScope temps(vixl_masm_);
+  UseScratchRegisterScope temps(&vixl_masm_);
   temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister()));
   ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
 }
 
-void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset64 offs) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  CHECK(dst.IsXRegister()) << dst;
-  LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value());
-}
-
-// Copying routines.
-void Arm64Assembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  Arm64ManagedRegister src = m_src.AsArm64();
-  if (!dst.Equals(src)) {
-    if (dst.IsXRegister()) {
-      if (size == 4) {
-        CHECK(src.IsWRegister());
-        ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister()));
-      } else {
-        if (src.IsXRegister()) {
-          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister()));
-        } else {
-          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister()));
-        }
-      }
-    } else if (dst.IsWRegister()) {
-      CHECK(src.IsWRegister()) << src;
-      ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister()));
-    } else if (dst.IsSRegister()) {
-      CHECK(src.IsSRegister()) << src;
-      ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister()));
-    } else {
-      CHECK(dst.IsDRegister()) << dst;
-      CHECK(src.IsDRegister()) << src;
-      ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister()));
-    }
-  }
-}
-
-void Arm64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                            ThreadOffset64 tr_offs,
-                                            ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-}
-
-void Arm64Assembler::CopyRawPtrToThread64(ThreadOffset64 tr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-}
-
-void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
-                             ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(),
-                  SP, src.Int32Value());
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(),
-                 SP, dest.Int32Value());
-}
-
-void Arm64Assembler::Copy(FrameOffset dest, FrameOffset src,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64ManagedRegister base = src_base.AsArm64();
-  CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(),
-                   src_offset.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(ManagedRegister m_dest_base, Offset dest_offs, FrameOffset src,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64ManagedRegister base = m_dest_base.AsArm64();
-  CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(),
-                   dest_offs.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                          ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
-}
-
-void Arm64Assembler::Copy(ManagedRegister m_dest, Offset dest_offset,
-                          ManagedRegister m_src, Offset src_offset,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64ManagedRegister src = m_src.AsArm64();
-  Arm64ManagedRegister dest = m_dest.AsArm64();
-  CHECK(dest.IsXRegister()) << dest;
-  CHECK(src.IsXRegister()) << src;
-  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    if (scratch.IsWRegister()) {
-      LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(),
-                    src_offset.Int32Value());
-      StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(),
-                   dest_offset.Int32Value());
-    } else {
-      LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(),
-                    src_offset.Int32Value());
-      StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(),
-                   dest_offset.Int32Value());
-    }
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/,
-                          FrameOffset /*src*/, Offset /*src_offset*/,
-                          ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
-}
-
-void Arm64Assembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) {
-  // TODO: Should we check that m_scratch is IP? - see arm.
-  ___ Dmb(InnerShareable, BarrierAll);
-}
-
-void Arm64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
-  Arm64ManagedRegister reg = mreg.AsArm64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsWRegister()) << reg;
-  if (size == 1) {
-    ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  } else {
-    ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  }
-}
-
-void Arm64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
-  Arm64ManagedRegister reg = mreg.AsArm64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsWRegister()) << reg;
-  if (size == 1) {
-    ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  } else {
-    ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  }
-}
-
-void Arm64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void Arm64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void Arm64Assembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
-  Arm64ManagedRegister base = m_base.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
-}
-
 void Arm64Assembler::JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
   Arm64ManagedRegister base = m_base.AsArm64();
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(base.IsXRegister()) << base;
   CHECK(scratch.IsXRegister()) << scratch;
   // Remove base and scratch from the temp list - higher level API uses IP1, IP0.
-  UseScratchRegisterScope temps(vixl_masm_);
+  UseScratchRegisterScope temps(&vixl_masm_);
   temps.Exclude(reg_x(base.AsXRegister()), reg_x(scratch.AsXRegister()));
   ___ Ldr(reg_x(scratch.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
   ___ Br(reg_x(scratch.AsXRegister()));
 }
 
-void Arm64Assembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  // Call *(*(SP + base) + offset)
-  LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
-}
-
-void Arm64Assembler::CallFromThread64(ThreadOffset64 offset ATTRIBUTE_UNUSED,
-                                      ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
-}
-
-void Arm64Assembler::CreateHandleScopeEntry(
-    ManagedRegister m_out_reg, FrameOffset handle_scope_offs, ManagedRegister m_in_reg,
-    bool null_allowed) {
-  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
-  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
-  // For now we only hold stale handle scope entries in x registers.
-  CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg;
-  CHECK(out_reg.IsXRegister()) << out_reg;
-  if (null_allowed) {
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
-    if (in_reg.IsNoRegister()) {
-      LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP,
-                      handle_scope_offs.Int32Value());
-      in_reg = out_reg;
-    }
-    ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0);
-    if (!out_reg.Equals(in_reg)) {
-      LoadImmediate(out_reg.AsXRegister(), 0, eq);
-    }
-    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne);
-  } else {
-    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al);
-  }
-}
-
-void Arm64Assembler::CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handle_scope_offset,
-                                            ManagedRegister m_scratch, bool null_allowed) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  if (null_allowed) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP,
-                    handle_scope_offset.Int32Value());
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0);
-    // Move this logic in add constants with flags.
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne);
-  } else {
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al);
-  }
-  StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value());
-}
-
-void Arm64Assembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg,
-                                                  ManagedRegister m_in_reg) {
-  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
-  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
-  CHECK(out_reg.IsXRegister()) << out_reg;
-  CHECK(in_reg.IsXRegister()) << in_reg;
-  vixl::aarch64::Label exit;
-  if (!out_reg.Equals(in_reg)) {
-    // FIXME: Who sets the flags here?
-    LoadImmediate(out_reg.AsXRegister(), 0, eq);
-  }
-  ___ Cbz(reg_x(in_reg.AsXRegister()), &exit);
-  LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0);
-  ___ Bind(&exit);
-}
-
-void Arm64Assembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
-  CHECK_ALIGNED(stack_adjust, kStackAlignment);
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust));
-  LoadFromOffset(scratch.AsXRegister(),
-                 TR,
-                 Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
-  ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry());
-}
-
-void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
-  UseScratchRegisterScope temps(vixl_masm_);
-  temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
-  Register temp = temps.AcquireX();
-
-  // Bind exception poll entry.
-  ___ Bind(exception->Entry());
-  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
-    DecreaseFrameSize(exception->stack_adjust_);
-  }
-  // Pass exception object as argument.
-  // Don't care about preserving X0 as this won't return.
-  ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
-  ___ Ldr(temp,
-          MEM_OP(reg_x(TR),
-                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException).Int32Value()));
-
-  ___ Blr(temp);
-  // Call should never return.
-  ___ Brk();
-}
-
 static inline dwarf::Reg DWARFReg(CPURegister reg) {
   if (reg.IsFPRegister()) {
     return dwarf::Reg::Arm64Fp(reg.GetCode());
@@ -653,7 +82,7 @@
 
 void Arm64Assembler::SpillRegisters(CPURegList registers, int offset) {
   int size = registers.GetRegisterSizeInBytes();
-  const Register sp = vixl_masm_->StackPointer();
+  const Register sp = vixl_masm_.StackPointer();
   // Since we are operating on register pairs, we would like to align on
   // double the standard size; on the other hand, we don't want to insert
   // an extra store, which will happen if the number of registers is even.
@@ -681,7 +110,7 @@
 
 void Arm64Assembler::UnspillRegisters(CPURegList registers, int offset) {
   int size = registers.GetRegisterSizeInBytes();
-  const Register sp = vixl_masm_->StackPointer();
+  const Register sp = vixl_masm_.StackPointer();
   // Be consistent with the logic for spilling registers.
   if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) {
     const CPURegister& dst0 = registers.PopLowestIndex();
@@ -705,105 +134,6 @@
   DCHECK(registers.IsEmpty());
 }
 
-void Arm64Assembler::BuildFrame(size_t frame_size,
-                                ManagedRegister method_reg,
-                                ArrayRef<const ManagedRegister> callee_save_regs,
-                                const ManagedRegisterEntrySpills& entry_spills) {
-  // Setup VIXL CPURegList for callee-saves.
-  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
-  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
-  for (auto r : callee_save_regs) {
-    Arm64ManagedRegister reg = r.AsArm64();
-    if (reg.IsXRegister()) {
-      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
-    } else {
-      DCHECK(reg.IsDRegister());
-      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
-    }
-  }
-  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
-  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
-
-  // Increase frame to required size.
-  DCHECK_ALIGNED(frame_size, kStackAlignment);
-  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
-  IncreaseFrameSize(frame_size);
-
-  // Save callee-saves.
-  SpillRegisters(core_reg_list, frame_size - core_reg_size);
-  SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
-
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
-
-  // Write ArtMethod*
-  DCHECK(X0 == method_reg.AsArm64().AsXRegister());
-  StoreToOffset(X0, SP, 0);
-
-  // Write out entry spills
-  int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize);
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      ManagedRegisterSpill spill = entry_spills.at(i);
-      offset += spill.getSize();
-    } else if (reg.IsXRegister()) {
-      StoreToOffset(reg.AsXRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsWRegister()) {
-      StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsSRegister()) {
-      StoreSToOffset(reg.AsSRegister(), SP, offset);
-      offset += 4;
-    }
-  }
-}
-
-void Arm64Assembler::RemoveFrame(size_t frame_size,
-                                 ArrayRef<const ManagedRegister> callee_save_regs) {
-  // Setup VIXL CPURegList for callee-saves.
-  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
-  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
-  for (auto r : callee_save_regs) {
-    Arm64ManagedRegister reg = r.AsArm64();
-    if (reg.IsXRegister()) {
-      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
-    } else {
-      DCHECK(reg.IsDRegister());
-      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
-    }
-  }
-  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
-  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
-
-  // For now we only check that the size of the frame is large enough to hold spills and method
-  // reference.
-  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
-  DCHECK_ALIGNED(frame_size, kStackAlignment);
-
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
-
-  cfi_.RememberState();
-
-  // Restore callee-saves.
-  UnspillRegisters(core_reg_list, frame_size - core_reg_size);
-  UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
-
-  // Decrease frame size to start of callee saved regs.
-  DecreaseFrameSize(frame_size);
-
-  // Pop callee saved and return to LR.
-  ___ Ret();
-
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
 void Arm64Assembler::PoisonHeapReference(Register reg) {
   DCHECK(reg.IsW());
   // reg = -reg.
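
The Poison/UnpoisonHeapReference helpers kept in assembler_arm64.cc implement poisoning as plain 32-bit negation of the reference (as the reg = -reg comment indicates), so the same operation both poisons and unpoisons and null stays null. A tiny sketch of that invariant in plain C++, not the emitted code; PoisonRef is an illustrative name.

    #include <cassert>
    #include <cstdint>

    // Poisoning a heap reference is 32-bit negation; applying it twice restores the value.
    static inline uint32_t PoisonRef(uint32_t ref) {
      return 0u - ref;  // well-defined unsigned wraparound
    }

    int main() {
      const uint32_t ref = 0x12345678u;
      const uint32_t poisoned = PoisonRef(ref);
      assert(poisoned != ref);             // a nonzero reference never equals its poisoned form
      assert(PoisonRef(poisoned) == ref);  // negation is its own inverse
      assert(PoisonRef(0u) == 0u);         // null references are unaffected
      return 0;
    }
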
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 24b7982..4e88e64 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -23,7 +23,6 @@
 
 #include "base/arena_containers.h"
 #include "base/logging.h"
-#include "constants_arm64.h"
 #include "utils/arm64/managed_register_arm64.h"
 #include "utils/assembler.h"
 #include "offsets.h"
@@ -62,38 +61,13 @@
   kStoreDWord
 };
 
-class Arm64Exception {
- private:
-  Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {
-    }
-
-  vixl::aarch64::Label* Entry() { return &exception_entry_; }
-
-  // Register used for passing Thread::Current()->exception_ .
-  const Arm64ManagedRegister scratch_;
-
-  // Stack adjust for ExceptionPool.
-  const size_t stack_adjust_;
-
-  vixl::aarch64::Label exception_entry_;
-
-  friend class Arm64Assembler;
-  DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
-};
-
 class Arm64Assembler FINAL : public Assembler {
  public:
-  // We indicate the size of the initial code generation buffer to the VIXL
-  // assembler. From there we it will automatically manage the buffer.
-  explicit Arm64Assembler(ArenaAllocator* arena)
-      : Assembler(arena),
-        exception_blocks_(arena->Adapter(kArenaAllocAssembler)),
-        vixl_masm_(new vixl::aarch64::MacroAssembler(kArm64BaseBufferSize)) {}
+  explicit Arm64Assembler(ArenaAllocator* arena) : Assembler(arena) {}
 
-  virtual ~Arm64Assembler() {
-    delete vixl_masm_;
-  }
+  virtual ~Arm64Assembler() {}
+
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; }
 
   // Finalize the code.
   void FinalizeCode() OVERRIDE;
@@ -105,110 +79,14 @@
   // Copy instructions out of assembly buffer into the given region of memory.
   void FinalizeInstructions(const MemoryRegion& region);
 
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs);
+
   void SpillRegisters(vixl::aarch64::CPURegList registers, int offset);
   void UnspillRegisters(vixl::aarch64::CPURegList registers, int offset);
 
-  // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size,
-                  ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
-      OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines.
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-  void StoreImmediateToThread64(ThreadOffset64 dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-  void StoreStackOffsetToThread64(ThreadOffset64 thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-  void StoreStackPointerToThread64(ThreadOffset64 thr_offs) OVERRIDE;
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines.
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-  void LoadFromThread64(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
-  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
-
-  // Copying routines.
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset64 thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-  void CopyRawPtrToThread64(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
-  // Sign extension.
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension.
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current().
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg,
-                              FrameOffset handlescope_offset,
-                              ManagedRegister in_reg,
-                              bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off,
-                              FrameOffset handlescope_offset,
-                              ManagedRegister scratch,
-                              bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst.
-  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset].
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
-
   // Jump to address (not setting link register)
   void JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch);
 
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
   //
   // Heap poisoning.
   //
@@ -227,7 +105,6 @@
     UNIMPLEMENTED(FATAL) << "Do not use Jump for ARM64";
   }
 
- private:
   static vixl::aarch64::Register reg_x(int code) {
     CHECK(code < kNumberOfXRegisters) << code;
     if (code == SP) {
@@ -256,40 +133,9 @@
     return vixl::aarch64::FPRegister::GetSRegFromCode(code);
   }
 
-  // Emits Exception block.
-  void EmitExceptionPoll(Arm64Exception *exception);
-
-  void StoreWToOffset(StoreOperandType type, WRegister source,
-                      XRegister base, int32_t offset);
-  void StoreToOffset(XRegister source, XRegister base, int32_t offset);
-  void StoreSToOffset(SRegister source, XRegister base, int32_t offset);
-  void StoreDToOffset(DRegister source, XRegister base, int32_t offset);
-
-  void LoadImmediate(XRegister dest,
-                     int32_t value,
-                     vixl::aarch64::Condition cond = vixl::aarch64::al);
-  void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size);
-  void LoadWFromOffset(LoadOperandType type,
-                       WRegister dest,
-                       XRegister base,
-                       int32_t offset);
-  void LoadFromOffset(XRegister dest, XRegister base, int32_t offset);
-  void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset);
-  void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset);
-  void AddConstant(XRegister rd,
-                   int32_t value,
-                   vixl::aarch64::Condition cond = vixl::aarch64::al);
-  void AddConstant(XRegister rd,
-                   XRegister rn,
-                   int32_t value,
-                   vixl::aarch64::Condition cond = vixl::aarch64::al);
-
-  // List of exception blocks to generate at the end of the code cache.
-  ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_;
-
- public:
-  // Vixl assembler.
-  vixl::aarch64::MacroAssembler* const vixl_masm_;
+ private:
+  // VIXL assembler.
+  vixl::aarch64::MacroAssembler vixl_masm_;
 
   // Used for testing.
   friend class Arm64ManagedRegister_VixlRegisters_Test;
diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h
deleted file mode 100644
index 01e8be9..0000000
--- a/compiler/utils/arm64/constants_arm64.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
-#define ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
-
-#include <stdint.h>
-#include <iosfwd>
-#include "arch/arm64/registers_arm64.h"
-#include "base/casts.h"
-#include "base/logging.h"
-#include "globals.h"
-
-// TODO: Extend this file by adding missing functionality.
-
-namespace art {
-namespace arm64 {
-
-constexpr size_t kArm64BaseBufferSize = 4096;
-
-}  // namespace arm64
-}  // namespace art
-
-#endif  // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
new file mode 100644
index 0000000..dfdcd11
--- /dev/null
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -0,0 +1,754 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_arm64.h"
+
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "managed_register_arm64.h"
+#include "offsets.h"
+#include "thread.h"
+
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
+
+namespace art {
+namespace arm64 {
+
+#ifdef ___
+#error "ARM64 Assembler macro already defined."
+#else
+#define ___   asm_.GetVIXLAssembler()->
+#endif
+
+#define reg_x(X) Arm64Assembler::reg_x(X)
+#define reg_w(W) Arm64Assembler::reg_w(W)
+#define reg_d(D) Arm64Assembler::reg_d(D)
+#define reg_s(S) Arm64Assembler::reg_s(S)
+
+Arm64JNIMacroAssembler::~Arm64JNIMacroAssembler() {
+}
+
+void Arm64JNIMacroAssembler::FinalizeCode() {
+  for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) {
+    EmitExceptionPoll(exception.get());
+  }
+  ___ FinalizeCode();
+}
+
+void Arm64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
+}
+
+void Arm64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
+  StoreToOffset(TR, SP, offset.Int32Value());
+}
+
+// See Arm64 PCS Section 5.2.2.1.
+void Arm64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  AddConstant(SP, -adjust);
+  cfi().AdjustCFAOffset(adjust);
+}
+
+// See Arm64 PCS Section 5.2.2.1.
+void Arm64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  AddConstant(SP, adjust);
+  cfi().AdjustCFAOffset(-adjust);
+}
+
+void Arm64JNIMacroAssembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
+  AddConstant(rd, rd, value, cond);
+}
+
+void Arm64JNIMacroAssembler::AddConstant(XRegister rd,
+                                         XRegister rn,
+                                         int32_t value,
+                                         Condition cond) {
+  if ((cond == al) || (cond == nv)) {
+    // VIXL macro-assembler handles all variants.
+    ___ Add(reg_x(rd), reg_x(rn), value);
+  } else {
+    // temp = rd + value
+    // rd = cond ? temp : rn
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    temps.Exclude(reg_x(rd), reg_x(rn));
+    Register temp = temps.AcquireX();
+    ___ Add(temp, reg_x(rn), value);
+    ___ Csel(reg_x(rd), temp, reg_x(rd), cond);
+  }
+}
+
+void Arm64JNIMacroAssembler::StoreWToOffset(StoreOperandType type,
+                                            WRegister source,
+                                            XRegister base,
+                                            int32_t offset) {
+  switch (type) {
+    case kStoreByte:
+      ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset));
+      break;
+    case kStoreHalfword:
+      ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset));
+      break;
+    case kStoreWord:
+      ___ Str(reg_w(source), MEM_OP(reg_x(base), offset));
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+void Arm64JNIMacroAssembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) {
+  CHECK_NE(source, SP);
+  ___ Str(reg_x(source), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) {
+  ___ Str(reg_s(source), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) {
+  ___ Str(reg_d(source), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) {
+  Arm64ManagedRegister src = m_src.AsArm64();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsWRegister()) {
+    CHECK_EQ(4u, size);
+    StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value());
+  } else if (src.IsXRegister()) {
+    CHECK_EQ(8u, size);
+    StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
+  } else if (src.IsSRegister()) {
+    StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value());
+  } else {
+    CHECK(src.IsDRegister()) << src;
+    StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value());
+  }
+}
+
+void Arm64JNIMacroAssembler::StoreRef(FrameOffset offs, ManagedRegister m_src) {
+  Arm64ManagedRegister src = m_src.AsArm64();
+  CHECK(src.IsXRegister()) << src;
+  StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP,
+                 offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) {
+  Arm64ManagedRegister src = m_src.AsArm64();
+  CHECK(src.IsXRegister()) << src;
+  StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs,
+                                                   uint32_t imm,
+                                                   ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadImmediate(scratch.AsXRegister(), imm);
+  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP,
+                 offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs,
+                                                      FrameOffset fr_offs,
+                                                      ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register temp = temps.AcquireX();
+  ___ Mov(temp, reg_x(SP));
+  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
+}
+
+void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off,
+                                           ManagedRegister m_source,
+                                           FrameOffset in_off,
+                                           ManagedRegister m_scratch) {
+  Arm64ManagedRegister source = m_source.AsArm64();
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8);
+}
+
+// Load routines.
+void Arm64JNIMacroAssembler::LoadImmediate(XRegister dest, int32_t value, Condition cond) {
+  if ((cond == al) || (cond == nv)) {
+    ___ Mov(reg_x(dest), value);
+  } else {
+    // temp = value
+    // rd = cond ? temp : rd
+    if (value != 0) {
+      UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+      temps.Exclude(reg_x(dest));
+      Register temp = temps.AcquireX();
+      ___ Mov(temp, value);
+      ___ Csel(reg_x(dest), temp, reg_x(dest), cond);
+    } else {
+      ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond);
+    }
+  }
+}
+
+void Arm64JNIMacroAssembler::LoadWFromOffset(LoadOperandType type,
+                                             WRegister dest,
+                                             XRegister base,
+                                             int32_t offset) {
+  switch (type) {
+    case kLoadSignedByte:
+      ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadSignedHalfword:
+      ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadUnsignedByte:
+      ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadUnsignedHalfword:
+      ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadWord:
+      ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+// Note: We can extend this member by adding load type info - see
+// sign extended A64 load variants.
+void Arm64JNIMacroAssembler::LoadFromOffset(XRegister dest, XRegister base, int32_t offset) {
+  CHECK_NE(dest, SP);
+  ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::LoadSFromOffset(SRegister dest, XRegister base, int32_t offset) {
+  ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::LoadDFromOffset(DRegister dest, XRegister base, int32_t offset) {
+  ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::Load(Arm64ManagedRegister dest,
+                                  XRegister base,
+                                  int32_t offset,
+                                  size_t size) {
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size) << dest;
+  } else if (dest.IsWRegister()) {
+    CHECK_EQ(4u, size) << dest;
+    ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset));
+  } else if (dest.IsXRegister()) {
+    CHECK_NE(dest.AsXRegister(), SP) << dest;
+    if (size == 4u) {
+      ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset));
+    } else {
+      CHECK_EQ(8u, size) << dest;
+      ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset));
+    }
+  } else if (dest.IsSRegister()) {
+    ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset));
+  } else {
+    CHECK(dest.IsDRegister()) << dest;
+    ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset));
+  }
+}
+
+void Arm64JNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
+  return Load(m_dst.AsArm64(), SP, src.Int32Value(), size);
+}
+
+void Arm64JNIMacroAssembler::LoadFromThread(ManagedRegister m_dst,
+                                            ThreadOffset64 src,
+                                            size_t size) {
+  return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
+}
+
+void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  CHECK(dst.IsXRegister()) << dst;
+  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst,
+                                     ManagedRegister m_base,
+                                     MemberOffset offs,
+                                     bool unpoison_reference) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  Arm64ManagedRegister base = m_base.AsArm64();
+  CHECK(dst.IsXRegister() && base.IsXRegister());
+  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(),
+                  offs.Int32Value());
+  if (unpoison_reference) {
+    WRegister ref_reg = dst.AsOverlappingWRegister();
+    asm_.MaybeUnpoisonHeapReference(reg_w(ref_reg));
+  }
+}
+
+void Arm64JNIMacroAssembler::LoadRawPtr(ManagedRegister m_dst,
+                                        ManagedRegister m_base,
+                                        Offset offs) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  Arm64ManagedRegister base = m_base.AsArm64();
+  CHECK(dst.IsXRegister() && base.IsXRegister());
+  // Remove dst and base from the temp list - the higher level API uses IP1, IP0.
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister()));
+  ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
+}
+
+void Arm64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset64 offs) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  CHECK(dst.IsXRegister()) << dst;
+  LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value());
+}
+
+// Copying routines.
+void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  Arm64ManagedRegister src = m_src.AsArm64();
+  if (!dst.Equals(src)) {
+    if (dst.IsXRegister()) {
+      if (size == 4) {
+        CHECK(src.IsWRegister());
+        ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister()));
+      } else {
+        if (src.IsXRegister()) {
+          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister()));
+        } else {
+          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister()));
+        }
+      }
+    } else if (dst.IsWRegister()) {
+      CHECK(src.IsWRegister()) << src;
+      ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister()));
+    } else if (dst.IsSRegister()) {
+      CHECK(src.IsSRegister()) << src;
+      ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister()));
+    } else {
+      CHECK(dst.IsDRegister()) << dst;
+      CHECK(src.IsDRegister()) << src;
+      ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister()));
+    }
+  }
+}
+
+void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                  ThreadOffset64 tr_offs,
+                                                  ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 tr_offs,
+                                                FrameOffset fr_offs,
+                                                ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(),
+                  SP, src.Int32Value());
+  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(),
+                 SP, dest.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
+                                  FrameOffset src,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
+                                  ManagedRegister src_base,
+                                  Offset src_offset,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  Arm64ManagedRegister base = src_base.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(),
+                   src_offset.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest_base,
+                                  Offset dest_offs,
+                                  FrameOffset src,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  Arm64ManagedRegister base = m_dest_base.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(),
+                   dest_offs.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                  FrameOffset /*src_base*/,
+                                  Offset /*src_offset*/,
+                                  ManagedRegister /*mscratch*/,
+                                  size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
+}
+
+void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest,
+                                  Offset dest_offset,
+                                  ManagedRegister m_src,
+                                  Offset src_offset,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  Arm64ManagedRegister src = m_src.AsArm64();
+  Arm64ManagedRegister dest = m_dest.AsArm64();
+  CHECK(dest.IsXRegister()) << dest;
+  CHECK(src.IsXRegister()) << src;
+  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    if (scratch.IsWRegister()) {
+      LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(),
+                    src_offset.Int32Value());
+      StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(),
+                   dest_offset.Int32Value());
+    } else {
+      LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(),
+                    src_offset.Int32Value());
+      StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(),
+                   dest_offset.Int32Value());
+    }
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                  Offset /*dest_offset*/,
+                                  FrameOffset /*src*/,
+                                  Offset /*src_offset*/,
+                                  ManagedRegister /*scratch*/,
+                                  size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
+}
+
+void Arm64JNIMacroAssembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) {
+  // TODO: Should we check that m_scratch is IP? - see arm.
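+  // Emit a full data memory barrier (loads and stores) over the inner shareable domain.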
+  ___ Dmb(InnerShareable, BarrierAll);
+}
+
+void Arm64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
+  Arm64ManagedRegister reg = mreg.AsArm64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsWRegister()) << reg;
+  if (size == 1) {
+    ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  } else {
+    ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  }
+}
+
+void Arm64JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  Arm64ManagedRegister reg = mreg.AsArm64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsWRegister()) << reg;
+  if (size == 1) {
+    ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  } else {
+    ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  }
+}
+
+void Arm64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void Arm64JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
+  Arm64ManagedRegister base = m_base.AsArm64();
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
+  ___ Blr(reg_x(scratch.AsXRegister()));
+}
+
+void Arm64JNIMacroAssembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  // Call *(*(SP + base) + offset)
+  LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value());
+  ___ Blr(reg_x(scratch.AsXRegister()));
+}
+
+void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
+                                            ManagedRegister scratch ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
+}
+
+void Arm64JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister m_out_reg,
+                                                    FrameOffset handle_scope_offs,
+                                                    ManagedRegister m_in_reg,
+                                                    bool null_allowed) {
+  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
+  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
+  // For now we only hold stale handle scope entries in x registers.
+  CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg;
+  CHECK(out_reg.IsXRegister()) << out_reg;
+  if (null_allowed) {
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    if (in_reg.IsNoRegister()) {
+      LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP,
+                      handle_scope_offs.Int32Value());
+      in_reg = out_reg;
+    }
+    ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0);
+    if (!out_reg.Equals(in_reg)) {
+      LoadImmediate(out_reg.AsXRegister(), 0, eq);
+    }
+    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne);
+  } else {
+    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al);
+  }
+}
+
+void Arm64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                    FrameOffset handle_scope_offset,
+                                                    ManagedRegister m_scratch,
+                                                    bool null_allowed) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  if (null_allowed) {
+    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP,
+                    handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
+    ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0);
+    // Move this logic into AddConstant() with flags.
+    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne);
+  } else {
+    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al);
+  }
+  StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg,
+                                                          ManagedRegister m_in_reg) {
+  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
+  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
+  CHECK(out_reg.IsXRegister()) << out_reg;
+  CHECK(in_reg.IsXRegister()) << in_reg;
+  vixl::aarch64::Label exit;
+  if (!out_reg.Equals(in_reg)) {
+    // FIXME: Who sets the flags here?
+    LoadImmediate(out_reg.AsXRegister(), 0, eq);
+  }
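+  // If the handle scope entry is null, skip the load and leave out_reg as set above.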
+  ___ Cbz(reg_x(in_reg.AsXRegister()), &exit);
+  LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0);
+  ___ Bind(&exit);
+}
+
+void Arm64JNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
+  CHECK_ALIGNED(stack_adjust, kStackAlignment);
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust));
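+  // Load Thread::Current()->exception_ and branch to the exception block if it is non-null.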
+  LoadFromOffset(scratch.AsXRegister(),
+                 TR,
+                 Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
+  ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry());
+}
+
+void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception* exception) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
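+  // The scratch register still holds the exception object, so keep it out of the temp pool.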
+  temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
+  Register temp = temps.AcquireX();
+
+  // Bind exception poll entry.
+  ___ Bind(exception->Entry());
+  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSize(exception->stack_adjust_);
+  }
+  // Pass exception object as argument.
+  // Don't care about preserving X0 as this won't return.
+  ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
+  ___ Ldr(temp,
+          MEM_OP(reg_x(TR),
+                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException).Int32Value()));
+
+  ___ Blr(temp);
+  // Call should never return.
+  ___ Brk();
+}
+
+void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size,
+                                        ManagedRegister method_reg,
+                                        ArrayRef<const ManagedRegister> callee_save_regs,
+                                        const ManagedRegisterEntrySpills& entry_spills) {
+  // Setup VIXL CPURegList for callee-saves.
+  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
+  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
+  for (auto r : callee_save_regs) {
+    Arm64ManagedRegister reg = r.AsArm64();
+    if (reg.IsXRegister()) {
+      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
+    } else {
+      DCHECK(reg.IsDRegister());
+      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
+    }
+  }
+  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
+  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
+
+  // Increase frame to required size.
+  DCHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
+  IncreaseFrameSize(frame_size);
+
+  // Save callee-saves.
+  asm_.SpillRegisters(core_reg_list, frame_size - core_reg_size);
+  asm_.SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
+
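+  // Sanity check below: the managed thread register must be part of the callee-save set.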
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
+
+  // Write ArtMethod*
+  DCHECK(X0 == method_reg.AsArm64().AsXRegister());
+  StoreToOffset(X0, SP, 0);
+
+  // Write out entry spills
+  int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize);
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
+    if (reg.IsNoRegister()) {
+      // Only increment the stack offset.
+      ManagedRegisterSpill spill = entry_spills.at(i);
+      offset += spill.getSize();
+    } else if (reg.IsXRegister()) {
+      StoreToOffset(reg.AsXRegister(), SP, offset);
+      offset += 8;
+    } else if (reg.IsWRegister()) {
+      StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsDRegister()) {
+      StoreDToOffset(reg.AsDRegister(), SP, offset);
+      offset += 8;
+    } else if (reg.IsSRegister()) {
+      StoreSToOffset(reg.AsSRegister(), SP, offset);
+      offset += 4;
+    }
+  }
+}
+
+void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                         ArrayRef<const ManagedRegister> callee_save_regs) {
+  // Setup VIXL CPURegList for callee-saves.
+  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
+  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
+  for (auto r : callee_save_regs) {
+    Arm64ManagedRegister reg = r.AsArm64();
+    if (reg.IsXRegister()) {
+      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
+    } else {
+      DCHECK(reg.IsDRegister());
+      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
+    }
+  }
+  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
+  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
+
+  // For now we only check that the size of the frame is large enough to hold spills and method
+  // reference.
+  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
+  DCHECK_ALIGNED(frame_size, kStackAlignment);
+
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
+
+  cfi().RememberState();
+
+  // Restore callee-saves.
+  asm_.UnspillRegisters(core_reg_list, frame_size - core_reg_size);
+  asm_.UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
+
+  // Decrease frame size to start of callee saved regs.
+  DecreaseFrameSize(frame_size);
+
+  // Pop callee saved and return to LR.
+  ___ Ret();
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+#undef ___
+
+}  // namespace arm64
+}  // namespace art
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
new file mode 100644
index 0000000..79ee441
--- /dev/null
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_
+#define ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_
+
+#include <stdint.h>
+#include <memory>
+#include <vector>
+
+#include "assembler_arm64.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/logging.h"
+#include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
+#include "offsets.h"
+
+// TODO: make vixl clean wrt -Wshadow, -Wunknown-pragmas, -Wmissing-noreturn
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunknown-pragmas"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wmissing-noreturn"
+#include "a64/macro-assembler-a64.h"
+#pragma GCC diagnostic pop
+
+namespace art {
+namespace arm64 {
+
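+// JNI stub macro assembler for ARM64, layered on top of the VIXL-based Arm64Assembler.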
+class Arm64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<Arm64Assembler, PointerSize::k64> {
+ public:
+  explicit Arm64JNIMacroAssembler(ArenaAllocator* arena)
+      : JNIMacroAssemblerFwd(arena),
+        exception_blocks_(arena->Adapter(kArenaAllocAssembler)) {}
+
+  ~Arm64JNIMacroAssembler();
+
+  // Finalize the code.
+  void FinalizeCode() OVERRIDE;
+
+  // Emit code that will create an activation on the stack.
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack.
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+      OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines.
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
+  void StoreSpanning(FrameOffset dest,
+                     ManagedRegister src,
+                     FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines.
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+  void LoadRef(ManagedRegister dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
+
+  // Copying routines.
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            ManagedRegister src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(ManagedRegister dest_base,
+            Offset dest_offset,
+            FrameOffset src,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            FrameOffset src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(ManagedRegister dest,
+            Offset dest_offset,
+            ManagedRegister src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            Offset dest_offset,
+            FrameOffset src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+
+  // Sign extension.
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension.
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current().
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister scratch,
+                              bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load this into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset].
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  class Arm64Exception {
+   public:
+    Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
+        : scratch_(scratch), stack_adjust_(stack_adjust) {}
+
+    vixl::aarch64::Label* Entry() { return &exception_entry_; }
+
+    // Register used for passing Thread::Current()->exception_.
+    const Arm64ManagedRegister scratch_;
+
+    // Stack adjust for ExceptionPoll.
+    const size_t stack_adjust_;
+
+    vixl::aarch64::Label exception_entry_;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
+  };
+
+  // Emits Exception block.
+  void EmitExceptionPoll(Arm64Exception* exception);
+
+  void StoreWToOffset(StoreOperandType type,
+                      WRegister source,
+                      XRegister base,
+                      int32_t offset);
+  void StoreToOffset(XRegister source, XRegister base, int32_t offset);
+  void StoreSToOffset(SRegister source, XRegister base, int32_t offset);
+  void StoreDToOffset(DRegister source, XRegister base, int32_t offset);
+
+  void LoadImmediate(XRegister dest,
+                     int32_t value,
+                     vixl::aarch64::Condition cond = vixl::aarch64::al);
+  void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size);
+  void LoadWFromOffset(LoadOperandType type,
+                       WRegister dest,
+                       XRegister base,
+                       int32_t offset);
+  void LoadFromOffset(XRegister dest, XRegister base, int32_t offset);
+  void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset);
+  void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset);
+  void AddConstant(XRegister rd,
+                   int32_t value,
+                   vixl::aarch64::Condition cond = vixl::aarch64::al);
+  void AddConstant(XRegister rd,
+                   XRegister rn,
+                   int32_t value,
+                   vixl::aarch64::Condition cond = vixl::aarch64::al);
+
+  // List of exception blocks to generate at the end of the generated code.
+  ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_;
+};
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index f7d74d2..7378a0a 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -17,8 +17,8 @@
 #ifndef ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
 #define ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
 
+#include "arch/arm64/registers_arm64.h"
 #include "base/logging.h"
-#include "constants_arm64.h"
 #include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
 
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 0a1b733..81159e6 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -121,137 +121,4 @@
   }
 }
 
-std::unique_ptr<Assembler> Assembler::Create(
-    ArenaAllocator* arena,
-    InstructionSet instruction_set,
-    const InstructionSetFeatures* instruction_set_features) {
-  switch (instruction_set) {
-#ifdef ART_ENABLE_CODEGEN_arm
-    case kArm:
-      return std::unique_ptr<Assembler>(new (arena) arm::Arm32Assembler(arena));
-    case kThumb2:
-      return std::unique_ptr<Assembler>(new (arena) arm::Thumb2Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_arm64
-    case kArm64:
-      return std::unique_ptr<Assembler>(new (arena) arm64::Arm64Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_mips
-    case kMips:
-      return std::unique_ptr<Assembler>(new (arena) mips::MipsAssembler(
-          arena,
-          instruction_set_features != nullptr
-              ? instruction_set_features->AsMipsInstructionSetFeatures()
-              : nullptr));
-#endif
-#ifdef ART_ENABLE_CODEGEN_mips64
-    case kMips64:
-      return std::unique_ptr<Assembler>(new (arena) mips64::Mips64Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_x86
-    case kX86:
-      return std::unique_ptr<Assembler>(new (arena) x86::X86Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_x86_64
-    case kX86_64:
-      return std::unique_ptr<Assembler>(new (arena) x86_64::X86_64Assembler(arena));
-#endif
-    default:
-      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
-  }
-}
-
-void Assembler::StoreImmediateToThread32(ThreadOffset32 dest ATTRIBUTE_UNUSED,
-                                         uint32_t imm ATTRIBUTE_UNUSED,
-                                         ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreImmediateToThread64(ThreadOffset64 dest ATTRIBUTE_UNUSED,
-                                         uint32_t imm ATTRIBUTE_UNUSED,
-                                         ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackOffsetToThread32(
-    ThreadOffset32 thr_offs ATTRIBUTE_UNUSED,
-    FrameOffset fr_offs ATTRIBUTE_UNUSED,
-    ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackOffsetToThread64(
-    ThreadOffset64 thr_offs ATTRIBUTE_UNUSED,
-    FrameOffset fr_offs ATTRIBUTE_UNUSED,
-    ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackPointerToThread32(
-    ThreadOffset32 thr_offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackPointerToThread64(
-    ThreadOffset64 thr_offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                 ThreadOffset32 src ATTRIBUTE_UNUSED,
-                                 size_t size ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                 ThreadOffset64 src ATTRIBUTE_UNUSED,
-                                 size_t size ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadRawPtrFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                       ThreadOffset32 offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadRawPtrFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                       ThreadOffset64 offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                       ThreadOffset32 thr_offs ATTRIBUTE_UNUSED,
-                                       ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                       ThreadOffset64 thr_offs ATTRIBUTE_UNUSED,
-                                       ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrToThread32(ThreadOffset32 thr_offs ATTRIBUTE_UNUSED,
-                                     FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                     ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrToThread64(ThreadOffset64 thr_offs ATTRIBUTE_UNUSED,
-                                     FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                     ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CallFromThread32(ThreadOffset32 offset ATTRIBUTE_UNUSED,
-                                 ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CallFromThread64(ThreadOffset64 offset ATTRIBUTE_UNUSED,
-                                 ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
 }  // namespace art
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 89f7947..8981776 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -356,11 +356,6 @@
 
 class Assembler : public DeletableArenaObject<kArenaAllocAssembler> {
  public:
-  static std::unique_ptr<Assembler> Create(
-      ArenaAllocator* arena,
-      InstructionSet instruction_set,
-      const InstructionSetFeatures* instruction_set_features = nullptr);
-
   // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
   virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
 
@@ -376,144 +371,6 @@
   // TODO: Implement with disassembler.
   virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {}
 
-  // Emit code that will create an activation on the stack
-  virtual void BuildFrame(size_t frame_size,
-                          ManagedRegister method_reg,
-                          ArrayRef<const ManagedRegister> callee_save_regs,
-                          const ManagedRegisterEntrySpills& entry_spills) = 0;
-
-  // Emit code that will remove an activation from the stack
-  virtual void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) = 0;
-
-  virtual void IncreaseFrameSize(size_t adjust) = 0;
-  virtual void DecreaseFrameSize(size_t adjust) = 0;
-
-  // Store routines
-  virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
-  virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
-  virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
-
-  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) = 0;
-
-  virtual void StoreImmediateToThread32(ThreadOffset32 dest,
-                                        uint32_t imm,
-                                        ManagedRegister scratch);
-  virtual void StoreImmediateToThread64(ThreadOffset64 dest,
-                                        uint32_t imm,
-                                        ManagedRegister scratch);
-
-  virtual void StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister scratch);
-  virtual void StoreStackOffsetToThread64(ThreadOffset64 thr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister scratch);
-
-  virtual void StoreStackPointerToThread32(ThreadOffset32 thr_offs);
-  virtual void StoreStackPointerToThread64(ThreadOffset64 thr_offs);
-
-  virtual void StoreSpanning(FrameOffset dest, ManagedRegister src,
-                             FrameOffset in_off, ManagedRegister scratch) = 0;
-
-  // Load routines
-  virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
-
-  virtual void LoadFromThread32(ManagedRegister dest, ThreadOffset32 src, size_t size);
-  virtual void LoadFromThread64(ManagedRegister dest, ThreadOffset64 src, size_t size);
-
-  virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
-  // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference.
-  virtual void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-                       bool unpoison_reference) = 0;
-
-  virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
-
-  virtual void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset32 offs);
-  virtual void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset64 offs);
-
-  // Copying routines
-  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
-
-  virtual void CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                      ThreadOffset32 thr_offs,
-                                      ManagedRegister scratch);
-  virtual void CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                      ThreadOffset64 thr_offs,
-                                      ManagedRegister scratch);
-
-  virtual void CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                                    FrameOffset fr_offs,
-                                    ManagedRegister scratch);
-  virtual void CopyRawPtrToThread64(ThreadOffset64 thr_offs,
-                                    FrameOffset fr_offs,
-                                    ManagedRegister scratch);
-
-  virtual void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) = 0;
-
-  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(ManagedRegister dest, Offset dest_offset,
-                    ManagedRegister src, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void MemoryBarrier(ManagedRegister scratch) = 0;
-
-  // Sign extension
-  virtual void SignExtend(ManagedRegister mreg, size_t size) = 0;
-
-  // Zero extension
-  virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
-
-  // Exploit fast access in managed code to Thread::Current()
-  virtual void GetCurrentThread(ManagedRegister tr) = 0;
-  virtual void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) = 0;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  virtual void CreateHandleScopeEntry(ManagedRegister out_reg,
-                                      FrameOffset handlescope_offset,
-                                      ManagedRegister in_reg,
-                                      bool null_allowed) = 0;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  virtual void CreateHandleScopeEntry(FrameOffset out_off,
-                                      FrameOffset handlescope_offset,
-                                      ManagedRegister scratch,
-                                      bool null_allowed) = 0;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) = 0;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0;
-  virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
-
-  // Call to address held at [base+offset]
-  virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
-  virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0;
-  virtual void CallFromThread32(ThreadOffset32 offset, ManagedRegister scratch);
-  virtual void CallFromThread64(ThreadOffset64 offset, ManagedRegister scratch);
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
-
   virtual void Bind(Label* label) = 0;
   virtual void Jump(Label* label) = 0;
 
@@ -525,13 +382,17 @@
    */
   DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; }
 
- protected:
-  explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {}
-
   ArenaAllocator* GetArena() {
     return buffer_.GetArena();
   }
 
+  AssemblerBuffer* GetBuffer() {
+    return &buffer_;
+  }
+
+ protected:
+  explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {}
+
   AssemblerBuffer buffer_;
 
   DebugFrameOpCodeWriterForAssembler cfi_;
diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc
new file mode 100644
index 0000000..797a98c
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler.h"
+
+#include <algorithm>
+#include <vector>
+
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "arm/jni_macro_assembler_arm.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "arm64/jni_macro_assembler_arm64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "mips/assembler_mips.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "mips64/assembler_mips64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "x86/jni_macro_assembler_x86.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "x86_64/jni_macro_assembler_x86_64.h"
+#endif
+#include "base/casts.h"
+#include "globals.h"
+#include "memory_region.h"
+
+namespace art {
+
+using MacroAsm32UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k32>>;
+
+template <>
+MacroAsm32UniquePtr JNIMacroAssembler<PointerSize::k32>::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features) {
+#ifndef ART_ENABLE_CODEGEN_mips
+  UNUSED(instruction_set_features);
+#endif
+
+  switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kArm:
+    case kThumb2:
+      return MacroAsm32UniquePtr(new (arena) arm::ArmJNIMacroAssembler(arena, instruction_set));
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips:
+      return MacroAsm32UniquePtr(new (arena) mips::MipsAssembler(
+          arena,
+          instruction_set_features != nullptr
+              ? instruction_set_features->AsMipsInstructionSetFeatures()
+              : nullptr));
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+    case kX86:
+      return MacroAsm32UniquePtr(new (arena) x86::X86JNIMacroAssembler(arena));
+#endif
+    default:
+      LOG(FATAL) << "Unknown/unsupported 4B InstructionSet: " << instruction_set;
+      UNREACHABLE();
+  }
+}
+
+using MacroAsm64UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k64>>;
+
+template <>
+MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features ATTRIBUTE_UNUSED) {
+  switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm64
+    case kArm64:
+      return MacroAsm64UniquePtr(new (arena) arm64::Arm64JNIMacroAssembler(arena));
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+    case kMips64:
+      return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(arena));
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+    case kX86_64:
+      return MacroAsm64UniquePtr(new (arena) x86_64::X86_64JNIMacroAssembler(arena));
+#endif
+    default:
+      LOG(FATAL) << "Unknown/unsupported 8B InstructionSet: " << instruction_set;
+      UNREACHABLE();
+  }
+}
+
+}  // namespace art
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
new file mode 100644
index 0000000..6f45bd6
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
+#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
+
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "base/arena_allocator.h"
+#include "base/arena_object.h"
+#include "base/enums.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "managed_register.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+
+namespace art {
+
+class ArenaAllocator;
+class DebugFrameOpCodeWriterForAssembler;
+class InstructionSetFeatures;
+class MemoryRegion;
+
+template <PointerSize kPointerSize>
+class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
+ public:
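+  // Creates the assembler variant for the given instruction set. A minimal usage sketch
+  // (assuming an ArenaAllocator* arena is available):
+  //   auto jni_asm = JNIMacroAssembler<PointerSize::k64>::Create(arena, kArm64);
+  //   jni_asm->BuildFrame(...);  // ...emit the stub body...
+  //   jni_asm->FinalizeCode();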
+  static std::unique_ptr<JNIMacroAssembler<kPointerSize>> Create(
+      ArenaAllocator* arena,
+      InstructionSet instruction_set,
+      const InstructionSetFeatures* instruction_set_features = nullptr);
+
+  // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
+  virtual void FinalizeCode() = 0;
+
+  // Size of generated code
+  virtual size_t CodeSize() const = 0;
+
+  // Copy instructions out of assembly buffer into the given region of memory
+  virtual void FinalizeInstructions(const MemoryRegion& region) = 0;
+
+  // Emit code that will create an activation on the stack
+  virtual void BuildFrame(size_t frame_size,
+                          ManagedRegister method_reg,
+                          ArrayRef<const ManagedRegister> callee_save_regs,
+                          const ManagedRegisterEntrySpills& entry_spills) = 0;
+
+  // Emit code that will remove an activation from the stack
+  virtual void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) = 0;
+
+  virtual void IncreaseFrameSize(size_t adjust) = 0;
+  virtual void DecreaseFrameSize(size_t adjust) = 0;
+
+  // Store routines
+  virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
+  virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
+  virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
+
+  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) = 0;
+
+  virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs,
+                                        FrameOffset fr_offs,
+                                        ManagedRegister scratch) = 0;
+
+  virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0;
+
+  virtual void StoreSpanning(FrameOffset dest,
+                             ManagedRegister src,
+                             FrameOffset in_off,
+                             ManagedRegister scratch) = 0;
+
+  // Load routines
+  virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
+
+  virtual void LoadFromThread(ManagedRegister dest,
+                              ThreadOffset<kPointerSize> src,
+                              size_t size) = 0;
+
+  virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
+  // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference.
+  virtual void LoadRef(ManagedRegister dest,
+                       ManagedRegister base,
+                       MemberOffset offs,
+                       bool unpoison_reference) = 0;
+
+  virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
+
+  virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0;
+
+  // Copying routines
+  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
+
+  virtual void CopyRawPtrFromThread(FrameOffset fr_offs,
+                                    ThreadOffset<kPointerSize> thr_offs,
+                                    ManagedRegister scratch) = 0;
+
+  virtual void CopyRawPtrToThread(ThreadOffset<kPointerSize> thr_offs,
+                                  FrameOffset fr_offs,
+                                  ManagedRegister scratch) = 0;
+
+  virtual void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) = 0;
+
+  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    ManagedRegister src_base,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(ManagedRegister dest_base,
+                    Offset dest_offset,
+                    FrameOffset src,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    FrameOffset src_base,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(ManagedRegister dest,
+                    Offset dest_offset,
+                    ManagedRegister src,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    Offset dest_offset,
+                    FrameOffset src,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void MemoryBarrier(ManagedRegister scratch) = 0;
+
+  // Sign extension
+  virtual void SignExtend(ManagedRegister mreg, size_t size) = 0;
+
+  // Zero extension
+  virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
+
+  // Exploit fast access in managed code to Thread::Current()
+  virtual void GetCurrentThread(ManagedRegister tr) = 0;
+  virtual void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) = 0;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  virtual void CreateHandleScopeEntry(ManagedRegister out_reg,
+                                      FrameOffset handlescope_offset,
+                                      ManagedRegister in_reg,
+                                      bool null_allowed) = 0;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  virtual void CreateHandleScopeEntry(FrameOffset out_off,
+                                      FrameOffset handlescope_offset,
+                                      ManagedRegister scratch,
+                                      bool null_allowed) = 0;
+
+  // src holds a handle scope entry (Object**); load this into dst.
+  virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) = 0;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0;
+  virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
+
+  // Call to address held at [base+offset]
+  virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void CallFromThread(ThreadOffset<kPointerSize> offset, ManagedRegister scratch) = 0;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
+
+  virtual ~JNIMacroAssembler() {}
+
+  /**
+   * @brief Buffer of DWARF's Call Frame Information opcodes.
+   * @details It is used by debuggers and other tools to unwind the call stack.
+   */
+  virtual DebugFrameOpCodeWriterForAssembler& cfi() = 0;
+
+ protected:
+  explicit JNIMacroAssembler() {}
+};
+
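+// Forwarding helper: implements the shared plumbing (code size, finalization, CFI access) by
+// delegating to an embedded architecture-specific assembler of type T.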
+template <typename T, PointerSize kPointerSize>
+class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> {
+ public:
+  void FinalizeCode() OVERRIDE {
+    asm_.FinalizeCode();
+  }
+
+  size_t CodeSize() const OVERRIDE {
+    return asm_.CodeSize();
+  }
+
+  void FinalizeInstructions(const MemoryRegion& region) OVERRIDE {
+    asm_.FinalizeInstructions(region);
+  }
+
+  DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE {
+    return asm_.cfi();
+  }
+
+ protected:
+  explicit JNIMacroAssemblerFwd(ArenaAllocator* arena) : asm_(arena) {}
+
+  T asm_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h
new file mode 100644
index 0000000..829f34b
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler_test.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
+#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
+
+#include "jni_macro_assembler.h"
+
+#include "assembler_test_base.h"
+#include "common_runtime_test.h"  // For ScratchFile
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+#include <sys/stat.h>
+
+namespace art {
+
+template<typename Ass>
+class JNIMacroAssemblerTest : public testing::Test {
+ public:
+  Ass* GetAssembler() {
+    return assembler_.get();
+  }
+
+  typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler);
+
+  void DriverFn(TestFn f, std::string test_name) {
+    DriverWrapper(f(this, assembler_.get()), test_name);
+  }
+
+  // This driver assumes the assembler has already been called.
+  void DriverStr(std::string assembly_string, std::string test_name) {
+    DriverWrapper(assembly_string, test_name);
+  }
+
+  // This is intended to be run as a test.
+  bool CheckTools() {
+    return test_helper_->CheckTools();
+  }
+
+ protected:
+  explicit JNIMacroAssemblerTest() {}
+
+  void SetUp() OVERRIDE {
+    arena_.reset(new ArenaAllocator(&pool_));
+    assembler_.reset(CreateAssembler(arena_.get()));
+    test_helper_.reset(
+        new AssemblerTestInfrastructure(GetArchitectureString(),
+                                        GetAssemblerCmdName(),
+                                        GetAssemblerParameters(),
+                                        GetObjdumpCmdName(),
+                                        GetObjdumpParameters(),
+                                        GetDisassembleCmdName(),
+                                        GetDisassembleParameters(),
+                                        GetAssemblyHeader()));
+
+    SetUpHelpers();
+  }
+
+  void TearDown() OVERRIDE {
+    test_helper_.reset();  // Clean up the helper.
+    assembler_.reset();
+    arena_.reset();
+  }
+
+  // Override this to set up any architecture-specific things, e.g., CPU revision.
+  virtual Ass* CreateAssembler(ArenaAllocator* arena) {
+    return new (arena) Ass(arena);
+  }
+
+  // Override this to set up any architecture-specific things, e.g., register vectors.
+  virtual void SetUpHelpers() {}
+
+  // Get the typically used name for this architecture, e.g., aarch64, x86_64, ...
+  virtual std::string GetArchitectureString() = 0;
+
+  // Get the name of the assembler, e.g., "as" by default.
+  virtual std::string GetAssemblerCmdName() {
+    return "as";
+  }
+
+  // Switches to the assembler command. Default none.
+  virtual std::string GetAssemblerParameters() {
+    return "";
+  }
+
+  // Get the name of the objdump, e.g., "objdump" by default.
+  virtual std::string GetObjdumpCmdName() {
+    return "objdump";
+  }
+
+  // Switches to the objdump command. Default is " -h".
+  virtual std::string GetObjdumpParameters() {
+    return " -h";
+  }
+
+  // Get the name of the disassembler, e.g., "objdump" by default.
+  virtual std::string GetDisassembleCmdName() {
+    return "objdump";
+  }
+
+  // Switches to the disassemble command. Since the input is a raw binary, the architecture
+  // must be passed to objdump, so this is architecture-specific and there is no default.
+  virtual std::string GetDisassembleParameters() = 0;
+
+  // If the assembly file needs a header, return it in a sub-class.
+  virtual const char* GetAssemblyHeader() {
+    return nullptr;
+  }
+
+ private:
+  // Override this to pad the code with NOPs to a certain size if needed.
+  virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
+  }
+
+  void DriverWrapper(std::string assembly_text, std::string test_name) {
+    assembler_->FinalizeCode();
+    size_t cs = assembler_->CodeSize();
+    std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
+    MemoryRegion code(&(*data)[0], data->size());
+    assembler_->FinalizeInstructions(code);
+    Pad(*data);
+    test_helper_->Driver(*data, assembly_text, test_name);
+  }
+
+  ArenaPool pool_;
+  std::unique_ptr<ArenaAllocator> arena_;
+  std::unique_ptr<Ass> assembler_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
+
+  DISALLOW_COPY_AND_ASSIGN(JNIMacroAssemblerTest);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
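A concrete per-architecture test would subclass JNIMacroAssemblerTest and supply the pure-virtual tool queries. The sketch below is illustrative only; the class name, assembler type, and objdump flags are assumptions rather than the actual tests added in this change.

// Sketch only: a hypothetical specialization for an x86-64 JNI macro assembler test.
class SketchX86_64JNIMacroAssemblerTest
    : public JNIMacroAssemblerTest<x86_64::X86_64Assembler> {
 protected:
  std::string GetArchitectureString() OVERRIDE {
    return "x86_64";
  }
  std::string GetDisassembleParameters() OVERRIDE {
    // Flags telling objdump to disassemble a raw binary for this architecture (assumed).
    return " -D -bbinary -mi386:x86-64 --no-show-raw-insn";
  }
};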
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index e6b32de..8b7da3f 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -2799,27 +2799,17 @@
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::StoreImmediateToThread32(ThreadOffset32 dest,
-                                             uint32_t imm,
+void MipsAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                             FrameOffset fr_offs,
                                              ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  // Is this function even referenced anywhere else in the code?
-  LoadConst32(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, dest.Int32Value());
-}
-
-void MipsAssembler::StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                               FrameOffset fr_offs,
-                                               ManagedRegister mscratch) {
-  MipsManagedRegister scratch = mscratch.AsMips();
-  CHECK(scratch.IsCoreRegister()) << scratch;
   Addiu32(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
                 S1, thr_offs.Int32Value());
 }
 
-void MipsAssembler::StoreStackPointerToThread32(ThreadOffset32 thr_offs) {
+void MipsAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
   StoreToOffset(kStoreWord, SP, S1, thr_offs.Int32Value());
 }
 
@@ -2836,7 +2826,7 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void MipsAssembler::LoadFromThread32(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
+void MipsAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -2864,7 +2854,7 @@
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
 }
 
-void MipsAssembler::LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset32 offs) {
+void MipsAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
   MipsManagedRegister dest = mdest.AsMips();
   CHECK(dest.IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(), S1, offs.Int32Value());
@@ -2918,9 +2908,9 @@
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                           ThreadOffset32 thr_offs,
-                                           ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                         ThreadOffset32 thr_offs,
+                                         ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -2929,9 +2919,9 @@
                 SP, fr_offs.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                                         FrameOffset fr_offs,
-                                         ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                       FrameOffset fr_offs,
+                                       ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -3103,8 +3093,8 @@
   // TODO: place reference map on call.
 }
 
-void MipsAssembler::CallFromThread32(ThreadOffset32 offset ATTRIBUTE_UNUSED,
-                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+void MipsAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
+                                   ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "no mips implementation";
 }
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 852ced6..41b6c6b 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -23,12 +23,14 @@
 
 #include "arch/mips/instruction_set_features_mips.h"
 #include "base/arena_containers.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_mips.h"
 #include "globals.h"
 #include "managed_register_mips.h"
 #include "offsets.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/label.h"
 
 namespace art {
@@ -145,7 +147,7 @@
   DISALLOW_COPY_AND_ASSIGN(MipsExceptionSlowPath);
 };
 
-class MipsAssembler FINAL : public Assembler {
+class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k32> {
  public:
   explicit MipsAssembler(ArenaAllocator* arena,
                          const MipsInstructionSetFeatures* instruction_set_features = nullptr)
@@ -160,6 +162,9 @@
     cfi().DelayEmittingAdvancePCs();
   }
 
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+
   virtual ~MipsAssembler() {
     for (auto& branch : branches_) {
       CHECK(branch.IsResolved());
@@ -500,15 +505,11 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreImmediateToThread32(ThreadOffset32 dest,
-                                uint32_t imm,
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
                                 ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                  FrameOffset fr_offs,
-                                  ManagedRegister mscratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset32 thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest,
                      ManagedRegister msrc,
@@ -518,7 +519,7 @@
   // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread32(ManagedRegister mdest, ThreadOffset32 src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -529,19 +530,19 @@
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset32 offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread32(FrameOffset fr_offs,
-                              ThreadOffset32 thr_offs,
-                              ManagedRegister mscratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                            FrameOffset fr_offs,
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                          FrameOffset fr_offs,
+                          ManagedRegister mscratch) OVERRIDE;
+
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
@@ -617,7 +618,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset32 offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 3fd77a0..a2621cb 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -2115,16 +2115,16 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset64 thr_offs,
-                                                 FrameOffset fr_offs,
-                                                 ManagedRegister mscratch) {
+void Mips64Assembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                               FrameOffset fr_offs,
+                                               ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   Daddiu64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset64 thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
   StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
 }
 
@@ -2141,7 +2141,7 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset64 src, size_t size) {
+void Mips64Assembler::LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -2174,7 +2174,7 @@
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
 }
 
-void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset64 offs) {
+void Mips64Assembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
@@ -2218,18 +2218,18 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset64 thr_offs,
-                                             ManagedRegister mscratch) {
+void Mips64Assembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                           ThreadOffset64 thr_offs,
+                                           ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset64 thr_offs,
-                                           FrameOffset fr_offs,
-                                           ManagedRegister mscratch) {
+void Mips64Assembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                                         FrameOffset fr_offs,
+                                         ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
@@ -2431,8 +2431,8 @@
   // TODO: place reference map on call
 }
 
-void Mips64Assembler::CallFromThread64(ThreadOffset64 offset ATTRIBUTE_UNUSED,
-                                       ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+void Mips64Assembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
+                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 1ad05b0..a7d350c 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -20,12 +20,14 @@
 #include <utility>
 #include <vector>
 
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_mips64.h"
 #include "globals.h"
 #include "managed_register_mips64.h"
 #include "offsets.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/label.h"
 
 namespace art {
@@ -100,7 +102,7 @@
   DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath);
 };
 
-class Mips64Assembler FINAL : public Assembler {
+class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> {
  public:
   explicit Mips64Assembler(ArenaAllocator* arena)
       : Assembler(arena),
@@ -118,6 +120,9 @@
     }
   }
 
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+
   // Emit Machine Instructions.
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -383,11 +388,11 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread64(ThreadOffset64 thr_offs,
-                                  FrameOffset fr_offs,
-                                  ManagedRegister mscratch) OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread64(ThreadOffset64 thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
@@ -395,7 +400,7 @@
   // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread64(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -404,19 +409,19 @@
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs,
-                              ThreadOffset64 thr_offs,
-                              ManagedRegister mscratch) OVERRIDE;
-
-  void CopyRawPtrToThread64(ThreadOffset64 thr_offs,
-                            FrameOffset fr_offs,
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                          FrameOffset fr_offs,
+                          ManagedRegister mscratch) OVERRIDE;
+
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
@@ -471,7 +476,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index bd5fc40..f1a9915 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1943,489 +1943,6 @@
   EmitOperand(reg_or_opcode, operand);
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::X86Core(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = 4;
-
-void X86Assembler::BuildFrame(size_t frame_size,
-                              ManagedRegister method_reg,
-                              ArrayRef<const ManagedRegister> spill_regs,
-                              const ManagedRegisterEntrySpills& entry_spills) {
-  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
-  cfi_.SetCurrentCFAOffset(4);  // Return address on stack.
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  int gpr_count = 0;
-  for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    Register spill = spill_regs[i].AsX86().AsCpuRegister();
-    pushl(spill);
-    gpr_count++;
-    cfi_.AdjustCFAOffset(kFramePointerSize);
-    cfi_.RelOffset(DWARFReg(spill), 0);
-  }
-
-  // return address then method on stack.
-  int32_t adjust = frame_size - gpr_count * kFramePointerSize -
-      kFramePointerSize /*method*/ -
-      kFramePointerSize /*return address*/;
-  addl(ESP, Immediate(-adjust));
-  cfi_.AdjustCFAOffset(adjust);
-  pushl(method_reg.AsX86().AsCpuRegister());
-  cfi_.AdjustCFAOffset(kFramePointerSize);
-  DCHECK_EQ(static_cast<size_t>(cfi_.GetCurrentCFAOffset()), frame_size);
-
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ManagedRegisterSpill spill = entry_spills.at(i);
-    if (spill.AsX86().IsCpuRegister()) {
-      int offset = frame_size + spill.getSpillOffset();
-      movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
-    } else {
-      DCHECK(spill.AsX86().IsXmmRegister());
-      if (spill.getSize() == 8) {
-        movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      }
-    }
-  }
-}
-
-void X86Assembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-  // -kFramePointerSize for ArtMethod*.
-  int adjust = frame_size - spill_regs.size() * kFramePointerSize - kFramePointerSize;
-  addl(ESP, Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-  for (size_t i = 0; i < spill_regs.size(); ++i) {
-    Register spill = spill_regs[i].AsX86().AsCpuRegister();
-    popl(spill);
-    cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
-    cfi_.Restore(DWARFReg(spill));
-  }
-  ret();
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void X86Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addl(ESP, Immediate(-adjust));
-  cfi_.AdjustCFAOffset(adjust);
-}
-
-void X86Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addl(ESP, Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-}
-
-void X86Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
-  X86ManagedRegister src = msrc.AsX86();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    movl(Address(ESP, offs), src.AsCpuRegister());
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    movl(Address(ESP, offs), src.AsRegisterPairLow());
-    movl(Address(ESP, FrameOffset(offs.Int32Value()+4)),
-         src.AsRegisterPairHigh());
-  } else if (src.IsX87Register()) {
-    if (size == 4) {
-      fstps(Address(ESP, offs));
-    } else {
-      fstpl(Address(ESP, offs));
-    }
-  } else {
-    CHECK(src.IsXmmRegister());
-    if (size == 4) {
-      movss(Address(ESP, offs), src.AsXmmRegister());
-    } else {
-      movsd(Address(ESP, offs), src.AsXmmRegister());
-    }
-  }
-}
-
-void X86Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
-  X86ManagedRegister src = msrc.AsX86();
-  CHECK(src.IsCpuRegister());
-  movl(Address(ESP, dest), src.AsCpuRegister());
-}
-
-void X86Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
-  X86ManagedRegister src = msrc.AsX86();
-  CHECK(src.IsCpuRegister());
-  movl(Address(ESP, dest), src.AsCpuRegister());
-}
-
-void X86Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                         ManagedRegister) {
-  movl(Address(ESP, dest), Immediate(imm));
-}
-
-void X86Assembler::StoreImmediateToThread32(ThreadOffset32 dest, uint32_t imm, ManagedRegister) {
-  fs()->movl(Address::Absolute(dest), Immediate(imm));
-}
-
-void X86Assembler::StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                              FrameOffset fr_offs,
-                                              ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
-  fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
-}
-
-void X86Assembler::StoreStackPointerToThread32(ThreadOffset32 thr_offs) {
-  fs()->movl(Address::Absolute(thr_offs), ESP);
-}
-
-void X86Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/,
-                                 FrameOffset /*in_off*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
-}
-
-void X86Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
-  X86ManagedRegister dest = mdest.AsX86();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    movl(dest.AsCpuRegister(), Address(ESP, src));
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    movl(dest.AsRegisterPairLow(), Address(ESP, src));
-    movl(dest.AsRegisterPairHigh(), Address(ESP, FrameOffset(src.Int32Value()+4)));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      flds(Address(ESP, src));
-    } else {
-      fldl(Address(ESP, src));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      movss(dest.AsXmmRegister(), Address(ESP, src));
-    } else {
-      movsd(dest.AsXmmRegister(), Address(ESP, src));
-    }
-  }
-}
-
-void X86Assembler::LoadFromThread32(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
-  X86ManagedRegister dest = mdest.AsX86();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    fs()->movl(dest.AsCpuRegister(), Address::Absolute(src));
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src));
-    fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset32(src.Int32Value()+4)));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      fs()->flds(Address::Absolute(src));
-    } else {
-      fs()->fldl(Address::Absolute(src));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      fs()->movss(dest.AsXmmRegister(), Address::Absolute(src));
-    } else {
-      fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src));
-    }
-  }
-}
-
-void X86Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(ESP, src));
-}
-
-void X86Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                           bool unpoison_reference) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
-  if (unpoison_reference) {
-    MaybeUnpoisonHeapReference(dest.AsCpuRegister());
-  }
-}
-
-void X86Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                              Offset offs) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
-}
-
-void X86Assembler::LoadRawPtrFromThread32(ManagedRegister mdest,
-                                          ThreadOffset32 offs) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister());
-  fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs));
-}
-
-void X86Assembler::SignExtend(ManagedRegister mreg, size_t size) {
-  X86ManagedRegister reg = mreg.AsX86();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movsxb(reg.AsCpuRegister(), reg.AsByteRegister());
-  } else {
-    movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
-  X86ManagedRegister reg = mreg.AsX86();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movzxb(reg.AsCpuRegister(), reg.AsByteRegister());
-  } else {
-    movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
-  X86ManagedRegister dest = mdest.AsX86();
-  X86ManagedRegister src = msrc.AsX86();
-  if (!dest.Equals(src)) {
-    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
-      movl(dest.AsCpuRegister(), src.AsCpuRegister());
-    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
-      // Pass via stack and pop X87 register
-      subl(ESP, Immediate(16));
-      if (size == 4) {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstps(Address(ESP, 0));
-        movss(dest.AsXmmRegister(), Address(ESP, 0));
-      } else {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstpl(Address(ESP, 0));
-        movsd(dest.AsXmmRegister(), Address(ESP, 0));
-      }
-      addl(ESP, Immediate(16));
-    } else {
-      // TODO: x87, SSE
-      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
-    }
-  }
-}
-
-void X86Assembler::CopyRef(FrameOffset dest, FrameOffset src,
-                           ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  movl(scratch.AsCpuRegister(), Address(ESP, src));
-  movl(Address(ESP, dest), scratch.AsCpuRegister());
-}
-
-void X86Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                          ThreadOffset32 thr_offs,
-                                          ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
-  Store(fr_offs, scratch, 4);
-}
-
-void X86Assembler::CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                                        FrameOffset fr_offs,
-                                        ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  Load(scratch, fr_offs, 4);
-  fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
-}
-
-void X86Assembler::Copy(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch,
-                        size_t size) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
-  } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
-  }
-}
-
-void X86Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/,
-                        ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void X86Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                        ManagedRegister scratch, size_t size) {
-  CHECK(scratch.IsNoRegister());
-  CHECK_EQ(size, 4u);
-  pushl(Address(ESP, src));
-  popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset));
-}
-
-void X86Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  movl(scratch, Address(ESP, src_base));
-  movl(scratch, Address(scratch, src_offset));
-  movl(Address(ESP, dest), scratch);
-}
-
-void X86Assembler::Copy(ManagedRegister dest, Offset dest_offset,
-                        ManagedRegister src, Offset src_offset,
-                        ManagedRegister scratch, size_t size) {
-  CHECK_EQ(size, 4u);
-  CHECK(scratch.IsNoRegister());
-  pushl(Address(src.AsX86().AsCpuRegister(), src_offset));
-  popl(Address(dest.AsX86().AsCpuRegister(), dest_offset));
-}
-
-void X86Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  CHECK_EQ(dest.Int32Value(), src.Int32Value());
-  movl(scratch, Address(ESP, src));
-  pushl(Address(scratch, src_offset));
-  popl(Address(scratch, dest_offset));
-}
-
-void X86Assembler::MemoryBarrier(ManagedRegister) {
-  mfence();
-}
-
-void X86Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister min_reg, bool null_allowed) {
-  X86ManagedRegister out_reg = mout_reg.AsX86();
-  X86ManagedRegister in_reg = min_reg.AsX86();
-  CHECK(in_reg.IsCpuRegister());
-  CHECK(out_reg.IsCpuRegister());
-  VerifyObject(in_reg, null_allowed);
-  if (null_allowed) {
-    Label null_arg;
-    if (!out_reg.Equals(in_reg)) {
-      xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-    }
-    testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-    j(kZero, &null_arg);
-    leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
-  }
-}
-
-void X86Assembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister mscratch,
-                                   bool null_allowed) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  if (null_allowed) {
-    Label null_arg;
-    movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
-    j(kZero, &null_arg);
-    leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-  }
-  Store(out_off, scratch, 4);
-}
-
-// Given a handle scope entry, load the associated reference.
-void X86Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                         ManagedRegister min_reg) {
-  X86ManagedRegister out_reg = mout_reg.AsX86();
-  X86ManagedRegister in_reg = min_reg.AsX86();
-  CHECK(out_reg.IsCpuRegister());
-  CHECK(in_reg.IsCpuRegister());
-  Label null_arg;
-  if (!out_reg.Equals(in_reg)) {
-    xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-  }
-  testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-  j(kZero, &null_arg);
-  movl(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
-  Bind(&null_arg);
-}
-
-void X86Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
-  X86ManagedRegister base = mbase.AsX86();
-  CHECK(base.IsCpuRegister());
-  call(Address(base.AsCpuRegister(), offset.Int32Value()));
-  // TODO: place reference map on call
-}
-
-void X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  movl(scratch, Address(ESP, base));
-  call(Address(scratch, offset));
-}
-
-void X86Assembler::CallFromThread32(ThreadOffset32 offset, ManagedRegister /*mscratch*/) {
-  fs()->call(Address::Absolute(offset));
-}
-
-void X86Assembler::GetCurrentThread(ManagedRegister tr) {
-  fs()->movl(tr.AsX86().AsCpuRegister(),
-             Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
-}
-
-void X86Assembler::GetCurrentThread(FrameOffset offset,
-                                    ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
-  movl(Address(ESP, offset), scratch.AsCpuRegister());
-}
-
-void X86Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86ExceptionSlowPath* slow = new (GetArena()) X86ExceptionSlowPath(stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()), Immediate(0));
-  j(kNotEqual, slow->Entry());
-}
-
-void X86ExceptionSlowPath::Emit(Assembler *sasm) {
-  X86Assembler* sp_asm = down_cast<X86Assembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  // Note: the return value is dead
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
-  }
-  // Pass exception as argument in EAX
-  __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()));
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pDeliverException)));
-  // this call should never return
-  __ int3();
-#undef __
-}
-
 void X86Assembler::AddConstantArea() {
   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   // Generate the data for the literal area.
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 6d519e4..92a92a5 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -21,6 +21,7 @@
 
 #include "base/arena_containers.h"
 #include "base/bit_utils.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_x86.h"
 #include "globals.h"
@@ -631,124 +632,6 @@
   void Bind(NearLabel* label);
 
   //
-  // Overridden common assembler high-level functionality
-  //
-
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size,
-                  ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
-      OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-
-  void StoreImmediateToThread32(ThreadOffset32 dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-
-  void StoreStackOffsetToThread32(ThreadOffset32 thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset32 thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-
-  void LoadFromThread32(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-
-  void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
-
-  // Copying routines
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset32 thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void MemoryBarrier(ManagedRegister) OVERRIDE;
-
-  // Sign extension
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
-  //
   // Heap poisoning.
   //
 
@@ -845,15 +728,6 @@
   EmitUint8(0x66);
 }
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class X86ExceptionSlowPath FINAL : public SlowPath {
- public:
-  explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const size_t stack_adjust_;
-};
-
 }  // namespace x86
 }  // namespace art
 
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
new file mode 100644
index 0000000..77af885
--- /dev/null
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -0,0 +1,541 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_x86.h"
+
+#include "utils/assembler.h"
+#include "base/casts.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "thread.h"
+
+namespace art {
+namespace x86 {
+
+// Slowpath entered when Thread::Current()->exception_ is non-null.
+class X86ExceptionSlowPath FINAL : public SlowPath {
+ public:
+  explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
+  virtual void Emit(Assembler *sp_asm) OVERRIDE;
+ private:
+  const size_t stack_adjust_;
+};
+
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86Core(static_cast<int>(reg));
+}
+
+constexpr size_t kFramePointerSize = 4;
+
+#define __ asm_.
+
+void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
+                                      ManagedRegister method_reg,
+                                      ArrayRef<const ManagedRegister> spill_regs,
+                                      const ManagedRegisterEntrySpills& entry_spills) {
+  DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
+  cfi().SetCurrentCFAOffset(4);  // Return address on stack.
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  int gpr_count = 0;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
+    __ pushl(spill);
+    gpr_count++;
+    cfi().AdjustCFAOffset(kFramePointerSize);
+    cfi().RelOffset(DWARFReg(spill), 0);
+  }
+
+  // return address then method on stack.
+  int32_t adjust = frame_size - gpr_count * kFramePointerSize -
+      kFramePointerSize /*method*/ -
+      kFramePointerSize /*return address*/;
+  __ addl(ESP, Immediate(-adjust));
+  cfi().AdjustCFAOffset(adjust);
+  __ pushl(method_reg.AsX86().AsCpuRegister());
+  cfi().AdjustCFAOffset(kFramePointerSize);
+  DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size);
+
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ManagedRegisterSpill spill = entry_spills.at(i);
+    if (spill.AsX86().IsCpuRegister()) {
+      int offset = frame_size + spill.getSpillOffset();
+      __ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
+    } else {
+      DCHECK(spill.AsX86().IsXmmRegister());
+      if (spill.getSize() == 8) {
+        __ movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        __ movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
+      }
+    }
+  }
+}
+
+void X86JNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                       ArrayRef<const ManagedRegister> spill_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi().RememberState();
+  // -kFramePointerSize for ArtMethod*.
+  int adjust = frame_size - spill_regs.size() * kFramePointerSize - kFramePointerSize;
+  __ addl(ESP, Immediate(adjust));
+  cfi().AdjustCFAOffset(-adjust);
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
+    __ popl(spill);
+    cfi().AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+    cfi().Restore(DWARFReg(spill));
+  }
+  __ ret();
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+void X86JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  __ addl(ESP, Immediate(-adjust));
+  cfi().AdjustCFAOffset(adjust);
+}
+
+static void DecreaseFrameSizeImpl(X86Assembler* assembler, size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  assembler->addl(ESP, Immediate(adjust));
+  assembler->cfi().AdjustCFAOffset(-adjust);
+}
+
+void X86JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  DecreaseFrameSizeImpl(&asm_, adjust);
+}
+
+void X86JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
+  X86ManagedRegister src = msrc.AsX86();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ movl(Address(ESP, offs), src.AsCpuRegister());
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ movl(Address(ESP, offs), src.AsRegisterPairLow());
+    __ movl(Address(ESP, FrameOffset(offs.Int32Value()+4)), src.AsRegisterPairHigh());
+  } else if (src.IsX87Register()) {
+    if (size == 4) {
+      __ fstps(Address(ESP, offs));
+    } else {
+      __ fstpl(Address(ESP, offs));
+    }
+  } else {
+    CHECK(src.IsXmmRegister());
+    if (size == 4) {
+      __ movss(Address(ESP, offs), src.AsXmmRegister());
+    } else {
+      __ movsd(Address(ESP, offs), src.AsXmmRegister());
+    }
+  }
+}
+
+void X86JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  X86ManagedRegister src = msrc.AsX86();
+  CHECK(src.IsCpuRegister());
+  __ movl(Address(ESP, dest), src.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  X86ManagedRegister src = msrc.AsX86();
+  CHECK(src.IsCpuRegister());
+  __ movl(Address(ESP, dest), src.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister) {
+  __ movl(Address(ESP, dest), Immediate(imm));
+}
+
+void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                                    FrameOffset fr_offs,
+                                                    ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  __ leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
+  __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
+  __ fs()->movl(Address::Absolute(thr_offs), ESP);
+}
+
+void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
+                                         ManagedRegister /*src*/,
+                                         FrameOffset /*in_off*/,
+                                         ManagedRegister /*scratch*/) {
+  UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
+}
+
+void X86JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  X86ManagedRegister dest = mdest.AsX86();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ movl(dest.AsCpuRegister(), Address(ESP, src));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ movl(dest.AsRegisterPairLow(), Address(ESP, src));
+    __ movl(dest.AsRegisterPairHigh(), Address(ESP, FrameOffset(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ flds(Address(ESP, src));
+    } else {
+      __ fldl(Address(ESP, src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ movss(dest.AsXmmRegister(), Address(ESP, src));
+    } else {
+      __ movsd(dest.AsXmmRegister(), Address(ESP, src));
+    }
+  }
+}
+
+void X86JNIMacroAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
+  X86ManagedRegister dest = mdest.AsX86();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(src));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src));
+    __ fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset32(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ fs()->flds(Address::Absolute(src));
+    } else {
+      __ fs()->fldl(Address::Absolute(src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ fs()->movss(dest.AsXmmRegister(), Address::Absolute(src));
+    } else {
+      __ fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src));
+    }
+  }
+}
+
+void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(ESP, src));
+}
+
+void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
+                                   bool unpoison_reference) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(dest.AsCpuRegister());
+  }
+}
+
+void X86JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest,
+                                      ManagedRegister base,
+                                      Offset offs) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
+}
+
+void X86JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister());
+  __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs));
+}
+
+void X86JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
+  X86ManagedRegister reg = mreg.AsX86();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movsxb(reg.AsCpuRegister(), reg.AsByteRegister());
+  } else {
+    __ movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  X86ManagedRegister reg = mreg.AsX86();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movzxb(reg.AsCpuRegister(), reg.AsByteRegister());
+  } else {
+    __ movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
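+// Register-to-register moves. Only GPR-to-GPR and X87-to-XMM are handled; the latter has no
+// direct transfer instruction, so the value is staged through a temporary stack slot.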
+void X86JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  X86ManagedRegister dest = mdest.AsX86();
+  X86ManagedRegister src = msrc.AsX86();
+  if (!dest.Equals(src)) {
+    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
+      __ movl(dest.AsCpuRegister(), src.AsCpuRegister());
+    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
+      // Pass via stack and pop X87 register
+      __ subl(ESP, Immediate(16));
+      if (size == 4) {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstps(Address(ESP, 0));
+        __ movss(dest.AsXmmRegister(), Address(ESP, 0));
+      } else {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstpl(Address(ESP, 0));
+        __ movsd(dest.AsXmmRegister(), Address(ESP, 0));
+      }
+      __ addl(ESP, Immediate(16));
+    } else {
+      // TODO: x87, SSE
+      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
+    }
+  }
+}
+
+void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  __ movl(scratch.AsCpuRegister(), Address(ESP, src));
+  __ movl(Address(ESP, dest), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                ThreadOffset32 thr_offs,
+                                                ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
+  Store(fr_offs, scratch, 4);
+}
+
+void X86JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                              FrameOffset fr_offs,
+                                              ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  Load(scratch, fr_offs, 4);
+  __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+}
+
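+// Stack-to-stack copies go through the scratch register; 8-byte copies are split into two
+// 4-byte halves because the scratch register is only 32 bits wide.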
+void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src,
+                        ManagedRegister mscratch,
+                        size_t size) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  if (scratch.IsCpuRegister() && size == 8) {
+    Load(scratch, src, 4);
+    Store(dest, scratch, 4);
+    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
+    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
+  } else {
+    Load(scratch, src, size);
+    Store(dest, scratch, size);
+  }
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                ManagedRegister /*src_base*/,
+                                Offset /*src_offset*/,
+                                ManagedRegister /*scratch*/,
+                                size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void X86JNIMacroAssembler::Copy(ManagedRegister dest_base,
+                                Offset dest_offset,
+                                FrameOffset src,
+                                ManagedRegister scratch,
+                                size_t size) {
+  CHECK(scratch.IsNoRegister());
+  CHECK_EQ(size, 4u);
+  __ pushl(Address(ESP, src));
+  __ popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset));
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset dest,
+                                FrameOffset src_base,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  __ movl(scratch, Address(ESP, src_base));
+  __ movl(scratch, Address(scratch, src_offset));
+  __ movl(Address(ESP, dest), scratch);
+}
+
+void X86JNIMacroAssembler::Copy(ManagedRegister dest,
+                                Offset dest_offset,
+                                ManagedRegister src,
+                                Offset src_offset,
+                                ManagedRegister scratch,
+                                size_t size) {
+  CHECK_EQ(size, 4u);
+  CHECK(scratch.IsNoRegister());
+  __ pushl(Address(src.AsX86().AsCpuRegister(), src_offset));
+  __ popl(Address(dest.AsX86().AsCpuRegister(), dest_offset));
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset dest,
+                                Offset dest_offset,
+                                FrameOffset src,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  CHECK_EQ(dest.Int32Value(), src.Int32Value());
+  __ movl(scratch, Address(ESP, src));
+  __ pushl(Address(scratch, src_offset));
+  __ popl(Address(scratch, dest_offset));
+}
+
+void X86JNIMacroAssembler::MemoryBarrier(ManagedRegister) {
+  __ mfence();
+}
+
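+// Materializes the address of a handle scope entry in out_reg. With null_allowed, out_reg is
+// zeroed first and only overwritten with the slot address when the incoming reference is
+// non-null, so a null argument is passed on as null.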
+void X86JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister min_reg,
+                                                  bool null_allowed) {
+  X86ManagedRegister out_reg = mout_reg.AsX86();
+  X86ManagedRegister in_reg = min_reg.AsX86();
+  CHECK(in_reg.IsCpuRegister());
+  CHECK(out_reg.IsCpuRegister());
+  VerifyObject(in_reg, null_allowed);
+  if (null_allowed) {
+    Label null_arg;
+    if (!out_reg.Equals(in_reg)) {
+      __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+    }
+    __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
+  }
+}
+
+void X86JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister mscratch,
+                                                  bool null_allowed) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  if (null_allowed) {
+    Label null_arg;
+    __ movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+  }
+  Store(out_off, scratch, 4);
+}
+
+// Given a handle scope entry, load the associated reference.
+void X86JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                        ManagedRegister min_reg) {
+  X86ManagedRegister out_reg = mout_reg.AsX86();
+  X86ManagedRegister in_reg = min_reg.AsX86();
+  CHECK(out_reg.IsCpuRegister());
+  CHECK(in_reg.IsCpuRegister());
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+  }
+  __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+  __ j(kZero, &null_arg);
+  __ movl(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
+  __ Bind(&null_arg);
+}
+
+void X86JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+  X86ManagedRegister base = mbase.AsX86();
+  CHECK(base.IsCpuRegister());
+  __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
+  // TODO: place reference map on call
+}
+
+void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  __ movl(scratch, Address(ESP, base));
+  __ call(Address(scratch, offset));
+}
+
+void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset, ManagedRegister /*mscratch*/) {
+  __ fs()->call(Address::Absolute(offset));
+}
+
+void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  __ fs()->movl(tr.AsX86().AsCpuRegister(),
+                Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
+}
+
+void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset,
+                                            ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
+  __ movl(Address(ESP, offset), scratch.AsCpuRegister());
+}
+
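+// Compares Thread::Current()->exception_ against null through the fs segment and branches to
+// a slow path that undoes any extra stack adjustment and delivers the exception.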
+void X86JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+  X86ExceptionSlowPath* slow = new (__ GetArena()) X86ExceptionSlowPath(stack_adjust);
+  __ GetBuffer()->EnqueueSlowPath(slow);
+  __ fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()), Immediate(0));
+  __ j(kNotEqual, slow->Entry());
+}
+
+#undef __
+
+void X86ExceptionSlowPath::Emit(Assembler* sasm) {
+  X86Assembler* sp_asm = down_cast<X86Assembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  // Note: the return value is dead
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSizeImpl(sp_asm, stack_adjust_);
+  }
+  // Pass exception as argument in EAX
+  __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()));
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pDeliverException)));
+  // this call should never return
+  __ int3();
+#undef __
+}
+
+}  // namespace x86
+}  // namespace art
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
new file mode 100644
index 0000000..3f07ede
--- /dev/null
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_
+#define ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_
+
+#include <vector>
+
+#include "assembler_x86.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+#include "utils/jni_macro_assembler.h"
+
+namespace art {
+namespace x86 {
+
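+// JNI macro assembler for x86: layers the managed ABI helpers (frame setup, handle scope
+// entries, thread accesses) on top of the X86Assembler wrapped by JNIMacroAssemblerFwd.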
+class X86JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86Assembler, PointerSize::k32> {
+ public:
+  explicit X86JNIMacroAssembler(ArenaAllocator* arena) : JNIMacroAssemblerFwd(arena) {}
+  virtual ~X86JNIMacroAssembler() {}
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+      OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
+                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
+                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load this into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src cannot be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to a ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(X86JNIMacroAssembler);
+};
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 977ce9d..ddc8244 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2639,547 +2639,6 @@
   }
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
-}
-static dwarf::Reg DWARFReg(FloatRegister reg) {
-  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = 8;
-
-void X86_64Assembler::BuildFrame(size_t frame_size,
-                                 ManagedRegister method_reg,
-                                 ArrayRef<const ManagedRegister> spill_regs,
-                                 const ManagedRegisterEntrySpills& entry_spills) {
-  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
-  cfi_.SetCurrentCFAOffset(8);  // Return address on stack.
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  int gpr_count = 0;
-  for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
-    if (spill.IsCpuRegister()) {
-      pushq(spill.AsCpuRegister());
-      gpr_count++;
-      cfi_.AdjustCFAOffset(kFramePointerSize);
-      cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0);
-    }
-  }
-  // return address then method on stack.
-  int64_t rest_of_frame = static_cast<int64_t>(frame_size)
-                          - (gpr_count * kFramePointerSize)
-                          - kFramePointerSize /*return address*/;
-  subq(CpuRegister(RSP), Immediate(rest_of_frame));
-  cfi_.AdjustCFAOffset(rest_of_frame);
-
-  // spill xmms
-  int64_t offset = rest_of_frame;
-  for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
-    if (spill.IsXmmRegister()) {
-      offset -= sizeof(double);
-      movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
-      cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset);
-    }
-  }
-
-  static_assert(static_cast<size_t>(kX86_64PointerSize) == kFramePointerSize,
-                "Unexpected frame pointer size.");
-
-  movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
-
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ManagedRegisterSpill spill = entry_spills.at(i);
-    if (spill.AsX86_64().IsCpuRegister()) {
-      if (spill.getSize() == 8) {
-        movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-             spill.AsX86_64().AsCpuRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsCpuRegister());
-      }
-    } else {
-      if (spill.getSize() == 8) {
-        movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
-      }
-    }
-  }
-}
-
-void X86_64Assembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-  int gpr_count = 0;
-  // unspill xmms
-  int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
-  for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
-    if (spill.IsXmmRegister()) {
-      offset += sizeof(double);
-      movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
-      cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
-    } else {
-      gpr_count++;
-    }
-  }
-  int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
-  addq(CpuRegister(RSP), Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-  for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
-    if (spill.IsCpuRegister()) {
-      popq(spill.AsCpuRegister());
-      cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
-      cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister()));
-    }
-  }
-  ret();
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
-  cfi_.AdjustCFAOffset(adjust);
-}
-
-void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-}
-
-void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsCpuRegister()) {
-    if (size == 4) {
-      CHECK_EQ(4u, size);
-      movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
-    } else {
-      CHECK_EQ(8u, size);
-      movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
-    }
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(0u, size);
-    movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow());
-    movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)),
-         src.AsRegisterPairHigh());
-  } else if (src.IsX87Register()) {
-    if (size == 4) {
-      fstps(Address(CpuRegister(RSP), offs));
-    } else {
-      fstpl(Address(CpuRegister(RSP), offs));
-    }
-  } else {
-    CHECK(src.IsXmmRegister());
-    if (size == 4) {
-      movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
-    } else {
-      movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
-    }
-  }
-}
-
-void X86_64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  CHECK(src.IsCpuRegister());
-  movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
-}
-
-void X86_64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  CHECK(src.IsCpuRegister());
-  movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
-}
-
-void X86_64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                            ManagedRegister) {
-  movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64) movq?
-}
-
-void X86_64Assembler::StoreImmediateToThread64(ThreadOffset64 dest, uint32_t imm, ManagedRegister) {
-  gs()->movl(Address::Absolute(dest, true), Immediate(imm));  // TODO(64) movq?
-}
-
-void X86_64Assembler::StoreStackOffsetToThread64(ThreadOffset64 thr_offs,
-                                                 FrameOffset fr_offs,
-                                                 ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs));
-  gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
-}
-
-void X86_64Assembler::StoreStackPointerToThread64(ThreadOffset64 thr_offs) {
-  gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
-}
-
-void X86_64Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/,
-                                 FrameOffset /*in_off*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
-}
-
-void X86_64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    if (size == 4) {
-      CHECK_EQ(4u, size);
-      movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-    } else {
-      CHECK_EQ(8u, size);
-      movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-    }
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(0u, size);
-    movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src));
-    movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4)));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      flds(Address(CpuRegister(RSP), src));
-    } else {
-      fldl(Address(CpuRegister(RSP), src));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
-    } else {
-      movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
-    }
-  }
-}
-
-void X86_64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset64 src, size_t size) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      gs()->flds(Address::Absolute(src, true));
-    } else {
-      gs()->fldl(Address::Absolute(src, true));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true));
-    } else {
-      gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true));
-    }
-  }
-}
-
-void X86_64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister());
-  movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-}
-
-void X86_64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                              bool unpoison_reference) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
-  if (unpoison_reference) {
-    MaybeUnpoisonHeapReference(dest.AsCpuRegister());
-  }
-}
-
-void X86_64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                              Offset offs) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
-}
-
-void X86_64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset64 offs) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister());
-  gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true));
-}
-
-void X86_64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
-  X86_64ManagedRegister reg = mreg.AsX86_64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movsxb(reg.AsCpuRegister(), reg.AsCpuRegister());
-  } else {
-    movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86_64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
-  X86_64ManagedRegister reg = mreg.AsX86_64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movzxb(reg.AsCpuRegister(), reg.AsCpuRegister());
-  } else {
-    movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86_64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  if (!dest.Equals(src)) {
-    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
-      movq(dest.AsCpuRegister(), src.AsCpuRegister());
-    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
-      // Pass via stack and pop X87 register
-      subl(CpuRegister(RSP), Immediate(16));
-      if (size == 4) {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstps(Address(CpuRegister(RSP), 0));
-        movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
-      } else {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstpl(Address(CpuRegister(RSP), 0));
-        movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
-      }
-      addq(CpuRegister(RSP), Immediate(16));
-    } else {
-      // TODO: x87, SSE
-      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
-    }
-  }
-}
-
-void X86_64Assembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src));
-  movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
-}
-
-void X86_64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset64 thr_offs,
-                                             ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
-  Store(fr_offs, scratch, 8);
-}
-
-void X86_64Assembler::CopyRawPtrToThread64(ThreadOffset64 thr_offs,
-                                           FrameOffset fr_offs,
-                                           ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  Load(scratch, fr_offs, 8);
-  gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
-}
-
-void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch,
-                           size_t size) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
-  } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
-  }
-}
-
-void X86_64Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/,
-                        ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void X86_64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                        ManagedRegister scratch, size_t size) {
-  CHECK(scratch.IsNoRegister());
-  CHECK_EQ(size, 4u);
-  pushq(Address(CpuRegister(RSP), src));
-  popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset));
-}
-
-void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  movq(scratch, Address(CpuRegister(RSP), src_base));
-  movq(scratch, Address(scratch, src_offset));
-  movq(Address(CpuRegister(RSP), dest), scratch);
-}
-
-void X86_64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
-                        ManagedRegister src, Offset src_offset,
-                        ManagedRegister scratch, size_t size) {
-  CHECK_EQ(size, 4u);
-  CHECK(scratch.IsNoRegister());
-  pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset));
-  popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset));
-}
-
-void X86_64Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  CHECK_EQ(dest.Int32Value(), src.Int32Value());
-  movq(scratch, Address(CpuRegister(RSP), src));
-  pushq(Address(scratch, src_offset));
-  popq(Address(scratch, dest_offset));
-}
-
-void X86_64Assembler::MemoryBarrier(ManagedRegister) {
-  mfence();
-}
-
-void X86_64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister min_reg, bool null_allowed) {
-  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
-  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
-  if (in_reg.IsNoRegister()) {  // TODO(64): && null_allowed
-    // Use out_reg as indicator of null.
-    in_reg = out_reg;
-    // TODO: movzwl
-    movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-  }
-  CHECK(in_reg.IsCpuRegister());
-  CHECK(out_reg.IsCpuRegister());
-  VerifyObject(in_reg, null_allowed);
-  if (null_allowed) {
-    Label null_arg;
-    if (!out_reg.Equals(in_reg)) {
-      xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-    }
-    testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-    j(kZero, &null_arg);
-    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-  }
-}
-
-void X86_64Assembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister mscratch,
-                                   bool null_allowed) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  if (null_allowed) {
-    Label null_arg;
-    movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
-    j(kZero, &null_arg);
-    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-  }
-  Store(out_off, scratch, 8);
-}
-
-// Given a handle scope entry, load the associated reference.
-void X86_64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                         ManagedRegister min_reg) {
-  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
-  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
-  CHECK(out_reg.IsCpuRegister());
-  CHECK(in_reg.IsCpuRegister());
-  Label null_arg;
-  if (!out_reg.Equals(in_reg)) {
-    xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-  }
-  testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-  j(kZero, &null_arg);
-  movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
-  Bind(&null_arg);
-}
-
-void X86_64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86_64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86_64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
-  X86_64ManagedRegister base = mbase.AsX86_64();
-  CHECK(base.IsCpuRegister());
-  call(Address(base.AsCpuRegister(), offset.Int32Value()));
-  // TODO: place reference map on call
-}
-
-void X86_64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  movq(scratch, Address(CpuRegister(RSP), base));
-  call(Address(scratch, offset));
-}
-
-void X86_64Assembler::CallFromThread64(ThreadOffset64 offset, ManagedRegister /*mscratch*/) {
-  gs()->call(Address::Absolute(offset, true));
-}
-
-void X86_64Assembler::GetCurrentThread(ManagedRegister tr) {
-  gs()->movq(tr.AsX86_64().AsCpuRegister(),
-             Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
-}
-
-void X86_64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  gs()->movq(scratch.AsCpuRegister(),
-             Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
-  movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister());
-}
-
-// Slowpath entered when Thread::Current()->_exception is non-null
-class X86_64ExceptionSlowPath FINAL : public SlowPath {
- public:
-  explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const size_t stack_adjust_;
-};
-
-void X86_64Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86_64ExceptionSlowPath* slow = new (GetArena()) X86_64ExceptionSlowPath(stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true), Immediate(0));
-  j(kNotEqual, slow->Entry());
-}
-
-void X86_64ExceptionSlowPath::Emit(Assembler *sasm) {
-  X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  // Note: the return value is dead
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
-  }
-  // Pass exception as argument in RDI
-  __ gs()->movq(CpuRegister(RDI),
-                Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true));
-  __ gs()->call(
-      Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pDeliverException), true));
-  // this call should never return
-  __ int3();
-#undef __
-}
-
 void X86_64Assembler::AddConstantArea() {
   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   for (size_t i = 0, e = area.size(); i < e; i++) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 52e39cf..370f49c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -28,6 +28,7 @@
 #include "offsets.h"
 #include "utils/array_ref.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 
 namespace art {
 namespace x86_64 {
@@ -699,125 +700,6 @@
   }
   void Bind(NearLabel* label);
 
-  //
-  // Overridden common assembler high-level functionality
-  //
-
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size,
-                  ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
-      OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-
-  void StoreImmediateToThread64(ThreadOffset64 dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-
-  void StoreStackOffsetToThread64(ThreadOffset64 thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread64(ThreadOffset64 thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-
-  void LoadFromThread64(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, FrameOffset  src) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-
-  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
-
-  // Copying routines
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size);
-
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset64 thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-
-  void CopyRawPtrToThread64(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void MemoryBarrier(ManagedRegister) OVERRIDE;
-
-  // Sign extension
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  virtual void LoadReferenceFromHandleScope(ManagedRegister dst,
-                                     ManagedRegister src);
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
   size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 788c725..36c966b 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -22,7 +22,9 @@
 
 #include "base/bit_utils.h"
 #include "base/stl_util.h"
+#include "jni_macro_assembler_x86_64.h"
 #include "utils/assembler_test.h"
+#include "utils/jni_macro_assembler_test.h"
 
 namespace art {
 
@@ -1485,6 +1487,62 @@
   DriverFn(&setcc_test_fn, "setcc");
 }
 
+TEST_F(AssemblerX86_64Test, MovzxbRegs) {
+  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb");
+}
+
+TEST_F(AssemblerX86_64Test, MovsxbRegs) {
+  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
+}
+
+TEST_F(AssemblerX86_64Test, Repnescasw) {
+  GetAssembler()->repne_scasw();
+  const char* expected = "repne scasw\n";
+  DriverStr(expected, "Repnescasw");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsw) {
+  GetAssembler()->repe_cmpsw();
+  const char* expected = "repe cmpsw\n";
+  DriverStr(expected, "Repecmpsw");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsl) {
+  GetAssembler()->repe_cmpsl();
+  const char* expected = "repe cmpsl\n";
+  DriverStr(expected, "Repecmpsl");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsq) {
+  GetAssembler()->repe_cmpsq();
+  const char* expected = "repe cmpsq\n";
+  DriverStr(expected, "Repecmpsq");
+}
+
+TEST_F(AssemblerX86_64Test, Cmpb) {
+  GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128),
+                       x86_64::Immediate(0));
+  const char* expected = "cmpb $0, 128(%RDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
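+// Fixture for the JNI-specific macro assembler; the frame setup/teardown tests below now run
+// against X86_64JNIMacroAssembler instead of the plain assembler.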
+class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> {
+ public:
+  using Base = JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler>;
+
+ protected:
+  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
+  std::string GetArchitectureString() OVERRIDE {
+    return "x86_64";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn";
+  }
+
+ private:
+};
+
 static x86_64::X86_64ManagedRegister ManagedFromCpu(x86_64::Register r) {
   return x86_64::X86_64ManagedRegister::FromCpuRegister(r);
 }
@@ -1493,8 +1551,8 @@
   return x86_64::X86_64ManagedRegister::FromXmmRegister(r);
 }
 
-std::string buildframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                               x86_64::X86_64Assembler* assembler) {
+std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                               x86_64::X86_64JNIMacroAssembler* assembler) {
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
@@ -1536,12 +1594,12 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, BuildFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, BuildFrame) {
   DriverFn(&buildframe_test_fn, "BuildFrame");
 }
 
-std::string removeframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                                x86_64::X86_64Assembler* assembler) {
+std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                                x86_64::X86_64JNIMacroAssembler* assembler) {
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
@@ -1567,12 +1625,13 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, RemoveFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, RemoveFrame) {
   DriverFn(&removeframe_test_fn, "RemoveFrame");
 }
 
-std::string increaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                                  x86_64::X86_64Assembler* assembler) {
+std::string increaseframe_test_fn(
+    JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+    x86_64::X86_64JNIMacroAssembler* assembler) {
   assembler->IncreaseFrameSize(0U);
   assembler->IncreaseFrameSize(kStackAlignment);
   assembler->IncreaseFrameSize(10 * kStackAlignment);
@@ -1586,12 +1645,13 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, IncreaseFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, IncreaseFrame) {
   DriverFn(&increaseframe_test_fn, "IncreaseFrame");
 }
 
-std::string decreaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                                  x86_64::X86_64Assembler* assembler) {
+std::string decreaseframe_test_fn(
+    JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+    x86_64::X86_64JNIMacroAssembler* assembler) {
   assembler->DecreaseFrameSize(0U);
   assembler->DecreaseFrameSize(kStackAlignment);
   assembler->DecreaseFrameSize(10 * kStackAlignment);
@@ -1605,47 +1665,8 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, DecreaseFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, DecreaseFrame) {
   DriverFn(&decreaseframe_test_fn, "DecreaseFrame");
 }
 
-TEST_F(AssemblerX86_64Test, MovzxbRegs) {
-  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb");
-}
-
-TEST_F(AssemblerX86_64Test, MovsxbRegs) {
-  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
-}
-
-TEST_F(AssemblerX86_64Test, Repnescasw) {
-  GetAssembler()->repne_scasw();
-  const char* expected = "repne scasw\n";
-  DriverStr(expected, "Repnescasw");
-}
-
-TEST_F(AssemblerX86_64Test, Repecmpsw) {
-  GetAssembler()->repe_cmpsw();
-  const char* expected = "repe cmpsw\n";
-  DriverStr(expected, "Repecmpsw");
-}
-
-TEST_F(AssemblerX86_64Test, Repecmpsl) {
-  GetAssembler()->repe_cmpsl();
-  const char* expected = "repe cmpsl\n";
-  DriverStr(expected, "Repecmpsl");
-}
-
-TEST_F(AssemblerX86_64Test, Repecmpsq) {
-  GetAssembler()->repe_cmpsq();
-  const char* expected = "repe cmpsq\n";
-  DriverStr(expected, "Repecmpsq");
-}
-
-TEST_F(AssemblerX86_64Test, Cmpb) {
-  GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128),
-                       x86_64::Immediate(0));
-  const char* expected = "cmpb $0, 128(%RDI)\n";
-  DriverStr(expected, "cmpb");
-}
-
 }  // namespace art
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
new file mode 100644
index 0000000..47fb59b
--- /dev/null
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -0,0 +1,603 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_x86_64.h"
+
+#include "base/casts.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "memory_region.h"
+#include "thread.h"
+
+namespace art {
+namespace x86_64 {
+
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
+}
+static dwarf::Reg DWARFReg(FloatRegister reg) {
+  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
+}
+
+constexpr size_t kFramePointerSize = 8;
+
+#define __ asm_.
+
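+// Frame layout: callee-save GPRs are pushed first, the rest of the frame is reserved in one
+// subtraction, callee-save XMMs and the method pointer are stored into it, and incoming
+// argument spills are written just above the frame. CFI is updated alongside each step.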
+void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size,
+                                         ManagedRegister method_reg,
+                                         ArrayRef<const ManagedRegister> spill_regs,
+                                         const ManagedRegisterEntrySpills& entry_spills) {
+  DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
+  cfi().SetCurrentCFAOffset(8);  // Return address on stack.
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  int gpr_count = 0;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsCpuRegister()) {
+      __ pushq(spill.AsCpuRegister());
+      gpr_count++;
+      cfi().AdjustCFAOffset(kFramePointerSize);
+      cfi().RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0);
+    }
+  }
+  // return address then method on stack.
+  int64_t rest_of_frame = static_cast<int64_t>(frame_size)
+                          - (gpr_count * kFramePointerSize)
+                          - kFramePointerSize /*return address*/;
+  __ subq(CpuRegister(RSP), Immediate(rest_of_frame));
+  cfi().AdjustCFAOffset(rest_of_frame);
+
+  // spill xmms
+  int64_t offset = rest_of_frame;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsXmmRegister()) {
+      offset -= sizeof(double);
+      __ movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
+      cfi().RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset);
+    }
+  }
+
+  static_assert(static_cast<size_t>(kX86_64PointerSize) == kFramePointerSize,
+                "Unexpected frame pointer size.");
+
+  __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
+
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ManagedRegisterSpill spill = entry_spills.at(i);
+    if (spill.AsX86_64().IsCpuRegister()) {
+      if (spill.getSize() == 8) {
+        __ movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                spill.AsX86_64().AsCpuRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        __ movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                spill.AsX86_64().AsCpuRegister());
+      }
+    } else {
+      if (spill.getSize() == 8) {
+        __ movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                 spill.AsX86_64().AsXmmRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        __ movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                 spill.AsX86_64().AsXmmRegister());
+      }
+    }
+  }
+}
+
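+// Tears the frame down in reverse: reload callee-save XMMs, release the frame, pop the
+// callee-save GPRs and return, restoring the CFI state for code after the exit block.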
+void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                          ArrayRef<const ManagedRegister> spill_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi().RememberState();
+  int gpr_count = 0;
+  // unspill xmms
+  int64_t offset = static_cast<int64_t>(frame_size)
+      - (spill_regs.size() * kFramePointerSize)
+      - 2 * kFramePointerSize;
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsXmmRegister()) {
+      offset += sizeof(double);
+      __ movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
+      cfi().Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
+    } else {
+      gpr_count++;
+    }
+  }
+  int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
+  __ addq(CpuRegister(RSP), Immediate(adjust));
+  cfi().AdjustCFAOffset(-adjust);
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsCpuRegister()) {
+      __ popq(spill.AsCpuRegister());
+      cfi().AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+      cfi().Restore(DWARFReg(spill.AsCpuRegister().AsRegister()));
+    }
+  }
+  __ ret();
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+void X86_64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  __ addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
+  cfi().AdjustCFAOffset(adjust);
+}
+
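+// Frame shrinking is factored into a helper that works directly on the underlying
+// X86_64Assembler and keeps its CFI offset in sync.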
+static void DecreaseFrameSizeImpl(size_t adjust, X86_64Assembler* assembler) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  assembler->addq(CpuRegister(RSP), Immediate(adjust));
+  assembler->cfi().AdjustCFAOffset(-adjust);
+}
+
+void X86_64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  DecreaseFrameSizeImpl(adjust, &asm_);
+}
+
+void X86_64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCpuRegister()) {
+    if (size == 4) {
+      CHECK_EQ(4u, size);
+      __ movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
+    } else {
+      CHECK_EQ(8u, size);
+      __ movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
+    }
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(0u, size);
+    __ movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow());
+    __ movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)),
+            src.AsRegisterPairHigh());
+  } else if (src.IsX87Register()) {
+    if (size == 4) {
+      __ fstps(Address(CpuRegister(RSP), offs));
+    } else {
+      __ fstpl(Address(CpuRegister(RSP), offs));
+    }
+  } else {
+    CHECK(src.IsXmmRegister());
+    if (size == 4) {
+      __ movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
+    } else {
+      __ movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  CHECK(src.IsCpuRegister());
+  __ movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  CHECK(src.IsCpuRegister());
+  __ movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
+                                                    uint32_t imm,
+                                                    ManagedRegister) {
+  __ movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64): movq?
+}
+
+void X86_64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                                       FrameOffset fr_offs,
+                                                       ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs));
+  __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
+  __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
+}
+
+void X86_64JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
+                                            ManagedRegister /*src*/,
+                                            FrameOffset /*in_off*/,
+                                            ManagedRegister /*scratch*/) {
+  UNIMPLEMENTED(FATAL);  // This case only currently exists for ARM.
+}
+
+void X86_64JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    if (size == 4) {
+      CHECK_EQ(4u, size);
+      __ movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+    } else {
+      CHECK_EQ(8u, size);
+      __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+    }
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(0u, size);
+    __ movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src));
+    __ movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ flds(Address(CpuRegister(RSP), src));
+    } else {
+      __ fldl(Address(CpuRegister(RSP), src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
+    } else {
+      __ movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::LoadFromThread(ManagedRegister mdest,
+                                             ThreadOffset64 src, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ gs()->flds(Address::Absolute(src, true));
+    } else {
+      __ gs()->fldl(Address::Absolute(src, true));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true));
+    } else {
+      __ gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true));
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister());
+  __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+}
+
+void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest,
+                                      ManagedRegister base,
+                                      MemberOffset offs,
+                                      bool unpoison_reference) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister() && base.AsX86_64().IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(dest.AsCpuRegister());
+  }
+}
+
+void X86_64JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister() && base.AsX86_64().IsCpuRegister());
+  __ movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
+}
+
+void X86_64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister());
+  __ gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true));
+}
+
+void X86_64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
+  X86_64ManagedRegister reg = mreg.AsX86_64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movsxb(reg.AsCpuRegister(), reg.AsCpuRegister());
+  } else {
+    __ movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86_64JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  X86_64ManagedRegister reg = mreg.AsX86_64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movzxb(reg.AsCpuRegister(), reg.AsCpuRegister());
+  } else {
+    __ movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  if (!dest.Equals(src)) {
+    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
+      __ movq(dest.AsCpuRegister(), src.AsCpuRegister());
+    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
+      // Pass via stack and pop X87 register
+      __ subq(CpuRegister(RSP), Immediate(16));
+      if (size == 4) {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstps(Address(CpuRegister(RSP), 0));
+        __ movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
+      } else {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstpl(Address(CpuRegister(RSP), 0));
+        __ movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
+      }
+      __ addq(CpuRegister(RSP), Immediate(16));
+    } else {
+      // TODO: x87, SSE
+      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src));
+  __ movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                   ThreadOffset64 thr_offs,
+                                                   ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  __ gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
+  Store(fr_offs, scratch, 8);
+}
+
+void X86_64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                                                 FrameOffset fr_offs,
+                                                 ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  Load(scratch, fr_offs, 8);
+  __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
+                                   FrameOffset src,
+                                   ManagedRegister mscratch,
+                                   size_t size) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  if (scratch.IsCpuRegister() && size == 8) {
+    Load(scratch, src, 4);
+    Store(dest, scratch, 4);
+    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
+    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
+  } else {
+    Load(scratch, src, size);
+    Store(dest, scratch, size);
+  }
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                   ManagedRegister /*src_base*/,
+                                   Offset /*src_offset*/,
+                                   ManagedRegister /*scratch*/,
+                                   size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void X86_64JNIMacroAssembler::Copy(ManagedRegister dest_base,
+                                   Offset dest_offset,
+                                   FrameOffset src,
+                                   ManagedRegister scratch,
+                                   size_t size) {
+  CHECK(scratch.IsNoRegister());
+  CHECK_EQ(size, 4u);
+  __ pushq(Address(CpuRegister(RSP), src));
+  __ popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset));
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
+                                   FrameOffset src_base,
+                                   Offset src_offset,
+                                   ManagedRegister mscratch,
+                                   size_t size) {
+  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  __ movq(scratch, Address(CpuRegister(RSP), src_base));
+  __ movq(scratch, Address(scratch, src_offset));
+  __ movq(Address(CpuRegister(RSP), dest), scratch);
+}
+
+void X86_64JNIMacroAssembler::Copy(ManagedRegister dest,
+                                   Offset dest_offset,
+                                   ManagedRegister src,
+                                   Offset src_offset,
+                                   ManagedRegister scratch,
+                                   size_t size) {
+  CHECK_EQ(size, 4u);
+  CHECK(scratch.IsNoRegister());
+  __ pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset));
+  __ popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset));
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
+                                   Offset dest_offset,
+                                   FrameOffset src,
+                                   Offset src_offset,
+                                   ManagedRegister mscratch,
+                                   size_t size) {
+  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  CHECK_EQ(dest.Int32Value(), src.Int32Value());
+  __ movq(scratch, Address(CpuRegister(RSP), src));
+  __ pushq(Address(scratch, src_offset));
+  __ popq(Address(scratch, dest_offset));
+}
+
+void X86_64JNIMacroAssembler::MemoryBarrier(ManagedRegister) {
+  __ mfence();
+}
+
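For context, the MemoryBarrier implementation above is a single mfence, which on x86-64 behaves as a full (sequentially consistent) fence. A minimal standalone sketch, not ART code:

    // Standalone sketch: a full fence equivalent to the mfence emitted above.
    // On x86-64, compilers lower a seq_cst fence to mfence (or a locked RMW).
    #include <atomic>

    inline void FullBarrier() {
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }
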
+void X86_64JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                                     FrameOffset handle_scope_offset,
+                                                     ManagedRegister min_reg,
+                                                     bool null_allowed) {
+  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
+  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
+  if (in_reg.IsNoRegister()) {  // TODO(64): && null_allowed
+    // Use out_reg as indicator of null.
+    in_reg = out_reg;
+    // TODO: movzwl
+    __ movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+  }
+  CHECK(in_reg.IsCpuRegister());
+  CHECK(out_reg.IsCpuRegister());
+  VerifyObject(in_reg, null_allowed);
+  if (null_allowed) {
+    Label null_arg;
+    if (!out_reg.Equals(in_reg)) {
+      __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+    }
+    __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+  }
+}
+
+void X86_64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                     FrameOffset handle_scope_offset,
+                                                     ManagedRegister mscratch,
+                                                     bool null_allowed) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  if (null_allowed) {
+    Label null_arg;
+    __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+  }
+  Store(out_off, scratch, 8);
+}
+
+// Given a handle scope entry, load the associated reference.
+void X86_64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                           ManagedRegister min_reg) {
+  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
+  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
+  CHECK(out_reg.IsCpuRegister());
+  CHECK(in_reg.IsCpuRegister());
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+  }
+  __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+  __ j(kZero, &null_arg);
+  __ movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
+  __ Bind(&null_arg);
+}
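The branchy sequence above is a null-checked indirection: a handle scope entry either is null or points at the object reference. A rough C++ equivalent of the emitted code, with Object standing in for the runtime's object type:

    // Sketch only: semantic equivalent of LoadReferenceFromHandleScope.
    struct Object;

    inline Object* LoadReference(Object** handle_scope_entry) {
      // out_reg is zeroed first, then overwritten only if the entry is non-null.
      return (handle_scope_entry == nullptr) ? nullptr : *handle_scope_entry;
    }
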
+
+void X86_64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86_64JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86_64JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
+  CHECK(base.IsCpuRegister());
+  __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
+  // TODO: place reference map on call
+}
+
+void X86_64JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+  __ movq(scratch, Address(CpuRegister(RSP), base));
+  __ call(Address(scratch, offset));
+}
+
+void X86_64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset, ManagedRegister /*mscratch*/) {
+  __ gs()->call(Address::Absolute(offset, true));
+}
+
+void X86_64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  __ gs()->movq(tr.AsX86_64().AsCpuRegister(),
+                Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
+}
+
+void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  __ gs()->movq(scratch.AsCpuRegister(),
+                Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
+  __ movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister());
+}
+
+// Slow path entered when Thread::Current()->exception_ is non-null.
+class X86_64ExceptionSlowPath FINAL : public SlowPath {
+ public:
+  explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
+  virtual void Emit(Assembler* sp_asm) OVERRIDE;
+ private:
+  const size_t stack_adjust_;
+};
+
+void X86_64JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+  X86_64ExceptionSlowPath* slow = new (__ GetArena()) X86_64ExceptionSlowPath(stack_adjust);
+  __ GetBuffer()->EnqueueSlowPath(slow);
+  __ gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true),
+                Immediate(0));
+  __ j(kNotEqual, slow->Entry());
+}
+
+#undef __
+
+void X86_64ExceptionSlowPath::Emit(Assembler* sasm) {
+  X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  // Note: the return value is dead
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSizeImpl(stack_adjust_, sp_asm);
+  }
+  // Pass exception as argument in RDI
+  __ gs()->movq(CpuRegister(RDI),
+                Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true));
+  __ gs()->call(
+      Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pDeliverException), true));
+  // This call should never return.
+  __ int3();
+#undef __
+}
+
+}  // namespace x86_64
+}  // namespace art
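Taken together, BuildFrame and RemoveFrame split the frame into three parts: callee-save GPRs handled by push/pop, an explicitly reserved region holding the spilled XMM registers plus the ArtMethod* slot, and the return address already pushed by the call. A standalone sketch of that arithmetic (illustrative names, not ART code):

    #include <cassert>
    #include <cstddef>

    constexpr std::size_t kFramePointerSize = 8;  // x86-64

    // Bytes reserved by the explicit subq in BuildFrame: everything except
    // the pushed GPR spills and the return address.
    std::size_t RestOfFrame(std::size_t frame_size, std::size_t gpr_spills) {
      return frame_size - gpr_spills * kFramePointerSize - kFramePointerSize;
    }

    int main() {
      // Example: a 96-byte frame with 5 callee-save GPRs pushed leaves
      // 96 - 5*8 - 8 = 48 bytes for XMM spills, padding and the Method* slot.
      assert(RestOfFrame(96, 5) == 48);
      return 0;
    }
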
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
new file mode 100644
index 0000000..cc4e57c
--- /dev/null
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_
+#define ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_
+
+#include <vector>
+
+#include "assembler_x86_64.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+#include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
+
+namespace art {
+namespace x86_64 {
+
+class X86_64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86_64Assembler,
+                                                                  PointerSize::k64> {
+ public:
+  explicit X86_64JNIMacroAssembler(ArenaAllocator* arena)
+      : JNIMacroAssemblerFwd<X86_64Assembler, PointerSize::k64>(arena) {}
+  virtual ~X86_64JNIMacroAssembler() {}
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+      OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest,
+                     ManagedRegister src,
+                     FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            ManagedRegister src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base,
+            Offset dest_offset,
+            FrameOffset src,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            FrameOffset src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest,
+            Offset dest_offset,
+            ManagedRegister src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            Offset dest_offset,
+            FrameOffset src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister scratch,
+                              bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load this into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(X86_64JNIMacroAssembler);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_
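The header relies on the JNIMacroAssemblerFwd base to own the concrete X86_64Assembler and forward the common plumbing, so this class only implements the JNI-specific operations. A minimal sketch of that layering with made-up names, not the actual ART types:

    #include <cstddef>

    // Stand-in for the architecture-specific assembler.
    class FakeAssembler {
     public:
      void AddToStackPointer(int bytes) { sp_ += bytes; }
      int sp() const { return sp_; }
     private:
      int sp_ = 0;
    };

    // Stand-in for JNIMacroAssemblerFwd: owns the assembler and exposes it
    // to the subclass as asm_.
    template <typename AsmType>
    class MacroAssemblerFwd {
     protected:
      AsmType asm_;
    };

    // Stand-in for X86_64JNIMacroAssembler: the high-level JNI operations
    // are written in terms of the owned assembler.
    class FakeJniMacroAssembler final : public MacroAssemblerFwd<FakeAssembler> {
     public:
      void IncreaseFrameSize(std::size_t adjust) {
        asm_.AddToStackPointer(-static_cast<int>(adjust));
      }
    };
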
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 0d1d4d7..eb11f6d 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -516,6 +516,7 @@
       compiled_classes_filename_(nullptr),
       compiled_methods_zip_filename_(nullptr),
       compiled_methods_filename_(nullptr),
+      passes_to_run_filename_(nullptr),
       app_image_(false),
       boot_image_(false),
       multi_image_(false),
@@ -894,6 +895,16 @@
       }
     }
     compiler_options_->force_determinism_ = force_determinism_;
+
+    if (passes_to_run_filename_ != nullptr) {
+      passes_to_run_.reset(ReadCommentedInputFromFile<std::vector<std::string>>(
+          passes_to_run_filename_,
+          nullptr));         // No post-processing.
+      if (passes_to_run_.get() == nullptr) {
+        Usage("Failed to read list of passes to run.");
+      }
+    }
+    compiler_options_->passes_to_run_ = passes_to_run_.get();
   }
 
   static bool SupportsDeterministicCompilation() {
@@ -1093,6 +1104,8 @@
         compiled_methods_filename_ = option.substr(strlen("--compiled-methods=")).data();
       } else if (option.starts_with("--compiled-methods-zip=")) {
         compiled_methods_zip_filename_ = option.substr(strlen("--compiled-methods-zip=")).data();
+      } else if (option.starts_with("--run-passes=")) {
+        passes_to_run_filename_ = option.substr(strlen("--run-passes=")).data();
       } else if (option.starts_with("--base=")) {
         ParseBase(option);
       } else if (option.starts_with("--boot-image=")) {
@@ -2106,13 +2119,15 @@
     if (compiled_methods_filename_ != nullptr) {
       std::string error_msg;
       if (compiled_methods_zip_filename_ != nullptr) {
-        compiled_methods_.reset(ReadCommentedInputFromZip(compiled_methods_zip_filename_,
-                                                          compiled_methods_filename_,
-                                                          nullptr,            // No post-processing.
-                                                          &error_msg));
+        compiled_methods_.reset(ReadCommentedInputFromZip<std::unordered_set<std::string>>(
+            compiled_methods_zip_filename_,
+            compiled_methods_filename_,
+            nullptr,            // No post-processing.
+            &error_msg));
       } else {
-        compiled_methods_.reset(ReadCommentedInputFromFile(compiled_methods_filename_,
-                                                           nullptr));         // No post-processing.
+        compiled_methods_.reset(ReadCommentedInputFromFile<std::unordered_set<std::string>>(
+            compiled_methods_filename_,
+            nullptr));          // No post-processing.
       }
       if (compiled_methods_.get() == nullptr) {
         LOG(ERROR) << "Failed to create list of compiled methods from '"
@@ -2346,7 +2361,8 @@
   static std::unordered_set<std::string>* ReadImageClassesFromFile(
       const char* image_classes_filename) {
     std::function<std::string(const char*)> process = DotToDescriptor;
-    return ReadCommentedInputFromFile(image_classes_filename, &process);
+    return ReadCommentedInputFromFile<std::unordered_set<std::string>>(image_classes_filename,
+                                                                       &process);
   }
 
   // Reads the class names (java.lang.Object) and returns a set of descriptors (Ljava/lang/Object;)
@@ -2355,27 +2371,32 @@
         const char* image_classes_filename,
         std::string* error_msg) {
     std::function<std::string(const char*)> process = DotToDescriptor;
-    return ReadCommentedInputFromZip(zip_filename, image_classes_filename, &process, error_msg);
+    return ReadCommentedInputFromZip<std::unordered_set<std::string>>(zip_filename,
+                                                                      image_classes_filename,
+                                                                      &process,
+                                                                      error_msg);
   }
 
   // Read lines from the given file, dropping comments and empty lines. Post-process each line with
   // the given function.
-  static std::unordered_set<std::string>* ReadCommentedInputFromFile(
+  template <typename T>
+  static T* ReadCommentedInputFromFile(
       const char* input_filename, std::function<std::string(const char*)>* process) {
     std::unique_ptr<std::ifstream> input_file(new std::ifstream(input_filename, std::ifstream::in));
     if (input_file.get() == nullptr) {
       LOG(ERROR) << "Failed to open input file " << input_filename;
       return nullptr;
     }
-    std::unique_ptr<std::unordered_set<std::string>> result(
-        ReadCommentedInputStream(*input_file, process));
+    std::unique_ptr<T> result(
+        ReadCommentedInputStream<T>(*input_file, process));
     input_file->close();
     return result.release();
   }
 
   // Read lines from the given file from the given zip file, dropping comments and empty lines.
   // Post-process each line with the given function.
-  static std::unordered_set<std::string>* ReadCommentedInputFromZip(
+  template <typename T>
+  static T* ReadCommentedInputFromZip(
       const char* zip_filename,
       const char* input_filename,
       std::function<std::string(const char*)>* process,
@@ -2401,16 +2422,16 @@
     const std::string input_string(reinterpret_cast<char*>(input_file->Begin()),
                                    input_file->Size());
     std::istringstream input_stream(input_string);
-    return ReadCommentedInputStream(input_stream, process);
+    return ReadCommentedInputStream<T>(input_stream, process);
   }
 
   // Read lines from the given stream, dropping comments and empty lines. Post-process each line
   // with the given function.
-  static std::unordered_set<std::string>* ReadCommentedInputStream(
+  template <typename T>
+  static T* ReadCommentedInputStream(
       std::istream& in_stream,
       std::function<std::string(const char*)>* process) {
-    std::unique_ptr<std::unordered_set<std::string>> image_classes(
-        new std::unordered_set<std::string>);
+    std::unique_ptr<T> output(new T());
     while (in_stream.good()) {
       std::string dot;
       std::getline(in_stream, dot);
@@ -2419,12 +2440,12 @@
       }
       if (process != nullptr) {
         std::string descriptor((*process)(dot.c_str()));
-        image_classes->insert(descriptor);
+        output->insert(output->end(), descriptor);
       } else {
-        image_classes->insert(dot);
+        output->insert(output->end(), dot);
       }
     }
-    return image_classes.release();
+    return output.release();
   }
 
   void LogCompletionTime() {
@@ -2501,9 +2522,11 @@
   const char* compiled_classes_filename_;
   const char* compiled_methods_zip_filename_;
   const char* compiled_methods_filename_;
+  const char* passes_to_run_filename_;
   std::unique_ptr<std::unordered_set<std::string>> image_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_methods_;
+  std::unique_ptr<std::vector<std::string>> passes_to_run_;
   bool app_image_;
   bool boot_image_;
   bool multi_image_;
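The templating above works because both result types, std::unordered_set<std::string> for --compiled-methods and std::vector<std::string> for --run-passes, accept insert(iterator, value), so the shared reader can stay container-agnostic while the vector preserves pass order. A small sketch outside dex2oat (hypothetical helper, simplified comment handling):

    #include <cassert>
    #include <string>
    #include <unordered_set>
    #include <vector>

    template <typename T>
    T ReadLines(const std::vector<std::string>& lines) {
      T out;
      for (const std::string& line : lines) {
        if (line.empty() || line[0] == '#') {  // drop comments and blanks
          continue;
        }
        out.insert(out.end(), line);  // hint for the set, position for the vector
      }
      return out;
    }

    int main() {
      const std::vector<std::string> input = {"# comment", "Lfoo;", "Lbar;", ""};
      auto methods = ReadLines<std::unordered_set<std::string>>(input);
      auto passes = ReadLines<std::vector<std::string>>(input);  // keeps order
      assert(methods.count("Lfoo;") == 1);
      assert(passes.size() == 2 && passes[0] == "Lfoo;");
      return 0;
    }
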
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 96c3267..2042934 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -1060,20 +1060,6 @@
       fprintf(gOutFile, "}, %s", indexBuf.get());
       break;
     }
-    case Instruction::k25x: {      // op vC, {vD, vE, vF, vG} (B: count)
-      u4 arg[Instruction::kMaxVarArgRegs25x];
-      pDecInsn->GetAllArgs25x(arg);
-      fprintf(gOutFile, " v%d, {", arg[0]);
-      for (int i = 0, n = pDecInsn->VRegB(); i < n; i++) {
-        if (i == 0) {
-          fprintf(gOutFile, "v%d", arg[Instruction::kLambdaVirtualRegisterWidth + i]);
-        } else {
-          fprintf(gOutFile, ", v%d", arg[Instruction::kLambdaVirtualRegisterWidth + i]);
-        }
-      }  // for
-      fputc('}', gOutFile);
-      break;
-    }
     case Instruction::k3rc:        // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
     // NOT SUPPORTED:
     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 6c43e86..4f0e144 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -941,17 +941,11 @@
                 opcode << (op != 0 ? "vsqrt" : "vneg") << (S != 0 ? ".f64" : ".f32");
                 args << d << ", " << m;
               } else if (op5 == 4) {
-                opcode << "vcmp" << (S != 0 ? ".f64" : ".f32");
+                opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? ".f64" : ".f32");
                 args << d << ", " << m;
-                if (op != 0) {
-                  args << " (quiet nan)";
-                }
               } else if (op5 == 5) {
-                opcode << "vcmpe" << (S != 0 ? ".f64" : ".f32");
+                opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? ".f64" : ".f32");
                 args << d << ", #0.0";
-                if (op != 0) {
-                  args << " (quiet nan)";
-                }
                 if ((instr & 0x2f) != 0) {
                   args << " (UNPREDICTABLE)";
                 }
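The disassembler change above derives the mnemonic directly from the E bit (op) and the precision bit (S) instead of appending a "(quiet nan)" note. A tiny sketch of the resulting formatting rule:

    #include <cassert>
    #include <string>

    // Sketch: mnemonic selection matching the change above.
    std::string VcmpMnemonic(bool e_bit /*op*/, bool f64 /*S*/) {
      return std::string("vcmp") + (e_bit ? "e" : "") + (f64 ? ".f64" : ".f32");
    }

    int main() {
      assert(VcmpMnemonic(false, false) == "vcmp.f32");
      assert(VcmpMnemonic(true, true) == "vcmpe.f64");
      return 0;
    }
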
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 8c3c5e5..77730b9 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -73,9 +73,10 @@
   "kResolutionMethod",
   "kImtConflictMethod",
   "kImtUnimplementedMethod",
-  "kCalleeSaveMethod",
-  "kRefsOnlySaveMethod",
-  "kRefsAndArgsSaveMethod",
+  "kSaveAllCalleeSavesMethod",
+  "kSaveRefsOnlyMethod",
+  "kSaveRefsAndArgsMethod",
+  "kSaveEverythingMethod",
 };
 
 const char* image_roots_descriptions_[] = {
diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc
index 63dc476..db97055 100644
--- a/oatdump/oatdump_test.cc
+++ b/oatdump/oatdump_test.cc
@@ -104,11 +104,13 @@
     // We must set --android-root.
     int link[2];
     if (pipe(link) == -1) {
+      *error_msg = strerror(errno);
       return false;
     }
 
     const pid_t pid = fork();
     if (pid == -1) {
+      *error_msg = strerror(errno);
       return false;
     }
 
@@ -116,10 +118,19 @@
       dup2(link[1], STDOUT_FILENO);
       close(link[0]);
       close(link[1]);
-      bool res = ::art::Exec(exec_argv, error_msg);
-      // Delete the runtime to prevent memory leaks and please valgrind.
-      delete Runtime::Current();
-      exit(res ? 0 : 1);
+      // Change process groups so we don't get reaped by ProcessManager.
+      setpgid(0, 0);
+      // Use execv rather than art::Exec so the child does not block on waitpid.
+      std::vector<char*> argv;
+      for (size_t i = 0; i < exec_argv.size(); ++i) {
+        argv.push_back(const_cast<char*>(exec_argv[i].c_str()));
+      }
+      argv.push_back(nullptr);
+      UNUSED(execv(argv[0], &argv[0]));
+      const std::string command_line(Join(exec_argv, ' '));
+      PLOG(ERROR) << "Failed to execv(" << command_line << ")";
+      // _exit to avoid atexit handlers in child.
+      _exit(1);
     } else {
       close(link[1]);
       static const size_t kLineMax = 256;
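The new child-side sequence avoids running a full runtime shutdown (and atexit handlers) in the forked process. A standalone sketch of the same fork/dup2/execv/_exit pattern, not the test code itself:

    #include <sys/types.h>
    #include <unistd.h>
    #include <cstdio>
    #include <string>
    #include <vector>

    pid_t SpawnChild(const std::vector<std::string>& args, int pipe_write_fd) {
      const pid_t pid = fork();
      if (pid == 0) {                        // child
        dup2(pipe_write_fd, STDOUT_FILENO);  // capture the tool's stdout
        setpgid(0, 0);                       // detach from the parent's process group
        std::vector<char*> argv;
        for (const std::string& arg : args) {
          argv.push_back(const_cast<char*>(arg.c_str()));
        }
        argv.push_back(nullptr);
        execv(argv[0], argv.data());
        perror("execv");                     // reached only if exec failed
        _exit(1);                            // skip atexit handlers in the child
      }
      return pid;                            // parent: read the pipe, then wait
    }
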
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 9c813e2..2f8b113 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -109,11 +109,6 @@
   jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
   jit/profile_saver.cc  \
-  lambda/art_lambda_method.cc \
-  lambda/box_table.cc \
-  lambda/closure.cc \
-  lambda/closure_builder.cc \
-  lambda/leaking_allocator.cc \
   jni_internal.cc \
   jobject_comparator.cc \
   linear_alloc.cc \
@@ -132,6 +127,7 @@
   mirror/throwable.cc \
   monitor.cc \
   native_bridge_art_interface.cc \
+  native_stack_dump.cc \
   native/dalvik_system_DexFile.cc \
   native/dalvik_system_VMDebug.cc \
   native/dalvik_system_VMRuntime.cc \
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index ee31c58..a857976 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -63,104 +63,97 @@
 // Grab architecture specific constants.
 namespace arm {
 #include "arch/arm/asm_support_arm.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace arm
 
 namespace arm64 {
 #include "arch/arm64/asm_support_arm64.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace arm64
 
 namespace mips {
 #include "arch/mips/asm_support_mips.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace mips
 
 namespace mips64 {
 #include "arch/mips64/asm_support_mips64.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace mips64
 
 namespace x86 {
 #include "arch/x86/asm_support_x86.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace x86
 
 namespace x86_64 {
 #include "arch/x86_64/asm_support_x86_64.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace x86_64
 
 // Check architecture specific constants are sound.
-TEST_F(ArchTest, ARM) {
-  CheckFrameSize(InstructionSet::kArm, Runtime::kSaveAll, arm::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsOnly, arm::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsAndArgs, arm::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-
-TEST_F(ArchTest, ARM64) {
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kSaveAll, arm64::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsOnly, arm64::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsAndArgs,
-                 arm64::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, MIPS) {
-  CheckFrameSize(InstructionSet::kMips, Runtime::kSaveAll, mips::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsOnly, mips::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsAndArgs,
-                 mips::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, MIPS64) {
-  CheckFrameSize(InstructionSet::kMips64, Runtime::kSaveAll, mips64::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsOnly, mips64::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsAndArgs,
-                 mips64::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, X86) {
-  CheckFrameSize(InstructionSet::kX86, Runtime::kSaveAll, x86::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsOnly, x86::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsAndArgs, x86::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, X86_64) {
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kSaveAll, x86_64::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsOnly, x86_64::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsAndArgs,
-                 x86_64::kFrameSizeRefsAndArgsCalleeSave);
-}
+#define TEST_ARCH(Arch, arch)                             \
+  TEST_F(ArchTest, Arch) {                                \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveAllCalleeSaves,          \
+                   arch::kFrameSizeSaveAllCalleeSaves);   \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveRefsOnly,                \
+                   arch::kFrameSizeSaveRefsOnly);         \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveRefsAndArgs,             \
+                   arch::kFrameSizeSaveRefsAndArgs);      \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveEverything,              \
+                   arch::kFrameSizeSaveEverything);       \
+  }
+TEST_ARCH(Arm, arm)
+TEST_ARCH(Arm64, arm64)
+TEST_ARCH(Mips, mips)
+TEST_ARCH(Mips64, mips64)
+TEST_ARCH(X86, x86)
+TEST_ARCH(X86_64, x86_64)
 
 }  // namespace art
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 1fa566b..c03bcae 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -19,9 +19,10 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 112
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 112
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 112
+#define FRAME_SIZE_SAVE_REFS_ONLY 32
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
+#define FRAME_SIZE_SAVE_EVERYTHING 192
 
 // Flag for enabling R4 optimization in arm runtime
 // #define ARM_R4_SUSPEND_FLAG
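The new 192-byte SAVE_EVERYTHING frame, like the existing sizes, is simply the sum of the registers the corresponding entrypoint macro spills; the quick_entrypoints_arm.S hunks below repeat the same sums in their compile-time checks. A standalone cross-check of the arithmetic:

    // Sketch (not ART code): ARM frame sizes as register-count sums.
    static_assert(112 == 9 * 4 + 16 * 4 + 12,
                  "SAVE_ALL_CALLEE_SAVES: 9 GPRs, s16-s31, 3 pad/Method* words");
    static_assert(32 == 7 * 4 + 4,
                  "SAVE_REFS_ONLY: 7 GPRs + Method* word");
    static_assert(112 == 10 * 4 + 16 * 4 + 8,
                  "SAVE_REFS_AND_ARGS: 10 GPRs/args, s0-s15, pad + Method*");
    static_assert(192 == 14 * 4 + 32 * 4 + 8,
                  "SAVE_EVERYTHING: r0-r12+lr, s0-s31, pad + Method*");
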
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 34d3158..0fcf866 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -42,30 +42,31 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME rTemp
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
     SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
     vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
     .cfi_adjust_cfa_offset 64
     sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
     .cfi_adjust_cfa_offset 12
     RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
-    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET] @ rTemp is kSaveAll Method*.
+    @ Load kSaveAllCalleeSaves Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
     str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 64 + 12)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
 #endif
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME rTemp
+.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
     push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
     .cfi_adjust_cfa_offset 28
     .cfi_rel_offset r5, 0
@@ -78,17 +79,18 @@
     sub sp, #4                                    @ bottom word will hold Method*
     .cfi_adjust_cfa_offset 4
     RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
-    ldr \rTemp, [\rTemp, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp is kRefsOnly Method*.
+    @ Load kSaveRefsOnly Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
     str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
 #endif
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     add sp, #4               @ bottom word holds Method*
     .cfi_adjust_cfa_offset -4
     pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
@@ -102,16 +104,16 @@
     .cfi_adjust_cfa_offset -28
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bx  lr                   @ return
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves and args.
     .cfi_adjust_cfa_offset 40
     .cfi_rel_offset r1, 0
@@ -126,30 +128,30 @@
     .cfi_rel_offset lr, 36
     vpush {s0-s15}                     @ 16 words of float args.
     .cfi_adjust_cfa_offset 64
-    sub sp, #8                         @ 2 words of space, bottom word will hold Method*
+    sub sp, #8                         @ 2 words of space, alignment padding and Method*
     .cfi_adjust_cfa_offset 8
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 64 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected."
 #endif
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
-     @ rTemp is kRefsAndArgs Method*.
-    ldr \rTemp, [\rTemp, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
+    @ Load kSaveRefsAndArgs Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
     str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
-    str r0, [sp, #0]                   @ Store ArtMethod* to bottom of stack.
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
+    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     add  sp, #8                      @ rewind sp
     .cfi_adjust_cfa_offset -8
     vpop {s0-s15}
@@ -168,6 +170,65 @@
     .cfi_adjust_cfa_offset -40
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME rTemp
+    push {r0-r12, lr}                   @ 14 words of callee saves and args.
+    .cfi_adjust_cfa_offset 56
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r5, 20
+    .cfi_rel_offset r6, 24
+    .cfi_rel_offset r7, 28
+    .cfi_rel_offset r8, 32
+    .cfi_rel_offset r9, 36
+    .cfi_rel_offset r10, 40
+    .cfi_rel_offset r11, 44
+    .cfi_rel_offset ip, 48
+    .cfi_rel_offset lr, 52
+    vpush {s0-s31}                      @ 32 words of float args.
+    .cfi_adjust_cfa_offset 128
+    sub sp, #8                          @ 2 words of space, alignment padding and Method*
+    .cfi_adjust_cfa_offset 8
+    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
+    @ Load kSaveEverything Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
+#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
+#endif
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    add  sp, #8                         @ rewind sp
+    .cfi_adjust_cfa_offset -8
+    vpop {s0-s31}
+    .cfi_adjust_cfa_offset -128
+    pop {r0-r12, lr}                    @ 14 words of callee saves and args.
+    .cfi_restore r0
+    .cfi_restore r1
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r4
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r9
+    .cfi_restore r10
+    .cfi_restore r11
+    .cfi_restore r12
+    .cfi_restore lr
+    .cfi_adjust_cfa_offset -56
+.endm
+
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz   r0, 1f              @ result non-zero branch over
     bx     lr                  @ return
@@ -187,7 +248,7 @@
 .macro DELIVER_PENDING_EXCEPTION
     .fnend
     .fnstart
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0        @ save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save callee saves for throw
     mov    r0, r9                              @ pass Thread::Current
     b      artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
 .endm
@@ -195,7 +256,7 @@
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0        @ save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
     mov r0, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -204,7 +265,7 @@
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r1        @ save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
     mov r1, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -213,7 +274,7 @@
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2       @ save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -245,11 +306,11 @@
 .macro  ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1 @ save callee saves in case of GC
-    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
+    ldr    r1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
     bl     \entrypoint                   @ (uint32_t field_idx, const Method* referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -257,11 +318,11 @@
 .macro  TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2 @ save callee saves in case of GC
-    ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
+    ldr    r2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
     bl     \entrypoint                   @ (field_idx, Object*, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -269,14 +330,14 @@
 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3 @ save callee saves in case of GC
-    ldr    r3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r3        @ save callee saves in case of GC
+    ldr    r3, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     \entrypoint                   @ (field_idx, Object*, new_val, referrer, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
+    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
     \return
 END \name
 .endm
@@ -341,12 +402,12 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2  @ save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
     mov    r2, r9                         @ pass Thread::Current
     mov    r3, sp
     bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
     mov    r12, r1                        @ save Method*->code_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
     bx     r12                            @ tail call to target
 1:
@@ -520,7 +581,7 @@
     ldr    r2, [r9, #THREAD_ID_OFFSET]
     ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     mov    r3, r1
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
     cbnz   r3, .Lnot_unlocked         @ already thin locked
     @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
     orr    r2, r1, r2                 @ r2 holds thread id with count of 0 with preserved read barrier bits
@@ -536,9 +597,9 @@
     cbnz   r2, .Lslow_lock            @ lock word and self thread id's match -> recursive lock
                                       @ else contention, go to slow path
     mov    r3, r1                     @ copy the lock word to check count overflow.
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits.
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits.
     add    r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count in lock word placing in r2 to check overflow
-    lsr    r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT  @ if either of the upper two bits (28-29) are set, we overflowed.
+    lsr    r3, r2, #LOCK_WORD_GC_STATE_SHIFT    @ if the first gc state bit is set, we overflowed.
     cbnz   r3, .Lslow_lock            @ if we overflow the count go slow path
     add    r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count for real
     strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
@@ -547,19 +608,19 @@
 .Llock_strex_fail:
     b      .Lretry_lock               @ retry
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1  @ save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
     bl     artLockObjectFromCode      @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object
 
 ENTRY art_quick_lock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1  @ save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
     bl     artLockObjectFromCode      @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object_no_inline
@@ -581,17 +642,17 @@
     cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
     ldr    r2, [r9, #THREAD_ID_OFFSET]
     mov    r3, r1                     @ copy lock word to check thread id equality
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
     eor    r3, r3, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
     uxth   r3, r3                     @ zero top 16 bits
     cbnz   r3, .Lslow_unlock          @ do lock word and self thread id's match?
     mov    r3, r1                     @ copy lock word to detect transition to unlocked
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
     cmp    r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
     @ transition to unlocked
     mov    r3, r1
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK  @ r3: zero except for the preserved read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  @ r3: zero except for the preserved gc bits
     dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
 #ifndef USE_READ_BARRIER
     str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
@@ -613,20 +674,20 @@
     b      .Lretry_unlock             @ retry
 .Lslow_unlock:
     @ save callee saves in case exception allocation triggers GC
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1
+    SETUP_SAVE_REFS_ONLY_FRAME r1
     mov    r1, r9                     @ pass Thread::Current
     bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_unlock_object
 
 ENTRY art_quick_unlock_object_no_inline
     @ save callee saves in case exception allocation triggers GC
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1
+    SETUP_SAVE_REFS_ONLY_FRAME r1
     mov    r1, r9                     @ pass Thread::Current
     bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_unlock_object_no_inline
@@ -658,7 +719,7 @@
     .cfi_restore r0
     .cfi_restore r1
     .cfi_restore lr
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2       @ save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     b   artThrowClassCastException  @ (Class*, Class*, Thread*)
     bkpt
@@ -800,7 +861,7 @@
 .Lthrow_array_store_exception:
     pop {r0-r2, lr}
     /* No need to repeat restore cfi directives, the ones above apply here. */
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r3
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
     mov r1, r2
     mov r2, r9                     @ pass Thread::Current
     b artThrowArrayStoreException  @ (Class*, Class*, Thread*)
@@ -811,10 +872,10 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case of GC
     mov    r1, r9                     @ pass Thread::Current
     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -823,10 +884,10 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -835,11 +896,11 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r3     @ save callee saves in case of GC
     mov    r3, r9                     @ pass Thread::Current
     @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -848,13 +909,13 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12 @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r12    @ save callee saves in case of GC
     str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     \entrypoint
     add    sp, #16                    @ strip the extra frame
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -877,12 +938,12 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2 @ save callee saves in case of GC
-    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
+    ldr    r1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
     bl     artGet64StaticFromCode        @ (uint32_t field_idx, const Method* referrer, Thread*)
     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz   r2, 1f                        @ success if no exception pending
     bx     lr                            @ return on success
 1:
@@ -903,12 +964,12 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2 @ save callee saves in case of GC
-    ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
+    ldr    r2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
     bl     artGet64InstanceFromCode      @ (field_idx, Object*, referrer, Thread*)
     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz   r2, 1f                        @ success if no exception pending
     bx     lr                            @ return on success
 1:
@@ -928,15 +989,15 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1 @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
-    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    ldr    r1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     artSet64StaticFromCode        @ (field_idx, referrer, new_val, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
+    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_set64_static
@@ -953,9 +1014,9 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12 @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
-    ldr    r12, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    ldr    r12, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     str    r9, [sp, #-12]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 12
     str    r12, [sp, #-4]!               @ expand the frame and pass the referrer
@@ -963,7 +1024,7 @@
     bl     artSet64InstanceFromCode      @ (field_idx, Object*, new_val, Method* referrer, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
+    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_set64_instance
@@ -1079,10 +1140,10 @@
     bx     lr
 
 .Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
     bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
@@ -1164,10 +1225,10 @@
     ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2                      // Save callee saves in case of GC.
+    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
     mov    r2, r9                                             // Pass Thread::Current.
     bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_tlab
 
@@ -1199,10 +1260,10 @@
     pop    {r0, r1, r3, lr}
     b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_object_region_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2                      // Save callee saves in case of GC.
+    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
     mov    r2, r9                                             // Pass Thread::Current.
     bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_region_tlab
 
@@ -1212,24 +1273,25 @@
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
 #ifdef ARM_R4_SUSPEND_FLAG
-    ldrh   r0, [rSELF, #THREAD_FLAGS_OFFSET]
-    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL  @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
-    cbnz   r0, 1f                             @ check Thread::Current()->suspend_count_ == 0
-    bx     lr                                 @ return if suspend_count_ == 0
+    ldrh   rSUSPEND, [rSELF, #THREAD_FLAGS_OFFSET]
+    cbnz   rSUSPEND, 1f                         @ check Thread::Current()->suspend_count_ == 0
+    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL    @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
+    bx     lr                                   @ return if suspend_count_ == 0
 1:
+    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL    @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
 #endif
+    SETUP_SAVE_EVERYTHING_FRAME r0              @ save everything for GC stack crawl
     mov    r0, rSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1      @ save callee saves for GC stack crawl
-    @ TODO: save FPRs to enable access in the debugger?
-    bl     artTestSuspendFromCode             @ (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    bl     artTestSuspendFromCode               @ (Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME
+    bx     lr
 END art_quick_test_suspend
 
 ENTRY art_quick_implicit_suspend
     mov    r0, rSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1      @ save callee saves for stack crawl
+    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
     bl     artTestSuspendFromCode             @ (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
 END art_quick_implicit_suspend
 
     /*
@@ -1239,15 +1301,15 @@
      */
      .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
     mov     r2, r9                 @ pass Thread::Current
     mov     r3, sp                 @ pass SP
     blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     // Tear down the callee-save frame. Skip arg registers.
-    add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz    r2, 1f                 @ success if no exception is pending
     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
     bx      lr                     @ return on success
@@ -1290,17 +1352,17 @@
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
     mov     r2, r9                 @ pass Thread::Current
     mov     r3, sp                 @ pass SP
     blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
     cbz     r0, 1f                 @ is code pointer null? goto exception
     mov     r12, r0
     ldr  r0, [sp, #0]              @ load resolved method in r0
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     bx      r12                    @ tail-call into actual code
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
@@ -1308,7 +1370,7 @@
      * Called to do a generic JNI down-call
      */
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
 
     // Save rSELF
     mov r11, rSELF
@@ -1375,16 +1437,16 @@
     .cfi_def_cfa_register sp
 
     // Tear down the callee-save frame. Skip arg registers.
-    add     sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
+    RESTORE_SAVE_REFS_ONLY_FRAME
 
     // store into fpr, for when it's a fpr return...
     vmov d0, r0, r1
     bx lr      // ret
     // Undo the unwinding information from above since it doesn't apply below.
     .cfi_def_cfa_register r10
-    .cfi_adjust_cfa_offset FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+    .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
 
 .Lexception_in_native:
     ldr sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
@@ -1395,15 +1457,15 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r1
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
     mov     r1, r9                 @ pass Thread::Current
     mov     r2, sp                 @ pass SP
     blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     // Tear down the callee-save frame. Skip arg registers.
-    add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz    r2, 1f                 @ success if no exception is pending
     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
     bx      lr                     @ return on success
@@ -1418,22 +1480,22 @@
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
     @ Make stack crawlable and clobber r2 and r3 (post saving)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2
-    @ preserve r0 (not normally an arg) knowing there is a spare slot in kRefsAndArgs.
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
+    @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
     str   r0, [sp, #4]
     mov   r2, r9         @ pass Thread::Current
     mov   r3, lr         @ pass LR
     blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, LR)
     mov   r12, r0        @ r12 holds reference to code
     ldr   r0, [sp, #4]   @ restore r0
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     blx   r12            @ call method with lr set to art_quick_instrumentation_exit
 @ Deliberate fall-through into art_quick_instrumentation_exit.
     .type art_quick_instrumentation_exit, #function
     .global art_quick_instrumentation_exit
 art_quick_instrumentation_exit:
     mov   lr, #0         @ link register is to here, so clobber with 0 for later checks
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2  @ set up frame knowing r2 and r3 must be dead on exit
+    SETUP_SAVE_REFS_ONLY_FRAME r2  @ set up frame knowing r2 and r3 must be dead on exit
     mov   r12, sp        @ remember bottom of caller's frame
     push  {r0-r1}        @ save return value
     .cfi_adjust_cfa_offset 8
@@ -1472,7 +1534,7 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
     mov    r0, r9         @ Set up args.
     blx    artDeoptimize  @ artDeoptimize(Thread*)
 END art_quick_deoptimize
@@ -1483,7 +1545,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
     mov    r0, r9                         @ Set up args.
     blx    artDeoptimizeFromCompiledCode  @ artDeoptimizeFromCompiledCode(Thread*)
 END art_quick_deoptimize_from_compiled_code
@@ -1772,6 +1834,20 @@
      */
 .macro READ_BARRIER_MARK_REG name, reg
 ENTRY \name
+    // Null check so that we can load the lock word.
+    cmp \reg, #0
+    beq .Lret_rb_\name
+    // Check lock word for mark bit, if marked return.
+    push {r0}
+    ldr r0, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    and r0, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    cbz r0, .Lslow_rb_\name
+    // Restore r0 and return.
+    pop   {r0}
+    bx    lr
+
+.Lslow_rb_\name:
+    pop   {r0}
     push  {r0-r4, r9, r12, lr}          @ save return address and core caller-save registers
     .cfi_adjust_cfa_offset 32
     .cfi_rel_offset r0, 0
@@ -1831,6 +1907,8 @@
       .endif
     .endif
     pop   {r0-r4, r9, r12, pc}          @ restore caller-save registers and return
+.Lret_rb_\name:
+    bx lr
 END \name
 .endm
 
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 0fb8a63..4b23c77 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -34,6 +34,9 @@
     (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3);
 static constexpr uint32_t kArmCalleeSaveAllSpills =
     (1 << art::arm::R4) | (1 << art::arm::R9);
+static constexpr uint32_t kArmCalleeSaveEverythingSpills =
+    (1 << art::arm::R0) | (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3) |
+    (1 << art::arm::R4) | (1 << art::arm::R9) | (1 << art::arm::R12);
 
 static constexpr uint32_t kArmCalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kArmCalleeSaveFpRefSpills = 0;
@@ -47,17 +50,21 @@
     (1 << art::arm::S20) | (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) |
     (1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) | (1 << art::arm::S27) |
     (1 << art::arm::S28) | (1 << art::arm::S29) | (1 << art::arm::S30) | (1 << art::arm::S31);
+static constexpr uint32_t kArmCalleeSaveFpEverythingSpills =
+    kArmCalleeSaveFpArgSpills | kArmCalleeSaveFpAllSpills;
 
 constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArmCalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArmCalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArmCalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArmCalleeSaveFpAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArmCalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 989ecc6..5e7b51d 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -19,8 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 176
+#define FRAME_SIZE_SAVE_REFS_ONLY 96
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 224
+#define FRAME_SIZE_SAVE_EVERYTHING 512
 
 #endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index a5be52d..bdad966 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -21,25 +21,25 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    // Loads appropriate callee-save-method.
-    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
 
     sub sp, sp, #176
     .cfi_adjust_cfa_offset 176
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 176)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
 #endif
 
     // Stack alignment filler [sp, #8].
@@ -74,7 +74,7 @@
     .cfi_rel_offset x29, 160
     .cfi_rel_offset x30, 168
 
-    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs].
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
     str xIP0, [sp]
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
@@ -83,25 +83,25 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_ONLY_FRAME
+    // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefOnly]  .
-    // Loads appropriate callee-save-method.
-    ldr xIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
 
     sub sp, sp, #96
     .cfi_adjust_cfa_offset 96
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 96)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
 #endif
 
     // GP callee-saves.
@@ -126,7 +126,7 @@
     .cfi_rel_offset x29, 80
     .cfi_rel_offset x30, 88
 
-    // Store ArtMethod* Runtime::callee_save_methods_[kRefsOnly].
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
     stp xIP0, x20, [sp]
     .cfi_rel_offset x20, 8
 
@@ -136,7 +136,7 @@
 .endm
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     // Callee-saves.
     ldr x20, [sp, #8]
     .cfi_restore x20
@@ -165,24 +165,24 @@
     .cfi_adjust_cfa_offset -96
 .endm
 
-.macro POP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro POP_SAVE_REFS_ONLY_FRAME
     add sp, sp, #96
     .cfi_adjust_cfa_offset - 96
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
     ret
 .endm
 
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     sub sp, sp, #224
     .cfi_adjust_cfa_offset 224
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 224)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
 #endif
 
     // Stack alignment filler [sp, #8].
@@ -235,30 +235,31 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      *
      * TODO This is probably too conservative - saving FP & LR.
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
+    // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    ldr xIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
 
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
 
-    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
@@ -266,7 +267,7 @@
 .endm
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     // FP args.
     ldp d0, d1, [sp, #16]
     ldp d2, d3, [sp, #32]
@@ -316,6 +317,204 @@
     .cfi_adjust_cfa_offset -224
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    sub sp, sp, #512
+    .cfi_adjust_cfa_offset 512
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
+#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
+#endif
+
+    // Save FP registers.
+    stp d0, d1,   [sp, #8]
+    stp d2, d3,   [sp, #24]
+    stp d4, d5,   [sp, #40]
+    stp d6, d7,   [sp, #56]
+    stp d8, d9,   [sp, #72]
+    stp d10, d11, [sp, #88]
+    stp d12, d13, [sp, #104]
+    stp d14, d15, [sp, #120]
+    stp d16, d17, [sp, #136]
+    stp d18, d19, [sp, #152]
+    stp d20, d21, [sp, #168]
+    stp d22, d23, [sp, #184]
+    stp d24, d25, [sp, #200]
+    stp d26, d27, [sp, #216]
+    stp d28, d29, [sp, #232]
+    stp d30, d31, [sp, #248]
+
+    // Save core registers.
+    str x0,       [sp, #264]
+    .cfi_rel_offset x0, 264
+
+    stp x1, x2,   [sp, #272]
+    .cfi_rel_offset x1, 272
+    .cfi_rel_offset x2, 280
+
+    stp x3, x4,   [sp, #288]
+    .cfi_rel_offset x3, 288
+    .cfi_rel_offset x4, 296
+
+    stp x5, x6,   [sp, #304]
+    .cfi_rel_offset x5, 304
+    .cfi_rel_offset x6, 312
+
+    stp x7, x8,   [sp, #320]
+    .cfi_rel_offset x7, 320
+    .cfi_rel_offset x8, 328
+
+    stp x9, x10,  [sp, #336]
+    .cfi_rel_offset x9, 336
+    .cfi_rel_offset x10, 344
+
+    stp x11, x12, [sp, #352]
+    .cfi_rel_offset x11, 352
+    .cfi_rel_offset x12, 360
+
+    stp x13, x14, [sp, #368]
+    .cfi_rel_offset x13, 368
+    .cfi_rel_offset x14, 376
+
+    stp x15, x16, [sp, #384]
+    .cfi_rel_offset x15, 384
+    .cfi_rel_offset x16, 392
+
+    stp x17, x18, [sp, #400]
+    .cfi_rel_offset x17, 400
+    .cfi_rel_offset x18, 408
+
+    stp x19, x20, [sp, #416]
+    .cfi_rel_offset x19, 416
+    .cfi_rel_offset x20, 424
+
+    stp x21, x22, [sp, #432]
+    .cfi_rel_offset x21, 432
+    .cfi_rel_offset x22, 440
+
+    stp x23, x24, [sp, #448]
+    .cfi_rel_offset x23, 448
+    .cfi_rel_offset x24, 456
+
+    stp x25, x26, [sp, #464]
+    .cfi_rel_offset x25, 464
+    .cfi_rel_offset x26, 472
+
+    stp x27, x28, [sp, #480]
+    .cfi_rel_offset x27, 480
+    .cfi_rel_offset x28, 488
+
+    stp x29, xLR, [sp, #496]
+    .cfi_rel_offset x29, 496
+    .cfi_rel_offset x30, 504
+
+    // art::Runtime** xIP0 = &art::Runtime::instance_
+    adrp xIP0, :got:_ZN3art7Runtime9instance_E
+    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
+
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
+
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
+    str xIP0, [sp]
+    // Place sp in Thread::Current()->top_quick_frame.
+    mov xIP0, sp
+    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    // Restore FP registers.
+    ldp d0, d1,   [sp, #8]
+    ldp d2, d3,   [sp, #24]
+    ldp d4, d5,   [sp, #40]
+    ldp d6, d7,   [sp, #56]
+    ldp d8, d9,   [sp, #72]
+    ldp d10, d11, [sp, #88]
+    ldp d12, d13, [sp, #104]
+    ldp d14, d15, [sp, #120]
+    ldp d16, d17, [sp, #136]
+    ldp d18, d19, [sp, #152]
+    ldp d20, d21, [sp, #168]
+    ldp d22, d23, [sp, #184]
+    ldp d24, d25, [sp, #200]
+    ldp d26, d27, [sp, #216]
+    ldp d28, d29, [sp, #232]
+    ldp d30, d31, [sp, #248]
+
+    // Restore core registers.
+    ldr x0,       [sp, #264]
+    .cfi_restore x0
+
+    ldp x1, x2,   [sp, #272]
+    .cfi_restore x1
+    .cfi_restore x2
+
+    ldp x3, x4,   [sp, #288]
+    .cfi_restore x3
+    .cfi_restore x4
+
+    ldp x5, x6,   [sp, #304]
+    .cfi_restore x5
+    .cfi_restore x6
+
+    ldp x7, x8,   [sp, #320]
+    .cfi_restore x7
+    .cfi_restore x8
+
+    ldp x9, x10,  [sp, #336]
+    .cfi_restore x9
+    .cfi_restore x10
+
+    ldp x11, x12, [sp, #352]
+    .cfi_restore x11
+    .cfi_restore x12
+
+    ldp x13, x14, [sp, #368]
+    .cfi_restore x13
+    .cfi_restore x14
+
+    ldp x15, x16, [sp, #384]
+    .cfi_restore x15
+    .cfi_restore x16
+
+    ldp x17, x18, [sp, #400]
+    .cfi_restore x17
+    .cfi_restore x18
+
+    ldp x19, x20, [sp, #416]
+    .cfi_restore x19
+    .cfi_restore x20
+
+    ldp x21, x22, [sp, #432]
+    .cfi_restore x21
+    .cfi_restore x22
+
+    ldp x23, x24, [sp, #448]
+    .cfi_restore x23
+    .cfi_restore x24
+
+    ldp x25, x26, [sp, #464]
+    .cfi_restore x25
+    .cfi_restore x26
+
+    ldp x27, x28, [sp, #480]
+    .cfi_restore x27
+    .cfi_restore x28
+
+    ldp x29, xLR, [sp, #496]
+    .cfi_restore x29
+    .cfi_restore x30
+
+    add sp, sp, #512
+    .cfi_adjust_cfa_offset -512
+.endm
+
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz x0, 1f                // result non-zero branch over
     ret                        // return
@@ -333,7 +532,7 @@
      * exception is Thread::Current()->exception_
      */
 .macro DELIVER_PENDING_EXCEPTION
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov x0, xSELF
 
     // Point of no return.
@@ -368,7 +567,7 @@
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x0, xSELF                     // pass Thread::Current
     b   \cxx_name                     // \cxx_name(Thread*)
 END \c_name
@@ -377,7 +576,7 @@
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context.
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
     mov x1, xSELF                     // pass Thread::Current.
     b   \cxx_name                     // \cxx_name(arg, Thread*).
     brk 0
@@ -387,7 +586,7 @@
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
     b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
     brk 0
@@ -458,7 +657,7 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
@@ -466,7 +665,7 @@
     mov    x3, sp
     bl     \cxx_name                      // (method_idx, this, Thread*, SP)
     mov    xIP0, x1                       // save Method*->code_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     cbz    x0, 1f                         // did we find the target? if not go to exception delivery
     br     xIP0                           // tail call to target
 1:
@@ -1090,7 +1289,7 @@
     ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
     ldxr   w1, [x4]
     mov    x3, x1
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     cbnz   w3, .Lnot_unlocked         // already thin locked
     // unlocked case - x1: original lock word that's zero except for the read barrier bits.
     orr    x2, x1, x2                 // x2 holds thread id with count of 0 with preserved read barrier bits
@@ -1106,9 +1305,9 @@
     cbnz   w2, .Lslow_lock            // lock word and self thread id's match -> recursive lock
                                       // else contention, go to slow path
     mov    x3, x1                     // copy the lock word to check count overflow.
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits.
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
     add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
-    lsr    w3, w2, LOCK_WORD_READ_BARRIER_STATE_SHIFT  // if either of the upper two bits (28-29) are set, we overflowed.
+    lsr    w3, w2, #LOCK_WORD_GC_STATE_SHIFT     // if the first gc state bit is set, we overflowed.
     cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
     add    w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count for real
     stxr   w3, w2, [x4]
@@ -1117,18 +1316,18 @@
 .Llock_stxr_fail:
     b      .Lretry_lock               // retry
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
     mov    x1, xSELF                  // pass Thread::Current
     bl     artLockObjectFromCode      // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object
 
 ENTRY art_quick_lock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
     mov    x1, xSELF                  // pass Thread::Current
     bl     artLockObjectFromCode      // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object_no_inline
 
@@ -1152,17 +1351,17 @@
     cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
     ldr    w2, [xSELF, #THREAD_ID_OFFSET]
     mov    x3, x1                     // copy lock word to check thread id equality
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
     uxth   w3, w3                     // zero top 16 bits
     cbnz   w3, .Lslow_unlock          // do lock word and self thread id's match?
     mov    x3, x1                     // copy lock word to detect transition to unlocked
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
     // transition to unlocked
     mov    x3, x1
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK  // w3: zero except for the preserved read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved gc bits
     dmb    ish                        // full (LoadStore|StoreStore) memory barrier
 #ifndef USE_READ_BARRIER
     str    w3, [x4]
@@ -1183,18 +1382,18 @@
 .Lunlock_stxr_fail:
     b      .Lretry_unlock               // retry
 .Lslow_unlock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
     mov    x1, xSELF                  // pass Thread::Current
     bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_unlock_object
 
 ENTRY art_quick_unlock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
     mov    x1, xSELF                  // pass Thread::Current
     bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_unlock_object_no_inline
 
@@ -1239,7 +1438,7 @@
     .cfi_restore x1
     .cfi_adjust_cfa_offset -32
 
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
     b artThrowClassCastException      // (Class*, Class*, Thread*)
     brk 0                             // We should not return here...
@@ -1427,7 +1626,7 @@
     .cfi_restore x1
     .cfi_adjust_cfa_offset -32
 
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov x1, x2                    // Pass value.
     mov x2, xSELF                 // Pass Thread::Current.
     b artThrowArrayStoreException // (Object*, Object*, Thread*).
@@ -1438,10 +1637,10 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x1, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1450,10 +1649,10 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1462,10 +1661,10 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1474,10 +1673,10 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x4, xSELF                  // pass Thread::Current
     bl     \entrypoint                //
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
     DELIVER_PENDING_EXCEPTION
 END \name
@@ -1487,11 +1686,11 @@
 .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1499,11 +1698,11 @@
 .macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1511,11 +1710,11 @@
 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x3, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
     mov    x4, xSELF                  // pass Thread::Current
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1573,12 +1772,12 @@
 // This is separated out as the argument order is different.
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
                                       // x2 contains the parameter
     mov    x3, xSELF                  // pass Thread::Current
     bl     artSet64StaticFromCode
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_set64_static
 
@@ -1590,7 +1789,20 @@
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+// Comment out allocators that have arm64 specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) implemented in asm
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
@@ -1690,13 +1902,78 @@
     mov    x0, x3                                             // Set the return value and return.
     ret
 .Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME      // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME             // save callee saves in case of GC
     mov    x2, xSELF                       // pass Thread::Current
     bl     artAllocObjectFromCodeRosAlloc  // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
+
+// The common fast path code for art_quick_alloc_array_region_tlab.
+.macro ALLOC_ARRAY_TLAB_FAST_PATH slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
+    // Check null class
+    cbz    \wClass, \slowPathLabel
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED \slowPathLabel, \xClass, \wClass, \xCount, \wCount, \xTemp0, \wTemp0, \xTemp1, \wTemp1, \xTemp2, \wTemp2
+.endm
+
+// The common fast path code for art_quick_alloc_array_region_tlab when the class is already resolved.
+.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
+    // Array classes are never finalizable or uninitialized, no need to check.
+    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
+    UNPOISON_HEAP_REF \wTemp0
+    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
+                                                              // bits.
+                                                              // xCount holds a 32-bit value, so
+                                                              // the shift cannot overflow.
+    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
+    // Add array data offset and alignment.
+    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
+#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
+#error Long array data offset must be 4 greater than int array data offset.
+#endif
+
+    add    \xTemp0, \xTemp0, #1                               // Add 4 to the length only if the
+                                                              // component size shift is 3
+                                                              // (for 64 bit alignment).
+    and    \xTemp0, \xTemp0, #4
+    add    \xTemp1, \xTemp1, \xTemp0
+    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED   // Round up the object size by the
+                                                              // object alignment. (addr + 7) & ~7.
+                                                              // The add of 7 was done above.
+
+    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
+    bhs    \slowPathLabel                                     // path.
+
+    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
+                                                              // we use (end - begin) to handle
+                                                              // negative size arrays. It is
+                                                              // assumed that a negative size will
+                                                              // always be greater unsigned than
+                                                              // region size.
+    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
+    sub    \xTemp2, \xTemp2, \xTemp0
+    cmp    \xTemp1, \xTemp2
+    bhi    \slowPathLabel
+
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+                                                              // Move old thread_local_pos to x0
+                                                              // for the return value.
+    mov    x0, \xTemp0
+    add    \xTemp0, \xTemp0, \xTemp1
+    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
+    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
+    add    \xTemp0, \xTemp0, #1
+    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
+    POISON_HEAP_REF \wClass
+    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
+    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
+                                                              // Fence.
+    dmb    ishst
+    ret
+.endm
+
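
For readers checking the arithmetic in ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED above, here is a minimal C++ sketch of the same size computation and TLAB-space check. It is illustrative only: the constant values are assumptions (the real ones come from the generated asm_support constants), and the helper names are invented for this sketch.

    #include <cstddef>
    #include <cstdint>

    // Assumed values for illustration; the real constants are MIRROR_INT_ARRAY_DATA_OFFSET,
    // OBJECT_ALIGNMENT_MASK and MIN_LARGE_OBJECT_THRESHOLD from the generated headers.
    constexpr size_t kIntArrayDataOffset = 12;
    constexpr size_t kObjectAlignment    = 8;       // OBJECT_ALIGNMENT_MASK == kObjectAlignment - 1
    constexpr size_t kMinLargeObject     = 12 * 1024;

    // Size = (count << component_size_shift) + data offset, plus 4 more when the shift is 3 so
    // that 64-bit elements stay 8-byte aligned, rounded up to the object alignment.
    inline size_t ArrayAllocationSize(uint32_t count, uint32_t component_size_shift) {
      size_t size = (static_cast<size_t>(count) << component_size_shift)
                    + kIntArrayDataOffset + (kObjectAlignment - 1);
      size += ((component_size_shift + 1) & 4);     // Same trick as the add/and pair above.
      return size & ~(kObjectAlignment - 1);
    }

    // The TLAB check compares against (end - pos) unsigned, so an over-large size computed from
    // a negative count can never pass; sizes at or above the large-object threshold go slow path.
    inline bool TlabFastPathFits(uintptr_t pos, uintptr_t end, size_t size) {
      return size < kMinLargeObject && size <= static_cast<size_t>(end - pos);
    }
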
 // The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
 //
 // x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
@@ -1704,8 +1981,11 @@
 // Need to preserve x0 and x1 to the slow path.
 .macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
     cbz    x2, \slowPathLabel                                 // Check null class
-                                                              // Check class status.
-    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
+    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
+.endm
+
+.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
+    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]              // Check class status.
     cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
     bne    \slowPathLabel
                                                               // Add a fake dependence from the
@@ -1718,6 +1998,10 @@
                                                               // a load-acquire for the status).
     eor    x3, x3, x3
     add    x2, x2, x3
+    ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED \slowPathLabel
+.endm
+
+.macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
                                                               // Check access flags has
                                                               // kAccClassIsFinalizable.
     ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
@@ -1772,31 +2056,44 @@
     ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // Save callee saves in case of GC.
+    SETUP_SAVE_REFS_ONLY_FRAME           // Save callee saves in case of GC.
     mov    x2, xSELF                     // Pass Thread::Current.
     bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_tlab
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-ENTRY art_quick_alloc_object_region_tlab
+// The common code for art_quick_alloc_object_*region_tlab
+.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved
+ENTRY \name
     // Fast path region tlab allocation.
-    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index.
     // x2-x7: free.
 #if !defined(USE_READ_BARRIER)
     mvn    x0, xzr                                            // Read barrier must be enabled here.
     ret                                                       // Return -1.
 #endif
+.if \is_resolved
+    mov    x2, x0 // class is actually stored in x0 already
+.else
     ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
                                                               // Load the class (x2)
     ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
-                                                              // Read barrier for class load.
+.endif
+    // Most common case: GC is not marking.
     ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
-    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    cbnz   x3, .Lmarking\name
+.Ldo_allocation\name:
+    \fast_path .Lslow_path\name
+.Lmarking\name:
+    // GC is marking, check the lock word of the class for the mark bit.
+    // If the class is null, go slow path. The check is required to read the lock word.
+    cbz    w2, .Lslow_path\name
+    // Class is not null, check mark bit in lock word.
+    ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    // If the bit is not zero, do the allocation.
+    tbnz    w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
                                                               // The read barrier slow path. Mark
                                                               // the class.
     stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, lr).
@@ -1807,35 +2104,97 @@
     ldp    x0, x1, [sp, #0]                                   // Restore registers.
     ldr    xLR, [sp, #16]
     add    sp, sp, #32
-    b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
-.Lart_quick_alloc_object_region_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // Save callee saves in case of GC.
+    b      .Ldo_allocation\name
+.Lslow_path\name:
+    SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
     mov    x2, xSELF                           // Pass Thread::Current.
-    bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    bl     \entrypoint                         // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_region_tlab
+END \name
+.endm
+
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH, 0
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1
+
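
The control flow that GENERATE_ALLOC_OBJECT_REGION_TLAB expands to for the three entrypoints above is easier to follow in pseudo-C++. This is a sketch with placeholder declarations; only artReadBarrierMark, the null check and the lock-word mark-bit test come from the patch, the mark-bit shift value is assumed, and for the unresolved variant the class is first loaded from the method's dex cache.

    #include <cstdint>

    // Placeholder declarations for illustration only.
    struct Object; struct Class; struct Thread;
    Object* FastPathAlloc(Class* klass, Thread* self);   // ALLOC_OBJECT_TLAB_FAST_PATH* body
    Object* SlowPathAlloc(Class* klass, Thread* self);   // artAllocObjectFromCode*RegionTLAB
    extern "C" Object* artReadBarrierMark(Object* ref);  // named in the patch
    bool IsGcMarking(Thread* self);                       // THREAD_IS_GC_MARKING_OFFSET flag
    uint32_t LockWord(Class* klass);                      // MIRROR_OBJECT_LOCK_WORD_OFFSET load
    constexpr int kLockWordMarkBitShift = 29;             // LOCK_WORD_MARK_BIT_SHIFT (assumed value)

    Object* AllocObjectRegionTlab(Class* klass, Thread* self) {
      if (!IsGcMarking(self)) {
        return FastPathAlloc(klass, self);                // Most common case: GC is not marking.
      }
      if (klass == nullptr) {
        return SlowPathAlloc(klass, self);                // Must null-check before the lock word read.
      }
      if ((LockWord(klass) >> kLockWordMarkBitShift) & 1u) {
        return FastPathAlloc(klass, self);                // Class already marked: no read barrier needed.
      }
      klass = reinterpret_cast<Class*>(artReadBarrierMark(reinterpret_cast<Object*>(klass)));
      return FastPathAlloc(klass, self);                  // Allocate with the marked class.
    }
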
+// The common code for art_quick_alloc_array_*region_tlab
+.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path, is_resolved
+ENTRY \name
+    // Fast path array allocation for region tlab allocation.
+    // x0: uint32_t type_idx
+    // x1: int32_t component_count
+    // x2: ArtMethod* method
+    // x3-x7: free.
+#if !defined(USE_READ_BARRIER)
+    mvn    x0, xzr                                            // Read barrier must be enabled here.
+    ret                                                       // Return -1.
+#endif
+.if \is_resolved
+    mov    x3, x0
+    // If already resolved, class is stored in x0
+.else
+    ldr    x3, [x2, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x3)
+    ldr    w3, [x3, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+.endif
+    // Most common case: GC is not marking.
+    ldr    w4, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x4, .Lmarking\name
+.Ldo_allocation\name:
+    \fast_path .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
+.Lmarking\name:
+    // GC is marking, check the lock word of the class for the mark bit.
+    // If the class is null, go slow path. The check is required to read the lock word.
+    cbz    w3, .Lslow_path\name
+    // Class is not null, check mark bit in lock word.
+    ldr    w4, [x3, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    // If the bit is not zero, do the allocation.
+    tbnz   w4, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, x2, lr).
+    stp    x2, xLR, [sp, #16]
+    mov    x0, x3                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    x3, x0                                             // Get the (marked) class back.
+    ldp    x2, xLR, [sp, #16]
+    ldp    x0, x1, [sp], #32                                  // Restore registers.
+    b      .Ldo_allocation\name
+.Lslow_path\name:
+    // x0: uint32_t type_idx / mirror::Class* klass (if resolved)
+    // x1: int32_t component_count
+    // x2: ArtMethod* method
+    // x3: Thread* self
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    mov    x3, xSELF                  // pass Thread::Current
+    bl     \entrypoint
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END \name
+.endm
+
+GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_region_tlab, artAllocArrayFromCodeRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH, 0
+// TODO: art_quick_alloc_array_resolved_region_tlab seems to not get called. Investigate compiler.
+GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, 1
 
     /*
      * Called by managed code when the thread has been asked to suspend.
      */
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
-    ldrh   w0, [xSELF, #THREAD_FLAGS_OFFSET]  // get xSELF->state_and_flags.as_struct.flags
-    cbnz   w0, .Lneed_suspend                 // check flags == 0
-    ret                                       // return if flags == 0
-.Lneed_suspend:
+    SETUP_SAVE_EVERYTHING_FRAME               // save callee saves for stack crawl
     mov    x0, xSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
     bl     artTestSuspendFromCode             // (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
 END art_quick_test_suspend
 
 ENTRY art_quick_implicit_suspend
     mov    x0, xSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
+    SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
     bl     artTestSuspendFromCode             // (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
 END art_quick_implicit_suspend
 
      /*
@@ -1845,17 +2204,17 @@
      */
      .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
     mov     x2, xSELF                   // pass Thread::Current
     mov     x3, sp                      // pass SP
     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
     ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
     fmov    d0, x0                      // Store result in d0 in case it was float or double
     ret                                 // return on success
 .Lexception_in_proxy:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
@@ -1895,17 +2254,17 @@
 END art_quick_imt_conflict_trampoline
 
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     mov x2, xSELF
     mov x3, sp
     bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
     cbz x0, 1f
     mov xIP0, x0            // Remember returned code pointer in xIP0.
     ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     br xIP0
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
@@ -1965,7 +2324,7 @@
      * Called to do a generic JNI down-call
      */
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
 
     // Save SP , so we can have static CFI info.
     mov x28, sp
@@ -2037,7 +2396,7 @@
     .cfi_def_cfa_register sp
 
     // Tear down the callee-save frame.
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     // store into fpr, for when it's a fpr return...
     fmov d0, x0
@@ -2059,7 +2418,7 @@
  * x1..x7, d0..d7 = arguments to that method.
  */
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
+    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.
 
     //  x0 will contain mirror::ArtMethod* method.
     mov x1, xSELF                          // How to get Thread::Current() ???
@@ -2069,7 +2428,7 @@
     //                                      mirror::ArtMethod** sp)
     bl   artQuickToInterpreterBridge
 
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
 
     fmov d0, x0
 
@@ -2082,7 +2441,7 @@
 //
     .extern artInstrumentationMethodEntryFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
 
     mov   x20, x0             // Preserve method reference in a callee-save.
 
@@ -2093,7 +2452,7 @@
     mov   xIP0, x0            // x0 = result of call.
     mov   x0, x20             // Reload method reference.
 
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // Note: will restore xSELF
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
     adr   xLR, art_quick_instrumentation_exit
     br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
 END art_quick_instrumentation_entry
@@ -2102,7 +2461,7 @@
 ENTRY art_quick_instrumentation_exit
     mov   xLR, #0             // Clobber LR for later checks.
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
 
     // We need to save x0 and d0. We could use a callee-save from SETUP_REF_ONLY, but then
     // we would need to fully restore it. As there are a lot of callee-save registers, it seems
@@ -2125,7 +2484,7 @@
     ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
     .cfi_adjust_cfa_offset 16
 
-    POP_REFS_ONLY_CALLEE_SAVE_FRAME
+    POP_SAVE_REFS_ONLY_FRAME
 
     br    xIP0                // Tail-call out.
 END art_quick_instrumentation_exit
@@ -2136,7 +2495,7 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov    x0, xSELF          // Pass thread.
     bl     artDeoptimize      // artDeoptimize(Thread*)
     brk 0
@@ -2148,7 +2507,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov    x0, xSELF                      // Pass thread.
     bl     artDeoptimizeFromCompiledCode  // artDeoptimizeFromCompiledCode(Thread*)
     brk 0
@@ -2265,6 +2624,8 @@
      */
 .macro READ_BARRIER_MARK_REG name, wreg, xreg
 ENTRY \name
+    // Reference is null, no work to do at all.
+    cbz \wreg, .Lret_rb_\name
     /*
      * Allocate 46 stack slots * 8 = 368 bytes:
      * - 20 slots for core registers X0-X19
@@ -2272,6 +2633,11 @@
      * -  1 slot for return address register XLR
      * -  1 padding slot for 16-byte stack alignment
      */
+    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
+    ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name
+    ret
+.Lslow_path_rb_\name:
     // Save all potentially live caller-save core registers.
     stp   x0, x1,   [sp, #-368]!
     .cfi_adjust_cfa_offset 368
@@ -2360,6 +2726,7 @@
     .cfi_restore x30
     add sp, sp, #368
     .cfi_adjust_cfa_offset -368
+.Lret_rb_\name:
     ret
 END \name
 .endm
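
The two early-outs added to READ_BARRIER_MARK_REG above amount to the following check, sketched in C++ with placeholder names (the mark-bit shift value is again an assumption):

    #include <cstdint>

    struct Object;                               // Placeholder for the referenced object type.
    uint32_t LockWord(Object* ref);              // MIRROR_OBJECT_LOCK_WORD_OFFSET load
    Object* SlowPathMark(Object* ref);           // Spills caller-saves, then calls the mark routine.
    constexpr int kLockWordMarkBitShift = 29;    // LOCK_WORD_MARK_BIT_SHIFT (assumed value)

    Object* ReadBarrierMarkReg(Object* ref) {
      if (ref == nullptr) {
        return nullptr;                          // Null reference: nothing to do at all.
      }
      if ((LockWord(ref) >> kLockWordMarkBitShift) & 1u) {
        return ref;                              // Mark bit already set: return unchanged.
      }
      return SlowPathMark(ref);                  // Otherwise take the register-saving slow path.
    }
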
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index b3d250b..36f283b 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -29,7 +29,7 @@
 static constexpr uint32_t kArm64CalleeSaveAlwaysSpills =
     // Note: ArtMethod::GetReturnPcOffsetInBytes() rely on the assumption that
     // LR is always saved on the top of the frame for all targets.
-    // That is, lr = *(sp + framesize - pointsize).
+    // That is, lr = *(sp + framesize - pointer_size).
     (1 << art::arm64::LR);
 // Callee saved registers
 static constexpr uint32_t kArm64CalleeSaveRefSpills =
@@ -44,6 +44,14 @@
     (1 << art::arm64::X7);
 static constexpr uint32_t kArm64CalleeSaveAllSpills =
     (1 << art::arm64::X19);
+static constexpr uint32_t kArm64CalleeSaveEverythingSpills =
+    (1 << art::arm64::X0) | (1 << art::arm64::X1) | (1 << art::arm64::X2) |
+    (1 << art::arm64::X3) | (1 << art::arm64::X4) | (1 << art::arm64::X5) |
+    (1 << art::arm64::X6) | (1 << art::arm64::X7) | (1 << art::arm64::X8) |
+    (1 << art::arm64::X9) | (1 << art::arm64::X10) | (1 << art::arm64::X11) |
+    (1 << art::arm64::X12) | (1 << art::arm64::X13) | (1 << art::arm64::X14) |
+    (1 << art::arm64::X15) | (1 << art::arm64::X16) | (1 << art::arm64::X17) |
+    (1 << art::arm64::X18) | (1 << art::arm64::X19);
 
 static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0;
@@ -55,17 +63,31 @@
     (1 << art::arm64::D8)  | (1 << art::arm64::D9)  | (1 << art::arm64::D10) |
     (1 << art::arm64::D11)  | (1 << art::arm64::D12)  | (1 << art::arm64::D13) |
     (1 << art::arm64::D14)  | (1 << art::arm64::D15);
+static constexpr uint32_t kArm64CalleeSaveFpEverythingSpills =
+    (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
+    (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
+    (1 << art::arm64::D6) | (1 << art::arm64::D7) | (1 << art::arm64::D8) |
+    (1 << art::arm64::D9) | (1 << art::arm64::D10) | (1 << art::arm64::D11) |
+    (1 << art::arm64::D12) | (1 << art::arm64::D13) | (1 << art::arm64::D14) |
+    (1 << art::arm64::D15) | (1 << art::arm64::D16) | (1 << art::arm64::D17) |
+    (1 << art::arm64::D18) | (1 << art::arm64::D19) | (1 << art::arm64::D20) |
+    (1 << art::arm64::D21) | (1 << art::arm64::D22) | (1 << art::arm64::D23) |
+    (1 << art::arm64::D24) | (1 << art::arm64::D25) | (1 << art::arm64::D26) |
+    (1 << art::arm64::D27) | (1 << art::arm64::D28) | (1 << art::arm64::D29) |
+    (1 << art::arm64::D30) | (1 << art::arm64::D31);
 
 constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArm64CalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArm64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kArm64CalleeSaveFpAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArm64CalleeSaveFpAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArm64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
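
As a quick cross-check of the new arm64 kSaveEverything masks above, a small self-contained C++ sketch (not part of the patch) counting the registers they cover; std::popcount requires C++20, and the bit positions assume X0-X19 map to bits 0-19 as in registers_arm64.h.

    #include <bit>
    #include <cstdint>

    // X0..X19 are set in kArm64CalleeSaveEverythingSpills, D0..D31 in the FP counterpart.
    constexpr uint32_t kCoreEverything = (1u << 20) - 1u;   // bits 0..19
    constexpr uint32_t kFpEverything   = ~0u;                // bits 0..31

    static_assert(std::popcount(kCoreEverything) == 20, "X0-X19: 20 core registers");
    static_assert(std::popcount(kFpEverything) == 32, "D0-D31: 32 FP registers");
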
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 453056d..135b074 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -19,8 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 96
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 48
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 80
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 96
+#define FRAME_SIZE_SAVE_REFS_ONLY 48
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 80
+#define FRAME_SIZE_SAVE_EVERYTHING 256
 
 #endif  // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index c1b8044..3d393f6 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -30,19 +30,19 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      * Callee-save: $s0-$s8 + $gp + $ra, 11 total + 1 word for Method*
      * Clobbers $t0 and $sp
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVES + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     addiu  $sp, $sp, -96
     .cfi_adjust_cfa_offset 96
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 96)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 96)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(MIPS) size not as expected."
 #endif
 
     sw     $ra, 92($sp)
@@ -79,7 +79,7 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
@@ -88,20 +88,20 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly). Restoration assumes non-moving GC.
      * Does not include rSUSPEND or rSELF
      * callee-save: $s2-$s8 + $gp + $ra, 9 total + 2 words padding + 1 word to hold Method*
      * Clobbers $t0 and $sp
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_REFS_ONLY_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_REFS_ONLY + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_ONLY_FRAME
     addiu  $sp, $sp, -48
     .cfi_adjust_cfa_offset 48
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 48)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 48)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(MIPS) size not as expected."
 #endif
 
     sw     $ra, 44($sp)
@@ -126,14 +126,14 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
     lw     $ra, 44($sp)
@@ -158,24 +158,24 @@
     .cfi_adjust_cfa_offset -48
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
     jalr   $zero, $ra
     nop
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     addiu  $sp, $sp, -80
     .cfi_adjust_cfa_offset 80
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 80)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 80)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS) size not as expected."
 #endif
 
     sw     $ra, 76($sp)
@@ -209,17 +209,17 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
      * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      * Clobbers $t0 and $sp
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
@@ -228,22 +228,22 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
      * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      * Clobbers $sp
      * Use $a0 as the Method* and loads it into bottom of stack.
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     sw $a0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
     lw     $ra, 76($sp)
@@ -277,11 +277,208 @@
 .endm
 
     /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp, $ra, $f0-$f31;
+     *              28 (GPR) + 32 (FPR) + 3 words for padding and 1 word for Method*
+     * Clobbers $t0 and $t1.
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    addiu  $sp, $sp, -256
+    .cfi_adjust_cfa_offset 256
+
+     // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 256)
+#error "FRAME_SIZE_SAVE_EVERYTHING(MIPS) size not as expected."
+#endif
+
+    sw     $ra, 252($sp)
+    .cfi_rel_offset 31, 252
+    sw     $fp, 248($sp)
+    .cfi_rel_offset 30, 248
+    sw     $gp, 244($sp)
+    .cfi_rel_offset 28, 244
+    sw     $t9, 240($sp)
+    .cfi_rel_offset 25, 240
+    sw     $t8, 236($sp)
+    .cfi_rel_offset 24, 236
+    sw     $s7, 232($sp)
+    .cfi_rel_offset 23, 232
+    sw     $s6, 228($sp)
+    .cfi_rel_offset 22, 228
+    sw     $s5, 224($sp)
+    .cfi_rel_offset 21, 224
+    sw     $s4, 220($sp)
+    .cfi_rel_offset 20, 220
+    sw     $s3, 216($sp)
+    .cfi_rel_offset 19, 216
+    sw     $s2, 212($sp)
+    .cfi_rel_offset 18, 212
+    sw     $s1, 208($sp)
+    .cfi_rel_offset 17, 208
+    sw     $s0, 204($sp)
+    .cfi_rel_offset 16, 204
+    sw     $t7, 200($sp)
+    .cfi_rel_offset 15, 200
+    sw     $t6, 196($sp)
+    .cfi_rel_offset 14, 196
+    sw     $t5, 192($sp)
+    .cfi_rel_offset 13, 192
+    sw     $t4, 188($sp)
+    .cfi_rel_offset 12, 188
+    sw     $t3, 184($sp)
+    .cfi_rel_offset 11, 184
+    sw     $t2, 180($sp)
+    .cfi_rel_offset 10, 180
+    sw     $t1, 176($sp)
+    .cfi_rel_offset 9, 176
+    sw     $t0, 172($sp)
+    .cfi_rel_offset 8, 172
+    sw     $a3, 168($sp)
+    .cfi_rel_offset 7, 168
+    sw     $a2, 164($sp)
+    .cfi_rel_offset 6, 164
+    sw     $a1, 160($sp)
+    .cfi_rel_offset 5, 160
+    sw     $a0, 156($sp)
+    .cfi_rel_offset 4, 156
+    sw     $v1, 152($sp)
+    .cfi_rel_offset 3, 152
+    sw     $v0, 148($sp)
+    .cfi_rel_offset 2, 148
+
+    // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
+    bal 1f
+    .set push
+    .set noat
+    sw     $at, 144($sp)
+    .cfi_rel_offset 1, 144
+    .set pop
+1:
+    .cpload $ra
+
+    SDu $f30, $f31, 136, $sp, $t1
+    SDu $f28, $f29, 128, $sp, $t1
+    SDu $f26, $f27, 120, $sp, $t1
+    SDu $f24, $f25, 112, $sp, $t1
+    SDu $f22, $f23, 104, $sp, $t1
+    SDu $f20, $f21, 96,  $sp, $t1
+    SDu $f18, $f19, 88,  $sp, $t1
+    SDu $f16, $f17, 80,  $sp, $t1
+    SDu $f14, $f15, 72,  $sp, $t1
+    SDu $f12, $f13, 64,  $sp, $t1
+    SDu $f10, $f11, 56,  $sp, $t1
+    SDu $f8, $f9, 48,  $sp, $t1
+    SDu $f6, $f7, 40,  $sp, $t1
+    SDu $f4, $f5, 32,  $sp, $t1
+    SDu $f2, $f3, 24,  $sp, $t1
+    SDu $f0, $f1, 16,  $sp, $t1
+
+    # 3 words padding and 1 word for holding Method*
+
+    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
+    lw $t0, 0($t0)
+    lw $t0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET($t0)
+    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
+    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
+    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
+.endm
+
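
A quick arithmetic check of the 256-byte MIPS save-everything frame laid out above (illustrative only, not part of the patch):

    #include <cstddef>

    constexpr size_t kMipsWord = 4;                       // MIPS32 word size.
    constexpr size_t kGprArea  = 28 * kMipsWord;          // $at, $v0-$v1, $a0-$a3, $t0-$t9, $s0-$s7, $gp, $fp, $ra
    constexpr size_t kFprArea  = 16 * 2 * kMipsWord;      // $f0-$f31, stored as 16 pairs via SDu
    constexpr size_t kMethodAndPadding = 4 * kMipsWord;   // Method* slot plus 3 words of padding

    static_assert(kGprArea + kFprArea + kMethodAndPadding == 256,
                  "Matches FRAME_SIZE_SAVE_EVERYTHING for MIPS");
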
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
+    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
+
+    LDu $f30, $f31, 136, $sp, $t1
+    LDu $f28, $f29, 128, $sp, $t1
+    LDu $f26, $f27, 120, $sp, $t1
+    LDu $f24, $f25, 112, $sp, $t1
+    LDu $f22, $f23, 104, $sp, $t1
+    LDu $f20, $f21, 96,  $sp, $t1
+    LDu $f18, $f19, 88,  $sp, $t1
+    LDu $f16, $f17, 80,  $sp, $t1
+    LDu $f14, $f15, 72,  $sp, $t1
+    LDu $f12, $f13, 64,  $sp, $t1
+    LDu $f10, $f11, 56,  $sp, $t1
+    LDu $f8, $f9, 48,  $sp, $t1
+    LDu $f6, $f7, 40,  $sp, $t1
+    LDu $f4, $f5, 32,  $sp, $t1
+    LDu $f2, $f3, 24,  $sp, $t1
+    LDu $f0, $f1, 16,  $sp, $t1
+
+    lw     $ra, 252($sp)
+    .cfi_restore 31
+    lw     $fp, 248($sp)
+    .cfi_restore 30
+    lw     $gp, 244($sp)
+    .cfi_restore 28
+    lw     $t9, 240($sp)
+    .cfi_restore 25
+    lw     $t8, 236($sp)
+    .cfi_restore 24
+    lw     $s7, 232($sp)
+    .cfi_restore 23
+    lw     $s6, 228($sp)
+    .cfi_restore 22
+    lw     $s5, 224($sp)
+    .cfi_restore 21
+    lw     $s4, 220($sp)
+    .cfi_restore 20
+    lw     $s3, 216($sp)
+    .cfi_restore 19
+    lw     $s2, 212($sp)
+    .cfi_restore 18
+    lw     $s1, 208($sp)
+    .cfi_restore 17
+    lw     $s0, 204($sp)
+    .cfi_restore 16
+    lw     $t7, 200($sp)
+    .cfi_restore 15
+    lw     $t6, 196($sp)
+    .cfi_restore 14
+    lw     $t5, 192($sp)
+    .cfi_restore 13
+    lw     $t4, 188($sp)
+    .cfi_restore 12
+    lw     $t3, 184($sp)
+    .cfi_restore 11
+    lw     $t2, 180($sp)
+    .cfi_restore 10
+    lw     $t1, 176($sp)
+    .cfi_restore 9
+    lw     $t0, 172($sp)
+    .cfi_restore 8
+    lw     $a3, 168($sp)
+    .cfi_restore 7
+    lw     $a2, 164($sp)
+    .cfi_restore 6
+    lw     $a1, 160($sp)
+    .cfi_restore 5
+    lw     $a0, 156($sp)
+    .cfi_restore 4
+    lw     $v1, 152($sp)
+    .cfi_restore 3
+    lw     $v0, 148($sp)
+    .cfi_restore 2
+    .set push
+    .set noat
+    lw     $at, 144($sp)
+    .cfi_restore 1
+    .set pop
+
+    addiu  $sp, $sp, 256            # pop frame
+    .cfi_adjust_cfa_offset -256
+.endm
+
+    /*
      * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_
      */
 .macro DELIVER_PENDING_EXCEPTION
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
     la      $t9, artDeliverPendingExceptionFromCode
     jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
     move    $a0, rSELF                   # pass Thread::Current
@@ -289,7 +486,7 @@
 
 .macro RETURN_IF_NO_EXCEPTION
     lw     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bnez   $t0, 1f                       # success if no exception is pending
     nop
     jalr   $zero, $ra
@@ -299,7 +496,7 @@
 .endm
 
 .macro RETURN_IF_ZERO
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bnez   $v0, 1f                       # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -309,7 +506,7 @@
 .endm
 
 .macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     beqz   $v0, 1f                       # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -489,7 +686,7 @@
      * the bottom of the thread. On entry a0 holds Throwable*
      */
 ENTRY art_quick_deliver_exception
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artDeliverExceptionFromCode
     jalr $zero, $t9                 # artDeliverExceptionFromCode(Throwable*, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -500,7 +697,7 @@
      */
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowNullPointerExceptionFromCode
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -512,7 +709,7 @@
      */
     .extern artThrowNullPointerExceptionFromSignal
 ENTRY art_quick_throw_null_pointer_exception_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowNullPointerExceptionFromSignal
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -523,7 +720,7 @@
      */
     .extern artThrowDivZeroFromCode
 ENTRY art_quick_throw_div_zero
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowDivZeroFromCode
     jalr $zero, $t9                 # artThrowDivZeroFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -534,7 +731,7 @@
      */
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowArrayBoundsFromCode
     jalr $zero, $t9                 # artThrowArrayBoundsFromCode(index, limit, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -546,7 +743,7 @@
      */
     .extern artThrowStringBoundsFromCode
 ENTRY art_quick_throw_string_bounds
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowStringBoundsFromCode
     jalr $zero, $t9                 # artThrowStringBoundsFromCode(index, limit, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -557,7 +754,7 @@
      */
     .extern artThrowStackOverflowFromCode
 ENTRY art_quick_throw_stack_overflow
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowStackOverflowFromCode
     jalr $zero, $t9                 # artThrowStackOverflowFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -568,7 +765,7 @@
      */
     .extern artThrowNoSuchMethodFromCode
 ENTRY art_quick_throw_no_such_method
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowNoSuchMethodFromCode
     jalr $zero, $t9                 # artThrowNoSuchMethodFromCode(method_idx, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -591,13 +788,13 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME         # save callee saves in case allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
     la    $t9, \cxx_name
     jalr  $t9                              # (method_idx, this, Thread*, $sp)
     addiu $a3, $sp, ARG_SLOT_SIZE          # pass $sp (remove arg slots)
     move  $a0, $v0                         # save target Method*
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     beqz  $v0, 1f
     move  $t9, $v1                         # save $v0->code_
     jalr  $zero, $t9
@@ -908,11 +1105,11 @@
      */
     .extern artHandleFillArrayDataFromCode
 ENTRY art_quick_handle_fill_data
-    lw     $a2, 0($sp)                    # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case exception allocation triggers GC
+    lw     $a2, 0($sp)                # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
     la     $t9, artHandleFillArrayDataFromCode
-    jalr   $t9                            # (payload offset, Array*, method, Thread*)
-    move   $a3, rSELF                     # pass Thread::Current
+    jalr   $t9                        # (payload offset, Array*, method, Thread*)
+    move   $a3, rSELF                 # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_handle_fill_data
 
@@ -923,7 +1120,7 @@
 ENTRY art_quick_lock_object
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     la      $t9, artLockObjectFromCode
     jalr    $t9                           # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
@@ -933,7 +1130,7 @@
 ENTRY art_quick_lock_object_no_inline
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     la      $t9, artLockObjectFromCode
     jalr    $t9                           # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
@@ -947,7 +1144,7 @@
 ENTRY art_quick_unlock_object
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
     la      $t9, artUnlockObjectFromCode
     jalr    $t9                       # (Object* obj, Thread*)
     move    $a1, rSELF                # pass Thread::Current
@@ -957,7 +1154,7 @@
 ENTRY art_quick_unlock_object_no_inline
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
     la      $t9, artUnlockObjectFromCode
     jalr    $t9                       # (Object* obj, Thread*)
     move    $a1, rSELF                # pass Thread::Current
@@ -993,7 +1190,7 @@
     lw     $a0, 0($sp)
     addiu  $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowClassCastException
     jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -1146,7 +1343,7 @@
     .cfi_adjust_cfa_offset -32
     bnez   $v0, .Ldo_aput
     nop
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     move $a1, $a2
     la   $t9, artThrowArrayStoreException
     jalr $zero, $t9                 # artThrowArrayStoreException(Class*, Class*, Thread*)
@@ -1159,7 +1356,7 @@
     .extern artGetBooleanStaticFromCode
 ENTRY art_quick_get_boolean_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetBooleanStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1171,7 +1368,7 @@
     .extern artGetByteStaticFromCode
 ENTRY art_quick_get_byte_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetByteStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1184,7 +1381,7 @@
     .extern artGetCharStaticFromCode
 ENTRY art_quick_get_char_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetCharStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1196,7 +1393,7 @@
     .extern artGetShortStaticFromCode
 ENTRY art_quick_get_short_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetShortStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1209,7 +1406,7 @@
     .extern artGet32StaticFromCode
 ENTRY art_quick_get32_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGet32StaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1222,7 +1419,7 @@
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGet64StaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1235,7 +1432,7 @@
     .extern artGetObjStaticFromCode
 ENTRY art_quick_get_obj_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetObjStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1248,7 +1445,7 @@
     .extern artGetBooleanInstanceFromCode
 ENTRY art_quick_get_boolean_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetBooleanInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1260,7 +1457,7 @@
     .extern artGetByteInstanceFromCode
 ENTRY art_quick_get_byte_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetByteInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1273,7 +1470,7 @@
     .extern artGetCharInstanceFromCode
 ENTRY art_quick_get_char_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetCharInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1285,7 +1482,7 @@
     .extern artGetShortInstanceFromCode
 ENTRY art_quick_get_short_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetShortInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1298,7 +1495,7 @@
     .extern artGet32InstanceFromCode
 ENTRY art_quick_get32_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGet32InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1311,7 +1508,7 @@
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGet64InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1324,7 +1521,7 @@
     .extern artGetObjInstanceFromCode
 ENTRY art_quick_get_obj_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetObjInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1337,7 +1534,7 @@
     .extern artSet8StaticFromCode
 ENTRY art_quick_set8_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet8StaticFromCode
     jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1350,7 +1547,7 @@
     .extern artSet16StaticFromCode
 ENTRY art_quick_set16_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet16StaticFromCode
     jalr   $t9                           # (field_idx, new_val, referrer, Thread*, $sp)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1363,7 +1560,7 @@
     .extern artSet32StaticFromCode
 ENTRY art_quick_set32_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet32StaticFromCode
     jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1377,7 +1574,7 @@
 ENTRY art_quick_set64_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
                                          # 64 bit new_val is in a2:a3 pair
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet64StaticFromCode
     jalr   $t9                           # (field_idx, referrer, new_val, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1390,7 +1587,7 @@
     .extern artSetObjStaticFromCode
 ENTRY art_quick_set_obj_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSetObjStaticFromCode
     jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1403,7 +1600,7 @@
     .extern artSet8InstanceFromCode
 ENTRY art_quick_set8_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet8InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1416,7 +1613,7 @@
     .extern artSet16InstanceFromCode
 ENTRY art_quick_set16_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet16InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1429,7 +1626,7 @@
     .extern artSet32InstanceFromCode
 ENTRY art_quick_set32_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet32InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1443,7 +1640,7 @@
 ENTRY art_quick_set64_instance
     lw     $t1, 0($sp)                   # load referrer's Method*
                                          # 64 bit new_val is in a2:a3 pair
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     sw     rSELF, 20($sp)                # pass Thread::Current
     la     $t9, artSet64InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
@@ -1457,7 +1654,7 @@
     .extern artSetObjInstanceFromCode
 ENTRY art_quick_set_obj_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSetObjInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1468,7 +1665,7 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
     la      $t9, \entrypoint
     jalr    $t9
     move    $a1, rSELF                # pass Thread::Current
@@ -1479,7 +1676,7 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
     la      $t9, \entrypoint
     jalr    $t9
     move    $a2, rSELF                # pass Thread::Current
@@ -1490,7 +1687,7 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
     la      $t9, \entrypoint
     jalr    $t9
     move    $a3, rSELF                # pass Thread::Current
@@ -1501,7 +1698,7 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
     la      $t9, \entrypoint
     jalr    $t9
     sw      rSELF, 16($sp)            # pass Thread::Current
@@ -1612,7 +1809,7 @@
 
   .Lart_quick_alloc_object_rosalloc_slow_path:
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     la    $t9, artAllocObjectFromCodeRosAlloc
     jalr  $t9
     move  $a2, $s1                                                # Pass self as argument.
@@ -1652,18 +1849,20 @@
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
     .extern artTestSuspendFromCode
-ENTRY art_quick_test_suspend
-    lh     $a0, THREAD_FLAGS_OFFSET(rSELF)
-    bnez   $a0, 1f
+ENTRY_NO_GP art_quick_test_suspend
+    lh     rSUSPEND, THREAD_FLAGS_OFFSET(rSELF)
+    bnez   rSUSPEND, 1f
     addiu  rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     jalr   $zero, $ra
     nop
 1:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves for stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME                      # save everything for stack crawl
     la     $t9, artTestSuspendFromCode
-    jalr   $t9                                 # (Thread*)
+    jalr   $t9                                       # (Thread*)
     move   $a0, rSELF
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_EVERYTHING_FRAME
+    jalr   $zero, $ra
+    nop
 END art_quick_test_suspend
 
     /*
@@ -1672,13 +1871,13 @@
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $a2, rSELF                  # pass Thread::Current
     la      $t9, artQuickProxyInvokeHandler
     jalr    $t9                         # (Method* proxy method, receiver, Thread*, SP)
     addiu   $a3, $sp, ARG_SLOT_SIZE     # pass $sp (remove arg slots)
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     bnez    $t0, 1f
     # don't care if $v0 and/or $v1 are modified, when exception branch taken
     MTD     $v0, $v1, $f0, $f1          # move float value to return value
@@ -1729,26 +1928,26 @@
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a2, rSELF                    # pass Thread::Current
     la      $t9, artQuickResolutionTrampoline
     jalr    $t9                           # (Method* called, receiver, Thread*, SP)
     addiu   $a3, $sp, ARG_SLOT_SIZE       # pass $sp (remove arg slots)
     beqz    $v0, 1f
     lw      $a0, ARG_SLOT_SIZE($sp)       # load resolved method to $a0
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
     jalr    $zero, $t9             # tail call to method
     nop
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
     .extern artQuickGenericJniTrampoline
     .extern artQuickGenericJniEndTrampoline
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $s8, $sp               # save $sp to $s8
     move    $s3, $gp               # save $gp to $s3
 
@@ -1795,7 +1994,7 @@
     move    $sp, $s8               # tear down the alloca
 
     # tear down the callee-save frame
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     MTD     $v0, $v1, $f0, $f1     # move float value to return value
     jalr    $zero, $ra
@@ -1809,13 +2008,13 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a1, rSELF                          # pass Thread::Current
     la      $t9, artQuickToInterpreterBridge
     jalr    $t9                                 # (Method* method, Thread*, SP)
     addiu   $a2, $sp, ARG_SLOT_SIZE             # pass $sp (remove arg slots)
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     bnez    $t0, 1f
     # don't care if $v0 and/or $v1 are modified, when exception branch taken
     MTD     $v0, $v1, $f0, $f1                  # move float value to return value
@@ -1831,7 +2030,7 @@
     .extern artInstrumentationMethodEntryFromCode
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     sw       $a0, 28($sp)   # save arg0 in free arg slot
     move     $a3, $ra       # pass $ra
     la       $t9, artInstrumentationMethodEntryFromCode
@@ -1839,7 +2038,7 @@
     move     $a2, rSELF     # pass Thread::Current
     move     $t9, $v0       # $t9 holds reference to code
     lw       $a0, 28($sp)   # restore arg0 from free arg slot
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     jalr     $t9            # call method
     nop
 END art_quick_instrumentation_entry
@@ -1851,7 +2050,7 @@
     .cpload  $t9
     move     $ra, $zero     # link register is to here, so clobber with 0 for later checks
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     addiu    $sp, $sp, -16  # allocate temp storage on the stack
     .cfi_adjust_cfa_offset 16
     sw       $v0, ARG_SLOT_SIZE+12($sp)
@@ -1872,8 +2071,8 @@
     lw       $v1, ARG_SLOT_SIZE+8($sp)
     l.d      $f0, ARG_SLOT_SIZE($sp)
     jalr     $zero, $t9     # return
-    addiu    $sp, $sp, ARG_SLOT_SIZE+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+16  # restore stack
-    .cfi_adjust_cfa_offset -(ARG_SLOT_SIZE+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+16)
+    addiu    $sp, $sp, ARG_SLOT_SIZE+FRAME_SIZE_SAVE_REFS_ONLY+16  # restore stack
+    .cfi_adjust_cfa_offset -(ARG_SLOT_SIZE+FRAME_SIZE_SAVE_REFS_ONLY+16)
 END art_quick_instrumentation_exit
 
     /*
@@ -1882,7 +2081,7 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la       $t9, artDeoptimize
     jalr     $t9            # artDeoptimize(Thread*)
                             # Returns caller method's frame size.
@@ -1895,7 +2094,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la       $t9, artDeoptimizeFromCompiledCode
     jalr     $t9                            # artDeoptimizeFromCompiledCode(Thread*)
                                             # Returns caller method's frame size.
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
index 7b0623b..90e7b20 100644
--- a/runtime/arch/mips/quick_method_frame_info_mips.h
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -34,6 +34,12 @@
     (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3);
 static constexpr uint32_t kMipsCalleeSaveAllSpills =
     (1 << art::mips::S0) | (1 << art::mips::S1);
+static constexpr uint32_t kMipsCalleeSaveEverythingSpills =
+    (1 << art::mips::AT) | (1 << art::mips::V0) | (1 << art::mips::V1) |
+    (1 << art::mips::A0) | (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) |
+    (1 << art::mips::T0) | (1 << art::mips::T1) | (1 << art::mips::T2) | (1 << art::mips::T3) |
+    (1 << art::mips::T4) | (1 << art::mips::T5) | (1 << art::mips::T6) | (1 << art::mips::T7) |
+    (1 << art::mips::S0) | (1 << art::mips::S1) | (1 << art::mips::T8) | (1 << art::mips::T9);
 
 static constexpr uint32_t kMipsCalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kMipsCalleeSaveFpRefSpills = 0;
@@ -43,17 +49,28 @@
     (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
     (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
     (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
+static constexpr uint32_t kMipsCalleeSaveFpEverythingSpills =
+    (1 << art::mips::F0) | (1 << art::mips::F1) | (1 << art::mips::F2) | (1 << art::mips::F3) |
+    (1 << art::mips::F4) | (1 << art::mips::F5) | (1 << art::mips::F6) | (1 << art::mips::F7) |
+    (1 << art::mips::F8) | (1 << art::mips::F9) | (1 << art::mips::F10) | (1 << art::mips::F11) |
+    (1 << art::mips::F12) | (1 << art::mips::F13) | (1 << art::mips::F14) | (1 << art::mips::F15) |
+    (1 << art::mips::F16) | (1 << art::mips::F17) | (1 << art::mips::F18) | (1 << art::mips::F19) |
+    (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
+    (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
+    (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
 
 constexpr uint32_t MipsCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kMipsCalleeSaveAlwaysSpills | kMipsCalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kMipsCalleeSaveAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMipsCalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMipsCalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t MipsCalleeSaveFPSpills(Runtime::CalleeSaveType type) {
   return kMipsCalleeSaveFpAlwaysSpills | kMipsCalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kMipsCalleeSaveAllFPSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMipsCalleeSaveAllFPSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMipsCalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t MipsCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
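
The body of MipsCalleeSaveFrameSize is outside this hunk, but the relationship between the spill masks above and a frame size is worth spelling out. The sketch below is an assumption about the general shape of that computation (count the set bits, add one slot for the ArtMethod*, round up to the stack alignment); it is not the actual function body.

    #include <bit>
    #include <cstdint>

    // Sketch only: how a callee-save frame size can be derived from spill masks
    // like the ones defined above. Names and rounding are illustrative.
    constexpr uint32_t RoundUpTo(uint32_t x, uint32_t align) {
      return (x + align - 1) & ~(align - 1);
    }

    constexpr uint32_t SketchFrameSize(uint32_t core_mask, uint32_t fp_mask,
                                       uint32_t word_size, uint32_t stack_align) {
      uint32_t slots = static_cast<uint32_t>(std::popcount(core_mask)) +
                       static_cast<uint32_t>(std::popcount(fp_mask)) +
                       1u;  // one extra slot for the ArtMethod* at the frame bottom
      return RoundUpTo(slots * word_size, stack_align);
    }
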
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index 06d6211..0a9ab7a 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -25,7 +25,7 @@
 void Thread::InitCpu() {
   CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k32>().Int32Value());
   CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k32>().Int32Value());
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k32>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
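
The one-character fix above simply makes the CHECK use the pointer size that matches 32-bit MIPS. Schematically (a standalone sketch, not the real ThreadOffset machinery), the template argument matters because field offsets inside Thread depend on the pointer width:

    // Standalone sketch: why ExceptionOffset<PointerSize::k32> and <k64> differ.
    enum class PointerSize { k32 = 4, k64 = 8 };

    template <PointerSize kSize>
    constexpr int SketchExceptionOffset(int pointer_fields_before_exception) {
      // Every preceding pointer-sized field shifts the offset by the pointer width.
      return pointer_fields_before_exception * static_cast<int>(kSize);
    }

    static_assert(SketchExceptionOffset<PointerSize::k32>(3) !=
                      SketchExceptionOffset<PointerSize::k64>(3),
                  "32- and 64-bit Thread layouts disagree, hence the fix above");
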
diff --git a/runtime/arch/mips64/asm_support_mips64.h b/runtime/arch/mips64/asm_support_mips64.h
index 995fcf3..9063d20 100644
--- a/runtime/arch/mips64/asm_support_mips64.h
+++ b/runtime/arch/mips64/asm_support_mips64.h
@@ -20,10 +20,12 @@
 #include "asm_support.h"
 
 // 64 ($f24-$f31) + 64 ($s0-$s7) + 8 ($gp) + 8 ($s8) + 8 ($ra) + 1x8 bytes padding
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 160
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 160
 // 48 ($s2-$s7) + 8 ($gp) + 8 ($s8) + 8 ($ra) + 1x8 bytes padding
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 80
+#define FRAME_SIZE_SAVE_REFS_ONLY 80
 // $f12-$f19, $a1-$a7, $s2-$s7 + $gp + $s8 + $ra, 16 total + 1x8 bytes padding + method*
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 208
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 208
+// $f0-$f31, $at, $v0-$v1, $a0-$a7, $t0-$t3, $s0-$s7, $t8-$t9, $gp, $s8, $ra + padding + method*
+#define FRAME_SIZE_SAVE_EVERYTHING 496
 
 #endif  // ART_RUNTIME_ARCH_MIPS64_ASM_SUPPORT_MIPS64_H_
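
A quick arithmetic check of the new constant, mirroring the register list in its comment (this block is a verification sketch, not part of the header): 28 general-purpose and 32 floating-point registers at 8 bytes each, plus 8 bytes of padding and the 8-byte ArtMethod* slot.

    // Verification sketch for FRAME_SIZE_SAVE_EVERYTHING on mips64.
    static_assert((28 /* GPRs */ + 32 /* FPRs */) * 8 + 8 /* padding */ + 8 /* method* */ == 496,
                  "mips64 SAVE_EVERYTHING frame size adds up");
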
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index ae69620..9774eb9 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -41,16 +41,16 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      * callee-save: padding + $f24-$f31 + $s0-$s7 + $gp + $ra + $s8 = 19 total + 1x8 bytes padding
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     daddiu $sp, $sp, -160
     .cfi_adjust_cfa_offset 160
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 160)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 160)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(MIPS64) size not as expected."
 #endif
 
     sd     $ra, 152($sp)
@@ -89,25 +89,25 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t1)
+    ld      $t1, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place ArtMethod* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly). Restoration assumes
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly). Restoration assumes
      * non-moving GC.
      * Does not include rSUSPEND or rSELF
      * callee-save: padding + $s2-$s7 + $gp + $ra + $s8 = 9 total + 1x8 bytes padding
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_ONLY_FRAME
     daddiu $sp, $sp, -80
     .cfi_adjust_cfa_offset 80
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 80)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 80)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(MIPS64) size not as expected."
 #endif
 
     sd     $ra, 72($sp)
@@ -131,12 +131,12 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t1)
+    ld      $t1, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     ld     $ra, 72($sp)
     .cfi_restore 31
     ld     $s8, 64($sp)
@@ -160,7 +160,7 @@
     .cpreturn
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
     ld     $ra, 72($sp)
     .cfi_restore 31
     ld     $s8, 64($sp)
@@ -186,15 +186,15 @@
 .endm
 
 // This assumes the top part of these stack frame types are identical.
-#define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+#define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     daddiu  $sp, $sp, -208
     .cfi_adjust_cfa_offset 208
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 208)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 208)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS64) size not as expected."
 #endif
 
     sd     $ra, 200($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_LrOffset
@@ -244,27 +244,27 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes
      * non-moving GC.
      * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 word padding + Method*
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t1)
+    ld      $t1, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     sd      $a0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     ld     $ra, 200($sp)
     .cfi_restore 31
     ld     $s8, 192($sp)
@@ -314,13 +314,235 @@
 .endm
 
     /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra,
+     *              $f0-$f31; 28(GPR) + 32(FPR) + 1x8 bytes padding + method*
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    daddiu $sp, $sp, -496
+    .cfi_adjust_cfa_offset 496
+
+     // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 496)
+#error "FRAME_SIZE_SAVE_EVERYTHING(MIPS64) size not as expected."
+#endif
+
+    // Save core registers.
+    sd     $ra, 488($sp)
+    .cfi_rel_offset 31, 488
+    sd     $s8, 480($sp)
+    .cfi_rel_offset 30, 480
+    sd     $t9, 464($sp)
+    .cfi_rel_offset 25, 464
+    sd     $t8, 456($sp)
+    .cfi_rel_offset 24, 456
+    sd     $s7, 448($sp)
+    .cfi_rel_offset 23, 448
+    sd     $s6, 440($sp)
+    .cfi_rel_offset 22, 440
+    sd     $s5, 432($sp)
+    .cfi_rel_offset 21, 432
+    sd     $s4, 424($sp)
+    .cfi_rel_offset 20, 424
+    sd     $s3,  416($sp)
+    .cfi_rel_offset 19, 416
+    sd     $s2,  408($sp)
+    .cfi_rel_offset 18, 408
+    sd     $s1,  400($sp)
+    .cfi_rel_offset 17, 400
+    sd     $s0,  392($sp)
+    .cfi_rel_offset 16, 392
+    sd     $t3,  384($sp)
+    .cfi_rel_offset 15, 384
+    sd     $t2,  376($sp)
+    .cfi_rel_offset 14, 376
+    sd     $t1,  368($sp)
+    .cfi_rel_offset 13, 368
+    sd     $t0,  360($sp)
+    .cfi_rel_offset 12, 360
+    sd     $a7, 352($sp)
+    .cfi_rel_offset 11, 352
+    sd     $a6, 344($sp)
+    .cfi_rel_offset 10, 344
+    sd     $a5, 336($sp)
+    .cfi_rel_offset 9, 336
+    sd     $a4, 328($sp)
+    .cfi_rel_offset 8, 328
+    sd     $a3,  320($sp)
+    .cfi_rel_offset 7, 320
+    sd     $a2,  312($sp)
+    .cfi_rel_offset 6, 312
+    sd     $a1,  304($sp)
+    .cfi_rel_offset 5, 304
+    sd     $a0,  296($sp)
+    .cfi_rel_offset 4, 296
+    sd     $v1,  288($sp)
+    .cfi_rel_offset 3, 288
+    sd     $v0,  280($sp)
+    .cfi_rel_offset 2, 280
+
+    // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
+    bal 1f
+    .set push
+    .set noat
+    sd     $at,  272($sp)
+    .cfi_rel_offset 1, 272
+    .set pop
+1:
+    .cpsetup $ra, 472, 1b
+
+    // Save FP registers.
+    s.d    $f31, 264($sp)
+    s.d    $f30, 256($sp)
+    s.d    $f29, 248($sp)
+    s.d    $f28, 240($sp)
+    s.d    $f27, 232($sp)
+    s.d    $f26, 224($sp)
+    s.d    $f25, 216($sp)
+    s.d    $f24, 208($sp)
+    s.d    $f23, 200($sp)
+    s.d    $f22, 192($sp)
+    s.d    $f21, 184($sp)
+    s.d    $f20, 176($sp)
+    s.d    $f19, 168($sp)
+    s.d    $f18, 160($sp)
+    s.d    $f17, 152($sp)
+    s.d    $f16, 144($sp)
+    s.d    $f15, 136($sp)
+    s.d    $f14, 128($sp)
+    s.d    $f13, 120($sp)
+    s.d    $f12, 112($sp)
+    s.d    $f11, 104($sp)
+    s.d    $f10, 96($sp)
+    s.d    $f9, 88($sp)
+    s.d    $f8, 80($sp)
+    s.d    $f7, 72($sp)
+    s.d    $f6, 64($sp)
+    s.d    $f5, 56($sp)
+    s.d    $f4, 48($sp)
+    s.d    $f3, 40($sp)
+    s.d    $f2, 32($sp)
+    s.d    $f1, 24($sp)
+    s.d    $f0, 16($sp)
+
+    # load appropriate callee-save-method
+    ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
+    ld      $t1, 0($t1)
+    ld      $t1, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET($t1)
+    sd      $t1, 0($sp)                                # Place ArtMethod* at bottom of stack.
+    # Place sp in Thread::Current()->top_quick_frame.
+    sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    // Restore FP registers.
+    l.d    $f31, 264($sp)
+    l.d    $f30, 256($sp)
+    l.d    $f29, 248($sp)
+    l.d    $f28, 240($sp)
+    l.d    $f27, 232($sp)
+    l.d    $f26, 224($sp)
+    l.d    $f25, 216($sp)
+    l.d    $f24, 208($sp)
+    l.d    $f23, 200($sp)
+    l.d    $f22, 192($sp)
+    l.d    $f21, 184($sp)
+    l.d    $f20, 176($sp)
+    l.d    $f19, 168($sp)
+    l.d    $f18, 160($sp)
+    l.d    $f17, 152($sp)
+    l.d    $f16, 144($sp)
+    l.d    $f15, 136($sp)
+    l.d    $f14, 128($sp)
+    l.d    $f13, 120($sp)
+    l.d    $f12, 112($sp)
+    l.d    $f11, 104($sp)
+    l.d    $f10, 96($sp)
+    l.d    $f9, 88($sp)
+    l.d    $f8, 80($sp)
+    l.d    $f7, 72($sp)
+    l.d    $f6, 64($sp)
+    l.d    $f5, 56($sp)
+    l.d    $f4, 48($sp)
+    l.d    $f3, 40($sp)
+    l.d    $f2, 32($sp)
+    l.d    $f1, 24($sp)
+    l.d    $f0, 16($sp)
+
+    // Restore core registers.
+    .cpreturn
+    ld     $ra, 488($sp)
+    .cfi_restore 31
+    ld     $s8, 480($sp)
+    .cfi_restore 30
+    ld     $t9, 464($sp)
+    .cfi_restore 25
+    ld     $t8, 456($sp)
+    .cfi_restore 24
+    ld     $s7, 448($sp)
+    .cfi_restore 23
+    ld     $s6, 440($sp)
+    .cfi_restore 22
+    ld     $s5, 432($sp)
+    .cfi_restore 21
+    ld     $s4, 424($sp)
+    .cfi_restore 20
+    ld     $s3,  416($sp)
+    .cfi_restore 19
+    ld     $s2,  408($sp)
+    .cfi_restore 18
+    ld     $s1,  400($sp)
+    .cfi_restore 17
+    ld     $s0,  392($sp)
+    .cfi_restore 16
+    ld     $t3,  384($sp)
+    .cfi_restore 15
+    ld     $t2,  376($sp)
+    .cfi_restore 14
+    ld     $t1,  368($sp)
+    .cfi_restore 13
+    ld     $t0,  360($sp)
+    .cfi_restore 12
+    ld     $a7, 352($sp)
+    .cfi_restore 11
+    ld     $a6, 344($sp)
+    .cfi_restore 10
+    ld     $a5, 336($sp)
+    .cfi_restore 9
+    ld     $a4, 328($sp)
+    .cfi_restore 8
+    ld     $a3,  320($sp)
+    .cfi_restore 7
+    ld     $a2,  312($sp)
+    .cfi_restore 6
+    ld     $a1,  304($sp)
+    .cfi_restore 5
+    ld     $a0,  296($sp)
+    .cfi_restore 4
+    ld     $v1,  288($sp)
+    .cfi_restore 3
+    ld     $v0,  280($sp)
+    .cfi_restore 2
+    .set push
+    .set noat
+    ld     $at,  272($sp)
+    .cfi_restore 1
+    .set pop
+
+    daddiu $sp, $sp, 496
+    .cfi_adjust_cfa_offset -496
+.endm
+
+    /*
      * Macro that sets up a call through to artDeliverPendingExceptionFromCode,
      * where the pending exception is Thread::Current()->exception_.
      */
 .macro DELIVER_PENDING_EXCEPTION
     SETUP_GP
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
     dla     $t9, artDeliverPendingExceptionFromCode
     jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
     move    $a0, rSELF                   # pass Thread::Current
@@ -328,7 +550,7 @@
 
 .macro RETURN_IF_NO_EXCEPTION
     ld     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne    $t0, $zero, 1f                      # success if no exception is pending
     nop
     jalr   $zero, $ra
@@ -338,7 +560,7 @@
 .endm
 
 .macro RETURN_IF_ZERO
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne    $v0, $zero, 1f                # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -348,7 +570,7 @@
 .endm
 
 .macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     beq    $v0, $zero, 1f                # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -574,7 +796,7 @@
      * the bottom of the thread. On entry a0 holds Throwable*
      */
 ENTRY art_quick_deliver_exception
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artDeliverExceptionFromCode
     jalr $zero, $t9                 # artDeliverExceptionFromCode(Throwable*, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -586,7 +808,7 @@
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
 .Lart_quick_throw_null_pointer_exception_gp_set:
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowNullPointerExceptionFromCode
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -597,7 +819,7 @@
      */
     .extern artThrowNullPointerExceptionFromSignal
 ENTRY art_quick_throw_null_pointer_exception_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowNullPointerExceptionFromSignal
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -608,7 +830,7 @@
      */
     .extern artThrowDivZeroFromCode
 ENTRY art_quick_throw_div_zero
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowDivZeroFromCode
     jalr $zero, $t9                 # artThrowDivZeroFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -621,7 +843,7 @@
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
 .Lart_quick_throw_array_bounds_gp_set:
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowArrayBoundsFromCode
     jalr $zero, $t9                 # artThrowArrayBoundsFromCode(index, limit, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -634,7 +856,7 @@
     .extern artThrowStringBoundsFromCode
 ENTRY art_quick_throw_string_bounds
 .Lart_quick_throw_string_bounds_gp_set:
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowStringBoundsFromCode
     jalr $zero, $t9                 # artThrowStringBoundsFromCode(index, limit, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -645,7 +867,7 @@
      */
     .extern artThrowStackOverflowFromCode
 ENTRY art_quick_throw_stack_overflow
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowStackOverflowFromCode
     jalr $zero, $t9                 # artThrowStackOverflowFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -656,7 +878,7 @@
      */
     .extern artThrowNoSuchMethodFromCode
 ENTRY art_quick_throw_no_such_method
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowNoSuchMethodFromCode
     jalr $zero, $t9                 # artThrowNoSuchMethodFromCode(method_idx, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -680,13 +902,13 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME         # save callee saves in case allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
     jal   \cxx_name                        # (method_idx, this, Thread*, $sp)
     move  $a3, $sp                         # pass $sp
     move  $a0, $v0                         # save target Method*
     move  $t9, $v1                         # save $v0->code_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     beq   $v0, $zero, 1f
     nop
     jalr  $zero, $t9
@@ -975,8 +1197,8 @@
      */
     .extern artHandleFillArrayDataFromCode
 ENTRY art_quick_handle_fill_data
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
-    ld      $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
+    ld      $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)         # pass referrer's Method*
     jal     artHandleFillArrayDataFromCode              # (payload offset, Array*, method, Thread*)
     move    $a3, rSELF                                  # pass Thread::Current
     RETURN_IF_ZERO
@@ -989,7 +1211,7 @@
 ENTRY art_quick_lock_object
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     jal     artLockObjectFromCode         # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -998,7 +1220,7 @@
 ENTRY art_quick_lock_object_no_inline
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     jal     artLockObjectFromCode         # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1011,7 +1233,7 @@
 ENTRY art_quick_unlock_object
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
     jal     artUnlockObjectFromCode    # (Object* obj, Thread*)
     move    $a1, rSELF                 # pass Thread::Current
     RETURN_IF_ZERO
@@ -1020,7 +1242,7 @@
 ENTRY art_quick_unlock_object_no_inline
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
     jal     artUnlockObjectFromCode    # (Object* obj, Thread*)
     move    $a1, rSELF                 # pass Thread::Current
     RETURN_IF_ZERO
@@ -1053,7 +1275,7 @@
     daddiu $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
     SETUP_GP
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowClassCastException
     jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -1201,7 +1423,7 @@
     SETUP_GP
     bne    $v0, $zero, .Ldo_aput
     nop
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     move   $a1, $a2
     dla  $t9, artThrowArrayStoreException
     jalr $zero, $t9                 # artThrowArrayStoreException(Class*, Class*, Thread*)
@@ -1213,8 +1435,8 @@
      */
     .extern artGetBooleanStaticFromCode
 ENTRY art_quick_get_boolean_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetBooleanStaticFromCode   # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1225,8 +1447,8 @@
      */
     .extern artGetByteStaticFromCode
 ENTRY art_quick_get_byte_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetByteStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1237,8 +1459,8 @@
      */
     .extern artGetCharStaticFromCode
 ENTRY art_quick_get_char_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetCharStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1249,8 +1471,8 @@
      */
     .extern artGetShortStaticFromCode
 ENTRY art_quick_get_short_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetShortStaticFromCode     # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1261,8 +1483,8 @@
      */
     .extern artGet32StaticFromCode
 ENTRY art_quick_get32_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet32StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1273,8 +1495,8 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet64StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1285,8 +1507,8 @@
      */
     .extern artGetObjStaticFromCode
 ENTRY art_quick_get_obj_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetObjStaticFromCode       # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1297,8 +1519,8 @@
      */
     .extern artGetBooleanInstanceFromCode
 ENTRY art_quick_get_boolean_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetBooleanInstanceFromCode # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1309,8 +1531,8 @@
      */
     .extern artGetByteInstanceFromCode
 ENTRY art_quick_get_byte_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetByteInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1321,8 +1543,8 @@
      */
     .extern artGetCharInstanceFromCode
 ENTRY art_quick_get_char_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetCharInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1333,8 +1555,8 @@
      */
     .extern artGetShortInstanceFromCode
 ENTRY art_quick_get_short_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetShortInstanceFromCode   # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1345,8 +1567,8 @@
      */
     .extern artGet32InstanceFromCode
 ENTRY art_quick_get32_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet32InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1357,8 +1579,8 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet64InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1369,8 +1591,8 @@
      */
     .extern artGetObjInstanceFromCode
 ENTRY art_quick_get_obj_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetObjInstanceFromCode     # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1381,8 +1603,8 @@
      */
     .extern artSet8StaticFromCode
 ENTRY art_quick_set8_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet8StaticFromCode         # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1393,8 +1615,8 @@
      */
     .extern artSet16StaticFromCode
 ENTRY art_quick_set16_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet16StaticFromCode        # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1405,8 +1627,8 @@
      */
     .extern artSet32StaticFromCode
 ENTRY art_quick_set32_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet32StaticFromCode        # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1417,9 +1639,9 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
                                          # a2 contains the new val
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet64StaticFromCode        # (field_idx, referrer, new_val, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1430,8 +1652,8 @@
      */
     .extern artSetObjStaticFromCode
 ENTRY art_quick_set_obj_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSetObjStaticFromCode       # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1442,8 +1664,8 @@
      */
     .extern artSet8InstanceFromCode
 ENTRY art_quick_set8_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet8InstanceFromCode       # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1454,8 +1676,8 @@
      */
     .extern artSet16InstanceFromCode
 ENTRY art_quick_set16_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet16InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1466,8 +1688,8 @@
      */
     .extern artSet32InstanceFromCode
 ENTRY art_quick_set32_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet32InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1478,8 +1700,8 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet64InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1490,8 +1712,8 @@
      */
     .extern artSetObjInstanceFromCode
 ENTRY art_quick_set_obj_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSetObjInstanceFromCode     # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1501,7 +1723,7 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a1, rSELF                 # pass Thread::Current
     \return
@@ -1512,7 +1734,7 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a2, rSELF                 # pass Thread::Current
     \return
@@ -1522,7 +1744,7 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a3, rSELF                 # pass Thread::Current
     \return
@@ -1532,7 +1754,7 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a4, rSELF                 # pass Thread::Current
     \return
@@ -1634,7 +1856,7 @@
     .cpreturn                                    # Restore gp from t8 in branch delay slot.
 
 .Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     jal    artAllocObjectFromCodeRosAlloc
     move   $a2, $s1                              # Pass self as argument.
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -1673,17 +1895,19 @@
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
     .extern artTestSuspendFromCode
-ENTRY art_quick_test_suspend
-    lh     $a0, THREAD_FLAGS_OFFSET(rSELF)
-    bne    $a0, $zero, 1f
+ENTRY_NO_GP art_quick_test_suspend
+    lh     rSUSPEND, THREAD_FLAGS_OFFSET(rSELF)
+    bne    rSUSPEND, $zero, 1f
     daddiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     jalr   $zero, $ra
-    .cpreturn                                 # Restore gp from t8 in branch delay slot.
+    nop
 1:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME         # save callee saves for stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME               # save everything for stack crawl
     jal    artTestSuspendFromCode             # (Thread*)
     move   $a0, rSELF
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_EVERYTHING_FRAME
+    jalr   $zero, $ra
+    nop
 END art_quick_test_suspend
 
     /*
@@ -1692,13 +1916,13 @@
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickProxyInvokeHandler  # (Method* proxy method, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
     ld      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
     daddiu  $sp, $sp, REFS_AND_ARGS_MINUS_REFS_SIZE  # skip a0-a7 and f12-f19
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne     $t0, $zero, 1f
     dmtc1   $v0, $f0               # place return value to FP return value
     jalr    $zero, $ra
@@ -1747,26 +1971,26 @@
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickResolutionTrampoline  # (Method* called, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
     beq     $v0, $zero, 1f
     ld      $a0, 0($sp)            # load resolved method in $a0
                                    # artQuickResolutionTrampoline puts resolved method in *SP
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
     jalr    $zero, $t9             # tail call to method
     nop
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
     .extern artQuickGenericJniTrampoline
     .extern artQuickGenericJniEndTrampoline
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $s8, $sp               # save $sp
 
     # prepare for call to artQuickGenericJniTrampoline(Thread*, SP)
@@ -1816,7 +2040,7 @@
     move    $sp, $s8               # tear down the alloca
 
     # tear down the callee-save frame
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     jalr    $zero, $ra
     dmtc1   $v0, $f0               # place return value to FP return value
@@ -1829,13 +2053,13 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a1, rSELF             # pass Thread::Current
     jal     artQuickToInterpreterBridge    # (Method* method, Thread*, SP)
     move    $a2, $sp               # pass $sp
     ld      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
     daddiu  $sp, $sp, REFS_AND_ARGS_MINUS_REFS_SIZE  # skip a0-a7 and f12-f19
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne     $t0, $zero, 1f
     dmtc1   $v0, $f0               # place return value to FP return value
     jalr    $zero, $ra
@@ -1850,7 +2074,7 @@
     .extern artInstrumentationMethodEntryFromCode
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     daddiu   $sp, $sp, -16     # space for saving arg0
     .cfi_adjust_cfa_offset 16
     sd       $a0, 0($sp)       # save arg0
@@ -1861,7 +2085,7 @@
     ld       $a0, 0($sp)       # restore arg0
     daddiu   $sp, $sp, 16      # remove args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     jalr     $t9               # call method
     nop
 END art_quick_instrumentation_entry
@@ -1871,7 +2095,7 @@
     .cfi_startproc
     SETUP_GP
     move     $ra, $zero        # link register is to here, so clobber with 0 for later checks
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     move     $t0, $sp          # remember bottom of caller's frame
     daddiu   $sp, $sp, -16     # save return values and set up args
     .cfi_adjust_cfa_offset 16
@@ -1891,8 +2115,9 @@
     ld       $v0, 0($sp)       # restore return values
     l.d      $f0, 8($sp)
     jalr     $zero, $t9        # return
-    daddiu   $sp, $sp, 16+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE  # 16 bytes of saved values + ref_only callee save frame
-    .cfi_adjust_cfa_offset -(16+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+    # restore stack, 16 bytes of saved values + ref_only callee save frame
+    daddiu   $sp, $sp, 16+FRAME_SIZE_SAVE_REFS_ONLY
+    .cfi_adjust_cfa_offset -(16+FRAME_SIZE_SAVE_REFS_ONLY)
 END art_quick_instrumentation_exit
 
     /*
@@ -1902,7 +2127,7 @@
     .extern artDeoptimize
     .extern artEnterInterpreterFromDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     jal      artDeoptimize     # artDeoptimize(Thread*, SP)
                                # Returns caller method's frame size.
     move     $a0, rSELF        # pass Thread::current
@@ -1914,7 +2139,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     jal      artDeoptimizeFromCompiledCode    # artDeoptimizeFromCompiledCode(Thread*, SP)
                                               # Returns caller method's frame size.
     move     $a0, rSELF                       # pass Thread::current
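
The renamed MIPS64 macros above all wrap the same contract: spill a fixed register set, publish SP as the thread's top quick frame, call the art* runtime function with Thread::Current(), unspill, then tail-call or deliver the pending exception. A minimal C++ sketch of that flow, with purely illustrative names (ResolveMethod stands in for artQuickResolutionTrampoline; this is not ART's real C++ API):

#include <cstdio>

struct Thread { void* exception = nullptr; };

// Stand-in for artQuickResolutionTrampoline: returns the code pointer to
// tail-call, or nullptr after setting self->exception.
void* ResolveMethod(void* called_method, Thread* self) {
  (void)called_method;
  (void)self;
  return reinterpret_cast<void*>(0x1000);  // pretend resolution succeeded
}

void* ResolutionStub(void* called_method, Thread* self) {
  // SETUP_SAVE_REFS_AND_ARGS_FRAME: spill argument and callee-save registers
  // so the GC and stack walker can parse this frame, then record SP as the
  // thread's top quick frame.
  void* code = ResolveMethod(called_method, self);
  // RESTORE_SAVE_REFS_AND_ARGS_FRAME: unspill everything.
  if (code == nullptr) {
    // DELIVER_PENDING_EXCEPTION: long-jump to the handler for self->exception.
    return nullptr;
  }
  return code;  // jalr $t9: tail-call the resolved method's entry point
}

int main() {
  Thread self;
  std::printf("resolved code = %p\n", ResolutionStub(nullptr, &self));
}
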
diff --git a/runtime/arch/mips64/quick_method_frame_info_mips64.h b/runtime/arch/mips64/quick_method_frame_info_mips64.h
index b7dc57f..397776e 100644
--- a/runtime/arch/mips64/quick_method_frame_info_mips64.h
+++ b/runtime/arch/mips64/quick_method_frame_info_mips64.h
@@ -25,6 +25,8 @@
 namespace art {
 namespace mips64 {
 
+static constexpr uint32_t kMips64CalleeSaveAlwaysSpills =
+    (1 << art::mips64::RA);
 static constexpr uint32_t kMips64CalleeSaveRefSpills =
     (1 << art::mips64::S2) | (1 << art::mips64::S3) | (1 << art::mips64::S4) |
     (1 << art::mips64::S5) | (1 << art::mips64::S6) | (1 << art::mips64::S7) |
@@ -35,6 +37,14 @@
     (1 << art::mips64::A7);
 static constexpr uint32_t kMips64CalleeSaveAllSpills =
     (1 << art::mips64::S0) | (1 << art::mips64::S1);
+static constexpr uint32_t kMips64CalleeSaveEverythingSpills =
+    (1 << art::mips64::AT) | (1 << art::mips64::V0) | (1 << art::mips64::V1) |
+    (1 << art::mips64::A0) | (1 << art::mips64::A1) | (1 << art::mips64::A2) |
+    (1 << art::mips64::A3) | (1 << art::mips64::A4) | (1 << art::mips64::A5) |
+    (1 << art::mips64::A6) | (1 << art::mips64::A7) | (1 << art::mips64::T0) |
+    (1 << art::mips64::T1) | (1 << art::mips64::T2) | (1 << art::mips64::T3) |
+    (1 << art::mips64::S0) | (1 << art::mips64::S1) | (1 << art::mips64::T8) |
+    (1 << art::mips64::T9);
 
 static constexpr uint32_t kMips64CalleeSaveFpRefSpills = 0;
 static constexpr uint32_t kMips64CalleeSaveFpArgSpills =
@@ -46,17 +56,31 @@
     (1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) |
     (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) |
     (1 << art::mips64::F30) | (1 << art::mips64::F31);
+static constexpr uint32_t kMips64CalleeSaveFpEverythingSpills =
+    (1 << art::mips64::F0) | (1 << art::mips64::F1) | (1 << art::mips64::F2) |
+    (1 << art::mips64::F3) | (1 << art::mips64::F4) | (1 << art::mips64::F5) |
+    (1 << art::mips64::F6) | (1 << art::mips64::F7) | (1 << art::mips64::F8) |
+    (1 << art::mips64::F9) | (1 << art::mips64::F10) | (1 << art::mips64::F11) |
+    (1 << art::mips64::F12) | (1 << art::mips64::F13) | (1 << art::mips64::F14) |
+    (1 << art::mips64::F15) | (1 << art::mips64::F16) | (1 << art::mips64::F17) |
+    (1 << art::mips64::F18) | (1 << art::mips64::F19) | (1 << art::mips64::F20) |
+    (1 << art::mips64::F21) | (1 << art::mips64::F22) | (1 << art::mips64::F23) |
+    (1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) |
+    (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) |
+    (1 << art::mips64::F30) | (1 << art::mips64::F31);
 
 constexpr uint32_t Mips64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kMips64CalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kMips64CalleeSaveAllSpills : 0) | (1 << art::mips64::RA);
+  return kMips64CalleeSaveAlwaysSpills | kMips64CalleeSaveRefSpills |
+      (type == Runtime::kSaveRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMips64CalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMips64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t Mips64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kMips64CalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMips64CalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kMips64CalleeSaveFpAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kMips64CalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMips64CalleeSaveFpAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMips64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t Mips64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
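
The spill masks above feed a frame-size computation of this shape: count the selected core (and FP) spill bits, add a slot for the ArtMethod*, and round up to the stack alignment. A self-contained C++ sketch of the pattern; the masks, register numbers, and alignment here are simplified stand-ins, not the real mips64 values:

#include <bit>
#include <cstdint>
#include <cstdio>

enum class CalleeSaveType { kSaveAllCalleeSaves, kSaveRefsOnly, kSaveRefsAndArgs, kSaveEverything };

// Simplified stand-in masks (one bit per GPR).
constexpr uint32_t kAlwaysSpills     = 1u << 31;      // e.g. RA
constexpr uint32_t kRefSpills        = 0x00ff0000u;   // callee-saved S registers
constexpr uint32_t kArgSpills        = 0x0000ff00u;   // argument registers
constexpr uint32_t kEverythingSpills = 0x7fffffffu;   // nearly every GPR

constexpr uint32_t CoreSpills(CalleeSaveType type) {
  return kAlwaysSpills | kRefSpills |
      (type == CalleeSaveType::kSaveRefsAndArgs ? kArgSpills : 0u) |
      (type == CalleeSaveType::kSaveEverything ? kEverythingSpills : 0u);
}

constexpr uint32_t RoundUp(uint32_t x, uint32_t align) {  // align must be a power of two
  return (x + align - 1u) & ~(align - 1u);
}

constexpr uint32_t FrameSize(CalleeSaveType type) {
  // One 8-byte slot per spilled GPR, one slot for the ArtMethod*, rounded up
  // to a 16-byte stack alignment (FP spills omitted for brevity).
  return RoundUp(static_cast<uint32_t>(std::popcount(CoreSpills(type))) * 8u + 8u, 16u);
}

int main() {
  std::printf("kSaveRefsAndArgs frame: %u bytes\n", FrameSize(CalleeSaveType::kSaveRefsAndArgs));
  std::printf("kSaveEverything frame:  %u bytes\n", FrameSize(CalleeSaveType::kSaveEverything));
}
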
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 290769b..fa86bf4 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -87,6 +87,27 @@
   ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
@@ -219,20 +240,6 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
-// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
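
The restructuring above splits the allocator-entrypoint generator in two, so the RegionTLAB group can be emitted by its own macro as a unit, separate from the other allocators, with art_quick_alloc_object_region_tlab still left to a hand-written per-architecture fast path. Schematically, in C++ terms (names illustrative):

#include <cstdio>
#include <map>
#include <string>

using EntrypointTable = std::map<std::string, void*>;

void GenerateNonRegionTlabEntrypoints(EntrypointTable& table) {
  // DlMalloc, RosAlloc, BumpPointer, TLAB, instrumented variants, ...
  table["art_quick_alloc_object_dlmalloc"] = nullptr;
}

void GenerateRegionTlabEntrypoints(EntrypointTable& table) {
  // art_quick_alloc_object_region_tlab keeps a hand-written per-architecture
  // fast path, so it is deliberately not generated here.
  table["art_quick_alloc_array_region_tlab"] = nullptr;
}

void GenerateAllEntrypoints(EntrypointTable& table) {
  // Mirrors GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR after the split.
  GenerateNonRegionTlabEntrypoints(table);
  GenerateRegionTlabEntrypoints(table);
}

int main() {
  EntrypointTable table;
  GenerateAllEntrypoints(table);
  std::printf("generated %zu entrypoints\n", table.size());
}
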
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index b0a6017..2bba08d 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -19,10 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 32
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-
-// 32 bytes for GPRs and 32 bytes for FPRs.
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (32 + 32)
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 32
+#define FRAME_SIZE_SAVE_REFS_ONLY 32
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS (32 + 32)
+#define FRAME_SIZE_SAVE_EVERYTHING (48 + 64)
 
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
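
The new x86 FRAME_SIZE_SAVE_EVERYTHING value (48 + 64 = 112 bytes) matches the layout that SETUP_SAVE_EVERYTHING_FRAME builds further down in quick_entrypoints_x86.S: seven GPR pushes, 12 bytes of alignment padding, the pushed ArtMethod*, and the caller's return address make up the 48; the eight 8-byte XMM slots make up the 64. As a compile-checkable C++ restatement of the same arithmetic:

constexpr int kGprSlots      = 7 * 4;   // pushed edi, esi, ebp, ebx, edx, ecx, eax
constexpr int kFprSlots      = 8 * 8;   // xmm0-xmm7, 8 bytes each
constexpr int kPadding       = 12;      // stack-alignment padding
constexpr int kMethodSlot    = 4;       // pushed save-everything ArtMethod*
constexpr int kReturnAddress = 4;       // pushed implicitly by the caller's call

static_assert(kGprSlots + kPadding + kMethodSlot + kReturnAddress == 48, "GPR side of the frame");
static_assert(kFprSlots == 64, "FPR side of the frame");
static_assert(kGprSlots + kFprSlots + kPadding + kMethodSlot + kReturnAddress == 48 + 64,
              "FRAME_SIZE_SAVE_EVERYTHING(X86) size not as expected");

int main() {}
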
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 533905e..3efeb40 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -71,18 +71,9 @@
 
 namespace art {
 
-#if defined(__APPLE__) && defined(__x86_64__)
-// mac symbols have a prefix of _ on x86_64
-extern "C" void _art_quick_throw_null_pointer_exception_from_signal();
-extern "C" void _art_quick_throw_stack_overflow();
-extern "C" void _art_quick_test_suspend();
-#define EXT_SYM(sym) _ ## sym
-#else
 extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_test_suspend();
-#define EXT_SYM(sym) sym
-#endif
 
 // Note this is different from the others (no underscore on 64 bit mac) due to
 // the way the symbol is defined in the .S file.
@@ -320,7 +311,7 @@
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
   uc->CTX_EIP = reinterpret_cast<uintptr_t>(
-      EXT_SYM(art_quick_throw_null_pointer_exception_from_signal));
+      art_quick_throw_null_pointer_exception_from_signal);
   // Pass the faulting address as the first argument of
   // art_quick_throw_null_pointer_exception_from_signal.
 #if defined(__x86_64__)
@@ -397,7 +388,7 @@
     *next_sp = retaddr;
     uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
-    uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_test_suspend));
+    uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_test_suspend);
 
     // Now remove the suspend trigger that caused this fault.
     Thread::Current()->RemoveSuspendTrigger();
@@ -443,7 +434,7 @@
   // the previous frame.
 
   // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_stack_overflow));
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
 
   return true;
 }
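
With the EXT_SYM indirection gone, the fault handlers reference the entrypoints directly and redirect the faulting thread by rewriting the saved program counter (and, for the null-pointer case, the first argument register) before returning from the signal handler. A minimal Linux/glibc x86_64 sketch of that pattern, using a toy target instead of ART's art_quick_* stubs (g++ defines _GNU_SOURCE, which exposes the REG_* indices):

#include <csignal>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <ucontext.h>

extern "C" void ThrowFromSignal() {  // toy stand-in for the art_quick_throw_* stubs
  std::puts("resumed in the throw stub");
  std::exit(0);
}

void Handler(int, siginfo_t* info, void* context) {
  auto* uc = static_cast<ucontext_t*>(context);
  // Push a fake return PC, as the ART handler does, so the stub sees a
  // normal-looking frame (this also keeps the ABI's stack-alignment parity).
  auto* sp = reinterpret_cast<uintptr_t*>(uc->uc_mcontext.gregs[REG_RSP]) - 1;
  *sp = 0;
  uc->uc_mcontext.gregs[REG_RSP] = reinterpret_cast<greg_t>(sp);
  // Pass the faulting address in the first argument register and resume in the stub.
  uc->uc_mcontext.gregs[REG_RDI] = reinterpret_cast<greg_t>(info->si_addr);
  uc->uc_mcontext.gregs[REG_RIP] = reinterpret_cast<greg_t>(&ThrowFromSignal);
}

int main() {
  struct sigaction sa = {};
  sa.sa_sigaction = Handler;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);
  *reinterpret_cast<volatile int*>(8) = 1;  // fault on a near-null address
}
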
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 77e04e7..2e9682e 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -22,9 +22,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-MACRO2(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
@@ -35,22 +35,22 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    pushl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 3*4 + 16 + 4)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 3*4 + 16 + 4)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86) size not as expected."
 #endif
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
      */
-MACRO2(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_REFS_ONLY_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
@@ -61,24 +61,24 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 3*4 + 16 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 3*4 + 16 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(X86) size not as expected."
 #endif
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
      * and preserves the value of got_reg at entry.
      */
-MACRO2(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_GOT_REG, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_REFS_ONLY_FRAME_PRESERVE_GOT_REG, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
@@ -91,7 +91,7 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
@@ -101,12 +101,12 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 3*4 + 16 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 3*4 + 16 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(X86) size not as expected."
 #endif
 END_MACRO
 
-MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
     addl MACRO_LITERAL(16), %esp  // Unwind stack up to saved values
     CFI_ADJUST_CFA_OFFSET(-16)
     POP ebp  // Restore callee saves (ebx is saved/restored by the upcall)
@@ -116,9 +116,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
      */
-MACRO2(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_REFS_AND_ARGS_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves
     PUSH esi
     PUSH ebp
@@ -139,23 +139,23 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    pushl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 7*4 + 4*8 + 4)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 7*4 + 4*8 + 4)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86) size not as expected."
 #endif
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs) where the method is passed in EAX.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs) where the method is passed in EAX.
      */
-MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX)
+MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX)
     // Save callee and GPR args, mixed together to agree with core spills bitmap.
     PUSH edi  // Save callee saves
     PUSH esi
@@ -179,7 +179,7 @@
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 END_MACRO
 
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
     // Restore FPRs. EAX is still on the stack.
     movsd 4(%esp), %xmm0
     movsd 12(%esp), %xmm1
@@ -200,7 +200,7 @@
 // Restore register and jump to routine
 // Inputs:  EDI contains pointer to code.
 // Notes: Need to pop EAX too (restores Method*)
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP)
+MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME_AND_JUMP)
     POP eax  // Restore Method*
 
     // Restore FPRs.
@@ -222,11 +222,79 @@
 END_MACRO
 
     /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
+    // Save core registers.
+    PUSH edi
+    PUSH esi
+    PUSH ebp
+    PUSH ebx
+    PUSH edx
+    PUSH ecx
+    PUSH eax
+    // Create space for FPR registers and stack alignment padding.
+    subl MACRO_LITERAL(12 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(12 + 8 * 8)
+    // Save FPRs.
+    movsd %xmm0, 12(%esp)
+    movsd %xmm1, 20(%esp)
+    movsd %xmm2, 28(%esp)
+    movsd %xmm3, 36(%esp)
+    movsd %xmm4, 44(%esp)
+    movsd %xmm5, 52(%esp)
+    movsd %xmm6, 60(%esp)
+    movsd %xmm7, 68(%esp)
+
+    SETUP_GOT_NOSAVE RAW_VAR(got_reg)
+    // Load Runtime::instance_ from GOT.
+    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
+    movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+    // Push save everything callee-save method.
+    pushl RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(REG_VAR(temp_reg))
+    CFI_ADJUST_CFA_OFFSET(4)
+    // Store esp as the top quick frame.
+    movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+    // Ugly compile-time check, but we only have the preprocessor.
+    // Last +4: implicit return address pushed on stack when caller made call.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 7*4 + 8*8 + 12 + 4 + 4)
+#error "FRAME_SIZE_SAVE_EVERYTHING(X86) size not as expected."
+#endif
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+    // Restore FPRs. Method and padding are still on the stack.
+    movsd 16(%esp), %xmm0
+    movsd 24(%esp), %xmm1
+    movsd 32(%esp), %xmm2
+    movsd 40(%esp), %xmm3
+    movsd 48(%esp), %xmm4
+    movsd 56(%esp), %xmm5
+    movsd 64(%esp), %xmm6
+    movsd 72(%esp), %xmm7
+
+    // Remove save everything callee save method, stack alignment padding and FPRs.
+    addl MACRO_LITERAL(16 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8))
+
+    // Restore core registers.
+    POP eax
+    POP ecx
+    POP edx
+    POP ebx
+    POP ebp
+    POP esi
+    POP edi
+END_MACRO
+
+    /*
      * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_.
      */
 MACRO0(DELIVER_PENDING_EXCEPTION)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp               // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
@@ -238,20 +306,20 @@
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp                // alignment padding
+    subl MACRO_LITERAL(12), %esp               // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET                // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    call CALLVAR(cxx_name)                      // cxx_name(Thread*)
+    call CALLVAR(cxx_name)                     // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
     subl MACRO_LITERAL(8), %esp                // alignment padding
@@ -266,7 +334,7 @@
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                                   // alignment padding
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
@@ -337,7 +405,7 @@
      * pointing back to the original caller.
      */
 MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, ebx
     movl %esp, %edx  // remember SP
 
     // Outgoing argument set up
@@ -661,25 +729,9 @@
     ret
 END_FUNCTION art_quick_invoke_static_stub
 
-MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
-    // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp                // push padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET                // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    call CALLVAR(cxx_name)                      // cxx_name(Thread*)
-    addl MACRO_LITERAL(16), %esp                // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME         // restore frame up to return address
-    CALL_MACRO(return_macro)                    // return or deliver exception
-    END_FUNCTION VAR(c_name)
-END_MACRO
-
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx         // save ref containing registers for GC
     // Outgoing argument set up
     subl MACRO_LITERAL(8), %esp                  // push padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -689,14 +741,14 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, Thread*)
     addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx         // save ref containing registers for GC
     // Outgoing argument set up
     PUSH eax                                     // push padding
     pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
@@ -706,14 +758,14 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, Thread*)
     addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx         // save ref containing registers for GC
     // Outgoing argument set up
     pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -723,14 +775,14 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, arg3, Thread*)
     addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_GOT_REG  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME_PRESERVE_GOT_REG ebx, ebx  // save ref containing registers for GC
 
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp                 // alignment padding
@@ -744,16 +796,16 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, arg3, arg4, Thread*)
     addl MACRO_LITERAL(32), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx       // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx               // save ref containing registers for GC
     // Outgoing argument set up
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx         // get referrer
     PUSH eax                                          // push padding
     pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -762,16 +814,16 @@
     call CALLVAR(cxx_name)                            // cxx_name(arg1, referrer, Thread*)
     addl MACRO_LITERAL(16), %esp                      // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                      // restore frame up to return address
     CALL_MACRO(return_macro)                          // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx        // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx               // save ref containing registers for GC
     // Outgoing argument set up
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %edx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %edx         // get referrer
     pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                                          // pass referrer
@@ -780,16 +832,16 @@
     call CALLVAR(cxx_name)                            // cxx_name(arg1, arg2, referrer, Thread*)
     addl MACRO_LITERAL(16), %esp                      // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                      // restore frame up to return address
     CALL_MACRO(return_macro)                          // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx        // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx               // save ref containing registers for GC
     // Outgoing argument set up
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ebx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ebx         // get referrer
     subl MACRO_LITERAL(12), %esp                      // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
@@ -802,7 +854,7 @@
                                                       //          Thread*)
     addl LITERAL(32), %esp                            // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                      // restore frame up to return address
     CALL_MACRO(return_macro)                          // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -918,7 +970,7 @@
     ret
 .Lart_quick_alloc_object_rosalloc_slow_path:
     POP edi
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx          // save ref containing registers for GC
     // Outgoing argument set up
     PUSH eax                      // alignment padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -926,9 +978,9 @@
     PUSH ecx
     PUSH eax
     call SYMBOL(artAllocObjectFromCodeRosAlloc)  // cxx_name(arg0, arg1, Thread*)
-    addl LITERAL(16), %esp        // pop arguments
+    addl LITERAL(16), %esp                       // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // resotre frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER      // return or deliver exception
 END_FUNCTION art_quick_alloc_object_rosalloc
 
@@ -936,59 +988,59 @@
 //
 // EAX: type_idx/return_value, ECX: ArtMethod*, EDX: the class.
 MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
-    testl %edx, %edx                                           // Check null class
+    testl %edx, %edx                                    // Check null class
     jz   VAR(slowPathLabel)
-                                                               // Check class status.
+                                                        // Check class status.
     cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%edx)
     jne  VAR(slowPathLabel)
-                                                               // No fake dependence needed on x86
-                                                               // between status and flags load,
-                                                               // since each load is a load-acquire,
-                                                               // no loads reordering.
-                                                               // Check access flags has
-                                                               // kAccClassIsFinalizable
+                                                        // No fake dependence needed on x86
+                                                        // between status and flags load,
+                                                        // since each load is a load-acquire,
+                                                        // no loads reordering.
+                                                        // Check access flags has
+                                                        // kAccClassIsFinalizable
     testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%edx)
     jnz  VAR(slowPathLabel)
-    movl %fs:THREAD_SELF_OFFSET, %ebx                          // ebx = thread
-    movl THREAD_LOCAL_END_OFFSET(%ebx), %edi                   // Load thread_local_end.
-    subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi                   // Compute the remaining buffer size.
-    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %esi           // Load the object size.
-    cmpl %edi, %esi                                            // Check if it fits. OK to do this
-                                                               // before rounding up the object size
-                                                               // assuming the buf size alignment.
+    movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
+    movl THREAD_LOCAL_END_OFFSET(%ebx), %edi            // Load thread_local_end.
+    subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi            // Compute the remaining buffer size.
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %esi    // Load the object size.
+    cmpl %edi, %esi                                     // Check if it fits. OK to do this
+                                                        // before rounding up the object size
+                                                        // assuming the buf size alignment.
     ja   VAR(slowPathLabel)
-    addl LITERAL(OBJECT_ALIGNMENT_MASK), %esi                  // Align the size by 8. (addr + 7) & ~7.
+    addl LITERAL(OBJECT_ALIGNMENT_MASK), %esi           // Align the size by 8. (addr + 7) & ~7.
     andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %esi
-    movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax                   // Load thread_local_pos
-                                                               // as allocated object.
-    addl %eax, %esi                                            // Add the object size.
-    movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx)                   // Update thread_local_pos.
-    addl LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%ebx)         // Increase thread_local_objects.
-                                                               // Store the class pointer in the header.
-                                                               // No fence needed for x86.
+    movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax            // Load thread_local_pos
+                                                        // as allocated object.
+    addl %eax, %esi                                     // Add the object size.
+    movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx)            // Update thread_local_pos.
+    addl LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%ebx)  // Increase thread_local_objects.
+                                                        // Store the class pointer in the header.
+                                                        // No fence needed for x86.
     POISON_HEAP_REF edx
     movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax)
     POP edi
     POP esi
-    ret                                                        // Fast path succeeded.
+    ret                                                 // Fast path succeeded.
 END_MACRO
 
 // The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
 MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
     POP edi
     POP esi
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx                 // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx                 // save ref containing registers for GC
     // Outgoing argument set up
-    PUSH eax                                                   // alignment padding
-    pushl %fs:THREAD_SELF_OFFSET                               // pass Thread::Current()
+    PUSH eax                                            // alignment padding
+    pushl %fs:THREAD_SELF_OFFSET                        // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx
     PUSH eax
-    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
+    call CALLVAR(cxx_name)                              // cxx_name(arg0, arg1, Thread*)
     addl LITERAL(16), %esp
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                        // restore frame up to return address
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
+    RESTORE_SAVE_REFS_ONLY_FRAME                        // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER             // return or deliver exception
 END_MACRO
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
@@ -1002,9 +1054,9 @@
 #endif
     PUSH esi
     PUSH edi
-    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx      // Load dex cache resolved types array
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
     // Might need to break down into multiple instructions to get the base address in a register.
-                                                               // Load the class
+                                                            // Load the class
     movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
@@ -1022,13 +1074,19 @@
 #endif
     PUSH esi
     PUSH edi
-    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx      // Load dex cache resolved types array
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
     // Might need to break down into multiple instructions to get the base address in a register.
-                                                               // Load the class
+                                                            // Load the class
     movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
-                                                               // Read barrier for class load.
+                                                            // Read barrier for class load.
     cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Null check so that we can load the lock word.
+    testl %edx, %edx
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
@@ -1036,10 +1094,10 @@
     PUSH eax
     PUSH ecx
     // Outgoing argument set up
-    subl MACRO_LITERAL(8), %esp                                // Alignment padding
+    subl MACRO_LITERAL(8), %esp                             // Alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
-    PUSH edx                                                   // Pass the class as the first param.
-    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    PUSH edx                                                // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                         // cxx_name(mirror::Object* obj)
     movl %eax, %edx
     addl MACRO_LITERAL(12), %esp
     CFI_ADJUST_CFA_OFFSET(-12)
@@ -1065,7 +1123,7 @@
     test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // test the 2 high bits.
     jne  .Lslow_lock                      // slow path if either of the two high bits are set.
     movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     test %ecx, %ecx
     jnz  .Lalready_thin                   // lock word contains a thin lock
     // unlocked case - edx: original lock word, eax: obj.
@@ -1081,9 +1139,9 @@
     cmpw %cx, %dx                         // do we hold the lock already?
     jne  .Lslow_lock
     movl %edx, %ecx                       // copy the lock word to check count overflow.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count for overflow check.
-    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) are set.
+    test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // overflowed if the first gc state bit is set.
     jne  .Lslow_lock                      // count overflowed so go slow
     movl %eax, %ecx                       // save obj to use eax for cmpxchg.
     movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
@@ -1096,7 +1154,7 @@
     movl  %ecx, %eax                      // restore eax
     jmp  .Lretry_lock
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1106,12 +1164,12 @@
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
 DEFINE_FUNCTION art_quick_lock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1121,7 +1179,7 @@
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object_no_inline
 
@@ -1137,13 +1195,13 @@
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     movl %ecx, %edx                       // copy the lock word to detect new count of 0.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
     cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
     jae  .Lrecursive_thin_unlock
     // update lockword, cmpxchg necessary for read barrier bits.
     movl %eax, %edx                       // edx: obj
     movl %ecx, %eax                       // eax: old lock word.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
 #ifndef USE_READ_BARRIER
     movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
 #else
@@ -1167,7 +1225,7 @@
     movl %edx, %eax                       // restore eax
     jmp  .Lretry_unlock
 .Lslow_unlock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1177,12 +1235,12 @@
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
 DEFINE_FUNCTION art_quick_unlock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1192,7 +1250,7 @@
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
@@ -1223,7 +1281,7 @@
     POP ecx
     addl LITERAL(4), %esp
     CFI_ADJUST_CFA_OFFSET(-4)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                              // alignment padding
     pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
@@ -1375,7 +1433,7 @@
     POP  edx
     POP  ecx
     POP  eax
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                      // alignment padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1397,7 +1455,19 @@
     ret
 END_FUNCTION art_quick_memcpy
 
-NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+DEFINE_FUNCTION art_quick_test_suspend
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx              // save everything for GC
+    // Outgoing argument set up
+    subl MACRO_LITERAL(12), %esp                      // push padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artTestSuspendFromCode)               // (Thread*)
+    addl MACRO_LITERAL(16), %esp                      // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_EVERYTHING_FRAME                     // restore frame up to return address
+    ret                                               // return
+END_FUNCTION art_quick_test_suspend
 
 DEFINE_FUNCTION art_quick_d2l
     subl LITERAL(12), %esp        // alignment padding, room for argument
@@ -1522,14 +1592,14 @@
 // Call artSet64InstanceFromCode with 4 word size arguments and the referrer.
 DEFINE_FUNCTION art_quick_set64_instance
     movd %ebx, %xmm0
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx  // save ref containing registers for GC
     movd %xmm0, %ebx
     // Outgoing argument set up
     subl LITERAL(8), %esp         // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    pushl (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+12)(%esp)  // pass referrer
+    pushl (FRAME_SIZE_SAVE_REFS_ONLY+12)(%esp)  // pass referrer
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ebx                      // pass high half of new_val
     PUSH edx                      // pass low half of new_val
@@ -1538,7 +1608,7 @@
     call SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*)
     addl LITERAL(32), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_instance
 
@@ -1547,9 +1617,9 @@
 DEFINE_FUNCTION art_quick_set64_static
     // TODO: Implement SETUP_GOT_NOSAVE for got_reg = ecx to avoid moving around the registers.
     movd %ebx, %xmm0
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     movd %xmm0, %ebx
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx  // get referrer
     subl LITERAL(12), %esp        // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1561,12 +1631,12 @@
     call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
     addl LITERAL(32), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX
     PUSH esp                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -1576,9 +1646,9 @@
     movd %eax, %xmm0              // place return value also into floating point return value
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0
-    addl LITERAL(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %esp
-    CFI_ADJUST_CFA_OFFSET(-(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE))
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    addl LITERAL(16 + FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY))
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_proxy_invoke_handler
 
@@ -1623,7 +1693,7 @@
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, ebx
     movl %esp, %edi
     PUSH EDI                      // pass SP. do not just PUSH ESP; that messes up unwinding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1636,14 +1706,14 @@
     CFI_ADJUST_CFA_OFFSET(-16)
     test %eax, %eax               // if code pointer is null goto deliver pending exception
     jz 1f
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_AND_JUMP
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_resolution_trampoline
 
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX
     movl %esp, %ebp               // save SP at callee-save frame
     CFI_DEF_CFA_REGISTER(ebp)
     subl LITERAL(5120), %esp
@@ -1722,7 +1792,7 @@
 END_FUNCTION art_quick_generic_jni_trampoline
 
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  ebx, ebx  // save frame
+    SETUP_SAVE_REFS_AND_ARGS_FRAME  ebx, ebx  // save frame
     mov %esp, %edx                // remember SP
     PUSH eax                      // alignment padding
     PUSH edx                      // pass SP
@@ -1752,11 +1822,11 @@
      * Routine that intercepts method calls and returns.
      */
 DEFINE_FUNCTION art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, edx
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, edx
     PUSH eax                      // Save eax which will be clobbered by the callee-save method.
     subl LITERAL(12), %esp        // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-4+16(%esp)  // Pass LR.
+    pushl FRAME_SIZE_SAVE_REFS_AND_ARGS-4+16(%esp)  // Pass LR.
     CFI_ADJUST_CFA_OFFSET(4)
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
@@ -1791,7 +1861,7 @@
 DEFINE_FUNCTION art_quick_instrumentation_exit
     pushl LITERAL(0)              // Push a fake return PC as there will be none on the stack.
     CFI_ADJUST_CFA_OFFSET(4)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx
     mov  %esp, %ecx               // Remember SP
     subl LITERAL(8), %esp         // Save float return value.
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1817,7 +1887,7 @@
     movq (%esp), %xmm0            // Restore fpr return value.
     addl LITERAL(8), %esp
     CFI_ADJUST_CFA_OFFSET(-8)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     addl LITERAL(4), %esp         // Remove fake return pc.
     CFI_ADJUST_CFA_OFFSET(-4)
     jmp   *%ecx                   // Return.
@@ -1829,7 +1899,7 @@
      */
 DEFINE_FUNCTION art_quick_deoptimize
     PUSH ebx                      // Entry point for a jump. Fake that we were called.
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx
     subl LITERAL(12), %esp        // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
@@ -1843,7 +1913,7 @@
      * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx
     subl LITERAL(12), %esp                      // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET                // Pass Thread::Current().
@@ -1923,6 +1993,14 @@
 //   convention (e.g. standard callee-save registers are preserved).
 MACRO2(READ_BARRIER_MARK_REG, name, reg)
     DEFINE_FUNCTION VAR(name)
+    // Null check so that we can load the lock word.
+    test REG_VAR(reg), REG_VAR(reg)
+    jz .Lret_rb_\name
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+    jz .Lslow_rb_\name
+    ret
+.Lslow_rb_\name:
     // Save all potentially live caller-save core registers.
     PUSH eax
     PUSH ecx
@@ -1970,6 +2048,7 @@
     POP_REG_NE edx, RAW_VAR(reg)
     POP_REG_NE ecx, RAW_VAR(reg)
     POP_REG_NE eax, RAW_VAR(reg)
+.Lret_rb_\name:
     ret
     END_FUNCTION VAR(name)
 END_MACRO
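
The fast path added to READ_BARRIER_MARK_REG above returns immediately for a null reference or for an object whose lock-word mark bit is already set; only unmarked, non-null references fall through to the slow path that spills the caller-save registers and calls the runtime marking routine. A minimal C++ sketch of the same decision, with the mask value and helper names as illustration-only assumptions:

    #include <cstdint>

    // Stand-in for LOCK_WORD_MARK_BIT_MASK_SHIFTED; the real value comes from the
    // runtime's lock word definition and is exported through asm_support.h.
    constexpr uint32_t kMarkBitMaskShifted = 1u << 29;  // assumed bit position

    // Mirrors the fast-path check performed before any registers are saved.
    template <typename SlowPath>
    void* ReadBarrierMarkFastPath(void* ref, uint32_t lock_word, SlowPath slow_path) {
      if (ref == nullptr) {
        return ref;                                  // Null: nothing to mark.
      }
      if ((lock_word & kMarkBitMaskShifted) != 0) {
        return ref;                                  // Mark bit set: already marked, return as-is.
      }
      return slow_path(ref);                         // Unmarked: spill registers and call the runtime.
    }
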
diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h
index 24c671c..9fcde35 100644
--- a/runtime/arch/x86/quick_method_frame_info_x86.h
+++ b/runtime/arch/x86/quick_method_frame_info_x86.h
@@ -36,21 +36,33 @@
   XMM7 = 7,
 };
 
+static constexpr uint32_t kX86CalleeSaveAlwaysSpills =
+    (1 << art::x86::kNumberOfCpuRegisters);  // Fake return address callee save.
 static constexpr uint32_t kX86CalleeSaveRefSpills =
     (1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI);
 static constexpr uint32_t kX86CalleeSaveArgSpills =
     (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
+static constexpr uint32_t kX86CalleeSaveEverythingSpills =
+    (1 << art::x86::EAX) | (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
+
 static constexpr uint32_t kX86CalleeSaveFpArgSpills =
     (1 << art::x86::XMM0) | (1 << art::x86::XMM1) |
     (1 << art::x86::XMM2) | (1 << art::x86::XMM3);
+static constexpr uint32_t kX86CalleeSaveFpEverythingSpills =
+    (1 << art::x86::XMM0) | (1 << art::x86::XMM1) |
+    (1 << art::x86::XMM2) | (1 << art::x86::XMM3) |
+    (1 << art::x86::XMM4) | (1 << art::x86::XMM5) |
+    (1 << art::x86::XMM6) | (1 << art::x86::XMM7);
 
 constexpr uint32_t X86CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kX86CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
-      (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
+  return kX86CalleeSaveAlwaysSpills | kX86CalleeSaveRefSpills |
+      (type == Runtime::kSaveRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveEverything ? kX86CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t X86CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-    return type == Runtime::kRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0;
+    return (type == Runtime::kSaveRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0) |
+        (type == Runtime::kSaveEverything ? kX86CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t X86CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index c4e723c..0728f99 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -31,7 +31,8 @@
     // Clang/llvm does not support .altmacro. However, the clang/llvm preprocessor doesn't
     // separate the backslash and parameter by a space. Everything just works.
     #define RAW_VAR(name) \name
-    #define VAR(name) SYMBOL(\name)
+    #define VAR(name) \name
+    #define CALLVAR(name) SYMBOL(\name)
     #define PLT_VAR(name) \name@PLT
     #define REG_VAR(name) %\name
     #define CALL_MACRO(name) \name
@@ -45,6 +46,7 @@
     .altmacro
     #define RAW_VAR(name) name&
     #define VAR(name) name&
+    #define CALLVAR(name) SYMBOL(name&)
     #define PLT_VAR(name) name&@PLT
     #define REG_VAR(name) %name
     #define CALL_MACRO(name) name&
@@ -110,10 +112,10 @@
 // for mac builds.
 MACRO1(DEFINE_FUNCTION, c_name)
     FUNCTION_TYPE(SYMBOL(\c_name))
-    ASM_HIDDEN SYMBOL(\c_name)
-    .globl VAR(c_name)
+    ASM_HIDDEN CALLVAR(c_name)
+    .globl CALLVAR(c_name)
     ALIGN_FUNCTION_ENTRY
-VAR(c_name):
+CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
     CFI_DEF_CFA(rsp, 8)
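
The VAR/CALLVAR split above keeps name mangling in one place: CALLVAR applies SYMBOL() (which may prepend a platform prefix, such as a leading underscore on Mac builds) and is used for .globl directives, labels, and call targets, while VAR now forwards the raw macro argument in both the clang and .altmacro branches. Forwarding raw names through VAR presumably avoids applying SYMBOL() twice when one macro hands a name to another. A rough preprocessor sketch of the distinction, with the mangling rule shown only as an assumption:

    // Hypothetical illustration; the real definitions live in asm_support_x86_64.S.
    #if defined(__APPLE__)
    #define SYMBOL(name) _##name        // assumed Mac-style leading underscore
    #else
    #define SYMBOL(name) name
    #endif

    #define CALLVAR(name) SYMBOL(name)  // definition/call sites: mangled exactly once here
    #define VAR(name)     name          // names forwarded between macros: left untouched
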
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index 48bec73..a4446d3 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -19,8 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE (64 + 4*8)
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE (64 + 4*8)
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (176 + 4*8)
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES (64 + 4*8)
+#define FRAME_SIZE_SAVE_REFS_ONLY (64 + 4*8)
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS (112 + 12*8)
+#define FRAME_SIZE_SAVE_EVERYTHING (144 + 16*8)
 
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
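
The frame-size constants above can be cross-checked against the compile-time guards in quick_entrypoints_x86_64.S: kSaveRefsAndArgs stores 11 GPRs, 12 FPR slots, and two ArtMethod* slots, and the new kSaveEverything spills all 15 pushable GPRs and all 16 XMM registers. A static_assert sketch of the same arithmetic, with the values copied from the constants and checks in this change:

    // 11 GPR pushes + 12 FPR slots + 16 bytes of ArtMethod* slots + return address.
    static_assert(112 + 12 * 8 == 11 * 8 + 12 * 8 + 16 + 8,
                  "FRAME_SIZE_SAVE_REFS_AND_ARGS should be 208 bytes");
    // 15 GPR pushes + 16 FPR slots + padding + ArtMethod* + return address.
    static_assert(144 + 16 * 8 == 15 * 8 + 16 * 8 + 16 + 8,
                  "FRAME_SIZE_SAVE_EVERYTHING should be 272 bytes");
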
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 784ec39..ac8f523 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -42,9 +42,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -68,7 +68,7 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for save all callee save frame method.
-    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the top quick frame.
@@ -76,17 +76,17 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 6 * 8 + 4 * 8 + 8 + 8)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
      */
-MACRO0(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_SAVE_REFS_ONLY_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -110,7 +110,7 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for refs only callee save frame method.
-    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the top quick frame.
@@ -118,13 +118,13 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 6 * 8 + 4 * 8 + 8 + 8)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
-MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
     movq 8(%rsp), %xmm12
     movq 16(%rsp), %xmm13
     movq 24(%rsp), %xmm14
@@ -142,9 +142,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
      */
-MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -165,10 +165,10 @@
     PUSH rdx  // Quick arg 2.
     PUSH rcx  // Quick arg 3.
     // Create space for FPR args and create 2 slots for ArtMethod*.
-    subq MACRO_LITERAL(80 + 4 * 8), %rsp
-    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
+    subq MACRO_LITERAL(16 + 12 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
     // R10 := ArtMethod* for ref and args callee save frame method.
-    movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
     // Save FPRs.
     movq %xmm0, 16(%rsp)
     movq %xmm1, 24(%rsp)
@@ -189,13 +189,13 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 4 * 8 + 80 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
-MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI)
+MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
     // Save callee and GPR args, mixed together to agree with core spills bitmap.
     PUSH r15  // Callee save.
     PUSH r14  // Callee save.
@@ -230,7 +230,7 @@
     movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
 END_MACRO
 
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
     // Restore FPRs.
     movq 16(%rsp), %xmm0
     movq 24(%rsp), %xmm1
@@ -260,13 +260,115 @@
     POP r15
 END_MACRO
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+#if defined(__APPLE__)
+    int3
+    int3
+#else
+    // Save core registers from highest to lowest to agree with core spills bitmap.
+    PUSH r15
+    PUSH r14
+    PUSH r13
+    PUSH r12
+    PUSH r11
+    PUSH r10
+    PUSH r9
+    PUSH r8
+    PUSH rdi
+    PUSH rsi
+    PUSH rbp
+    PUSH rbx
+    PUSH rdx
+    PUSH rcx
+    PUSH rax
+    // Create space for FPRs and stack alignment padding.
+    subq MACRO_LITERAL(8 + 16 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
+    // R10 := Runtime::Current()
+    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
+    movq (%r10), %r10
+    // Save FPRs.
+    movq %xmm0, 8(%rsp)
+    movq %xmm1, 16(%rsp)
+    movq %xmm2, 24(%rsp)
+    movq %xmm3, 32(%rsp)
+    movq %xmm4, 40(%rsp)
+    movq %xmm5, 48(%rsp)
+    movq %xmm6, 56(%rsp)
+    movq %xmm7, 64(%rsp)
+    movq %xmm8, 72(%rsp)
+    movq %xmm9, 80(%rsp)
+    movq %xmm10, 88(%rsp)
+    movq %xmm11, 96(%rsp)
+    movq %xmm12, 104(%rsp)
+    movq %xmm13, 112(%rsp)
+    movq %xmm14, 120(%rsp)
+    movq %xmm15, 128(%rsp)
+    // Push ArtMethod* for save everything frame method.
+    pushq RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(%r10)
+    CFI_ADJUST_CFA_OFFSET(8)
+    // Store rsp as the top quick frame.
+    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+    // Ugly compile-time check, but we only have the preprocessor.
+    // Last +8: implicit return address pushed on stack when caller made call.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
+#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
+#endif
+#endif  // __APPLE__
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+    // Restore FPRs. Method and padding are still on the stack.
+    movq 16(%rsp), %xmm0
+    movq 24(%rsp), %xmm1
+    movq 32(%rsp), %xmm2
+    movq 40(%rsp), %xmm3
+    movq 48(%rsp), %xmm4
+    movq 56(%rsp), %xmm5
+    movq 64(%rsp), %xmm6
+    movq 72(%rsp), %xmm7
+    movq 80(%rsp), %xmm8
+    movq 88(%rsp), %xmm9
+    movq 96(%rsp), %xmm10
+    movq 104(%rsp), %xmm11
+    movq 112(%rsp), %xmm12
+    movq 120(%rsp), %xmm13
+    movq 128(%rsp), %xmm14
+    movq 136(%rsp), %xmm15
+
+    // Remove save everything callee save method, stack alignment padding and FPRs.
+    addq MACRO_LITERAL(16 + 16 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))
+    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
+    POP rax
+    POP rcx
+    POP rdx
+    POP rbx
+    POP rbp
+    POP rsi
+    POP rdi
+    POP r8
+    POP r9
+    POP r10
+    POP r11
+    POP r12
+    POP r13
+    POP r14
+    POP r15
+END_MACRO
+
 
     /*
      * Macro that sets up a call through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_.
      */
 MACRO0(DELIVER_PENDING_EXCEPTION)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME        // save callee saves for throw
     // (Thread*) setup
     movq %gs:THREAD_SELF_OFFSET, %rdi
     call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
@@ -275,30 +377,30 @@
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
-    call VAR(cxx_name)                 // cxx_name(Thread*)
+    call CALLVAR(cxx_name)             // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
-    call VAR(cxx_name)                 // cxx_name(arg1, Thread*)
+    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call VAR(cxx_name)                 // cxx_name(Thread*)
+    call CALLVAR(cxx_name)             // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -364,18 +466,18 @@
      * Adapted from x86 code.
      */
 MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
     movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
     movq %rsp, %rcx                                        // pass SP
 
-    call VAR(cxx_name)                                     // cxx_name(arg1, arg2, Thread*, SP)
+    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                            // save the code pointer
     movq %rax, %rdi
     movq %rdx, %rax
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     testq %rdi, %rdi
     jz 1f
@@ -702,57 +804,46 @@
 #endif  // __APPLE__
 END_FUNCTION art_quick_do_long_jump
 
-MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
-    // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current()
-    call VAR(cxx_name)                   // cxx_name(Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro)             // return or deliver exception
-    END_FUNCTION VAR(c_name)
-END_MACRO
-
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
-    call VAR(cxx_name)                   // cxx_name(arg0, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
     CALL_MACRO(return_macro)             // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
-    call VAR(cxx_name)                   // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
     CALL_MACRO(return_macro)             // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)            // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg1, arg2, arg3, arg4, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg1, arg2, arg3, arg4, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)            // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -760,11 +851,11 @@
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rsi                  // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                         // arg0 is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg0, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg0, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -772,11 +863,11 @@
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rdx                  // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                         // arg0 and arg1 are in rdi/rsi
     movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
-    call VAR(cxx_name)                  // (arg0, arg1, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // (arg0, arg1, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -784,11 +875,11 @@
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rcx                  // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                         // arg0, arg1, and arg2 are in rdi/rsi/rdx
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)            // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -826,83 +917,83 @@
     // Fast path rosalloc allocation.
     // RDI: type_idx, RSI: ArtMethod*, RAX: return value
     // RDX, RCX, R8, R9: free.
-    movq   ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx   // Load dex cache resolved types array
-                                                              // Load the class (edx)
+    movq   ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
+                                                             // Load the class (edx)
     movl   0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
-    testl  %edx, %edx                                         // Check null class
+    testl  %edx, %edx                                      // Check null class
     jz     .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Check class status.
+                                                           // Check class status.
     cmpl   LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
     jne    .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // We don't need a fence (between the
-                                                              // the status and the access flag
-                                                              // loads) here because every load is
-                                                              // a load acquire on x86.
-                                                              // Check access flags has
-                                                              // kAccClassIsFinalizable
+                                                           // We don't need a fence (between the
+                                                           // status and the access flag
+                                                           // loads) here because every load is
+                                                           // a load acquire on x86.
+                                                           // Check access flags has
+                                                           // kAccClassIsFinalizable
     testl  LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
     jnz    .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Check if the thread local
-                                                              // allocation stack has room.
-    movq   %gs:THREAD_SELF_OFFSET, %r8                        // r8 = thread
-    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx     // rcx = alloc stack top.
+                                                           // Check if the thread local
+                                                           // allocation stack has room.
+    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
+    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
     cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
     jae    .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Load the object size
+                                                           // Load the object size
     movl   MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %eax
-                                                              // Check if the size is for a thread
-                                                              // local allocation
+                                                           // Check if the size is for a thread
+                                                           // local allocation
     cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
     ja     .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Compute the rosalloc bracket index
-                                                              // from the size.
-                                                              // Align up the size by the rosalloc
-                                                              // bracket quantum size and divide
-                                                              // by the quantum size and subtract
-                                                              // by 1. This code is a shorter but
-                                                              // equivalent version.
+                                                           // Compute the rosalloc bracket index
+                                                           // from the size.
+                                                           // Align up the size by the rosalloc
+                                                           // bracket quantum size and divide
+                                                           // by the quantum size and subtract
+                                                           // by 1. This code is a shorter but
+                                                           // equivalent version.
     subq   LITERAL(1), %rax
     shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
-                                                              // Load the rosalloc run (r9)
+                                                           // Load the rosalloc run (r9)
     movq   THREAD_ROSALLOC_RUNS_OFFSET(%r8, %rax, __SIZEOF_POINTER__), %r9
-                                                              // Load the free list head (rax). This
-                                                              // will be the return val.
+                                                           // Load the free list head (rax). This
+                                                           // will be the return val.
     movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
     testq  %rax, %rax
     jz     .Lart_quick_alloc_object_rosalloc_slow_path
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
-                                                              // Push the new object onto the thread
-                                                              // local allocation stack and
-                                                              // increment the thread local
-                                                              // allocation stack top.
+                                                           // Push the new object onto the thread
+                                                           // local allocation stack and
+                                                           // increment the thread local
+                                                           // allocation stack top.
     movl   %eax, (%rcx)
     addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
     movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
-                                                              // Load the next pointer of the head
-                                                              // and update the list head with the
-                                                              // next pointer.
+                                                           // Load the next pointer of the head
+                                                           // and update the list head with the
+                                                           // next pointer.
     movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
     movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
-                                                              // Store the class pointer in the
-                                                              // header. This also overwrites the
-                                                              // next pointer. The offsets are
-                                                              // asserted to match.
+                                                           // Store the class pointer in the
+                                                           // header. This also overwrites the
+                                                           // next pointer. The offsets are
+                                                           // asserted to match.
 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
 #error "Class pointer needs to overwrite next pointer."
 #endif
     POISON_HEAP_REF edx
     movl   %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
-                                                              // Decrement the size of the free list
+                                                           // Decrement the size of the free list
     decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
-                                                              // No fence necessary for x86.
+                                                           // No fence necessary for x86.
     ret
 .Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                         // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
     // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdx                         // pass Thread::Current()
-    call SYMBOL(artAllocObjectFromCodeRosAlloc)               // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                       // restore frame up to return address
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                   // return or deliver exception
+    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread::Current()
+    call SYMBOL(artAllocObjectFromCodeRosAlloc)            // cxx_name(arg0, arg1, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
 END_FUNCTION art_quick_alloc_object_rosalloc
 
 // The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
@@ -910,49 +1001,49 @@
 // RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
 // RCX: scratch, r8: Thread::Current().
 MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
-    testl %edx, %edx                                           // Check null class
+    testl %edx, %edx                                       // Check null class
     jz   RAW_VAR(slowPathLabel)
-                                                               // Check class status.
+                                                           // Check class status.
     cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
     jne  RAW_VAR(slowPathLabel)
-                                                               // No fake dependence needed on x86
-                                                               // between status and flags load,
-                                                               // since each load is a load-acquire,
-                                                               // no loads reordering.
-                                                               // Check access flags has
-                                                               // kAccClassIsFinalizable
+                                                           // No fake dependence needed on x86
+                                                           // between status and flags load,
+                                                           // since each load is a load-acquire,
+                                                           // so loads are not reordered.
+                                                           // Check access flags has
+                                                           // kAccClassIsFinalizable
     testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
     jnz  RAW_VAR(slowPathLabel)
-    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
-    movq THREAD_LOCAL_END_OFFSET(%r8), %rax                    // Load thread_local_end.
-    subq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Compute the remaining buffer size.
-    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
-    cmpq %rax, %rcx                                            // Check if it fits. OK to do this
-                                                               // before rounding up the object size
-                                                               // assuming the buf size alignment.
+    movq %gs:THREAD_SELF_OFFSET, %r8                       // r8 = thread
+    movq THREAD_LOCAL_END_OFFSET(%r8), %rax                // Load thread_local_end.
+    subq THREAD_LOCAL_POS_OFFSET(%r8), %rax                // Compute the remaining buffer size.
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx       // Load the object size.
+    cmpq %rax, %rcx                                        // Check if it fits. OK to do this
+                                                           // before rounding up the object size
+                                                           // assuming the buf size alignment.
     ja   RAW_VAR(slowPathLabel)
-    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size by 8. (addr + 7) & ~7.
+    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx              // Align the size by 8. (addr + 7) & ~7.
     andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
-    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos
-                                                               // as allocated object.
-    addq %rax, %rcx                                            // Add the object size.
-    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
-    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increase thread_local_objects.
-                                                               // Store the class pointer in the header.
-                                                               // No fence needed for x86.
+    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                // Load thread_local_pos
+                                                           // as allocated object.
+    addq %rax, %rcx                                        // Add the object size.
+    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                // Update thread_local_pos.
+    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)      // Increase thread_local_objects.
+                                                           // Store the class pointer in the header.
+                                                           // No fence needed for x86.
     POISON_HEAP_REF edx
     movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
-    ret                                                        // Fast path succeeded.
+    ret                                                    // Fast path succeeded.
 END_MACRO
 
 // The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
 MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                          // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
     // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
-    call VAR(cxx_name)                                         // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                        // restore frame up to return address
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
+    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread::Current()
+    call CALLVAR(cxx_name)                                 // cxx_name(arg0, arg1, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
 END_MACRO
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
@@ -989,7 +1080,13 @@
                                                                // Load the class
     movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
     cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Null check so that we can load the lock word.
+    testl %edx, %edx
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
@@ -1022,7 +1119,7 @@
     test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
     jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
     movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     test %ecx, %ecx
     jnz  .Lalready_thin                   // Lock word contains a thin lock.
     // unlocked case - edx: original lock word, edi: obj.
@@ -1037,9 +1134,9 @@
     cmpw %cx, %dx                         // do we hold the lock already?
     jne  .Lslow_lock
     movl %edx, %ecx                       // copy the lock word to check count overflow.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
-    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) are set
+    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if the upper bit (28) is set
     jne  .Lslow_lock                      // count overflowed so go slow
     movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
@@ -1048,18 +1145,18 @@
     jnz  .Lretry_lock                     // cmpxchg failed retry
     ret
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
 DEFINE_FUNCTION art_quick_lock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object_no_inline
 
@@ -1074,12 +1171,12 @@
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     movl %ecx, %edx                       // copy the lock word to detect new count of 0.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
     cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
     jae  .Lrecursive_thin_unlock
     // update lockword, cmpxchg necessary for read barrier bits.
     movl %ecx, %eax                       // eax: old lock word.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
 #ifndef USE_READ_BARRIER
     movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
 #else
@@ -1099,18 +1196,18 @@
 #endif
     ret
 .Lslow_unlock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
 DEFINE_FUNCTION art_quick_unlock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
@@ -1136,7 +1233,7 @@
     CFI_ADJUST_CFA_OFFSET(-8)
     POP rsi                           // Pop arguments
     POP rdi
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
     call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
     UNREACHABLE
@@ -1313,7 +1410,7 @@
     POP  rsi
     POP  rdi
 
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.
 
     // Outgoing argument set up.
     movq %rdx, %rsi                         // Pass arg 2 = value.
@@ -1329,7 +1426,14 @@
     ret
 END_FUNCTION art_quick_memcpy
 
-NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+DEFINE_FUNCTION art_quick_test_suspend
+    SETUP_SAVE_EVERYTHING_FRAME                 // save everything for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
+    call SYMBOL(artTestSuspendFromCode)         // (Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
+    ret
+END_FUNCTION art_quick_test_suspend
 
 UNIMPLEMENTED art_quick_ldiv
 UNIMPLEMENTED art_quick_lmod
@@ -1369,22 +1473,22 @@
 DEFINE_FUNCTION art_quick_set64_static
                                          // new_val is already in %rdx
     movq 8(%rsp), %rsi                   // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                          // field_idx is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rcx    // pass Thread::Current()
     call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
     RETURN_IF_EAX_ZERO                   // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
 
     movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
     movq %rsp, %rcx                         // Pass SP.
     call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     movq %rax, %xmm0                        // Copy return value in case of float returns.
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_proxy_invoke_handler
@@ -1427,13 +1531,13 @@
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rdx
     movq %rsp, %rcx
     call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
     movq %rax, %r10               // Remember returned code pointer in R10.
     movq (%rsp), %rdi             // Load called method into RDI.
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     testq %r10, %r10              // If code pointer is null goto deliver pending exception.
     jz 1f
     jmp *%r10                     // Tail call into method.
@@ -1518,7 +1622,7 @@
      * Called to do a generic JNI down-call
      */
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
 
     movq %rsp, %rbp                 // save SP at (old) callee-save frame
     CFI_DEF_CFA_REGISTER(rbp)
@@ -1651,11 +1755,11 @@
      * RSI, RDX, RCX, R8, R9 are arguments to that method.
      */
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
-    movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
-    movq %rsp, %rdx                        // RDX := sp
+    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
+    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
+    movq %rsp, %rdx                    // RDX := sp
     call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
     movq %rax, %xmm0                   // Place return value also into floating point return value.
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_to_interpreter_bridge
@@ -1668,12 +1772,12 @@
     int3
     int3
 #else
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
 
     movq %rdi, %r12               // Preserve method pointer in a callee-save.
 
     movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
-    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %rcx   // Pass return PC.
+    movq FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp), %rcx   // Pass return PC.
 
     call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
 
@@ -1681,9 +1785,9 @@
     movq %r12, %rdi               // Reload method pointer.
 
     leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
-    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.
+    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.
 
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     jmp *%rax                     // Tail call to intended method.
 #endif  // __APPLE__
@@ -1692,7 +1796,7 @@
 DEFINE_FUNCTION art_quick_instrumentation_exit
     pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
 
     // We need to save rax and xmm0. We could use a callee-save from SETUP_REF_ONLY, but then
     // we would need to fully restore it. As there are a good number of callee-save registers, it
@@ -1719,7 +1823,7 @@
     CFI_ADJUST_CFA_OFFSET(-8)
     POP rax                   // Restore integer result.
 
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
 
     addq LITERAL(8), %rsp     // Drop fake return pc.
 
@@ -1733,7 +1837,7 @@
 DEFINE_FUNCTION art_quick_deoptimize
     pushq %rsi                         // Entry point for a jump. Fake that we were called.
                                        // Use hidden arg.
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
                                        // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
     call SYMBOL(artDeoptimize)         // artDeoptimize(Thread*)
@@ -1745,7 +1849,7 @@
      * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
                                                 // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi           // Pass Thread.
     call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
@@ -1833,6 +1937,14 @@
 //   convention (e.g. standard callee-save registers are preserved).
 MACRO2(READ_BARRIER_MARK_REG, name, reg)
     DEFINE_FUNCTION VAR(name)
+    // Null check so that we can load the lock word.
+    testq REG_VAR(reg), REG_VAR(reg)
+    jz .Lret_rb_\name
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+    jz .Lslow_rb_\name
+    ret
+.Lslow_rb_\name:
     // Save all potentially live caller-save core registers.
     PUSH rax
     PUSH rcx
@@ -1897,6 +2009,7 @@
     POP_REG_NE rdx, RAW_VAR(reg)
     POP_REG_NE rcx, RAW_VAR(reg)
     POP_REG_NE rax, RAW_VAR(reg)
+.Lret_rb_\name:
     ret
     END_FUNCTION VAR(name)
 END_MACRO
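
One arithmetic detail worth noting in the re-indented rosalloc fast path above: the bracket index is computed as (size - 1) >> ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT, which the comments describe as a shorter equivalent of aligning the size up to the bracket quantum, dividing by the quantum, and subtracting one. A small sketch with a worked example (the 16-byte quantum is an assumption for illustration):

    #include <cassert>
    #include <cstddef>

    constexpr size_t kQuantumShift = 4;                  // assumed bracket quantum of 16 bytes
    constexpr size_t kQuantum = size_t{1} << kQuantumShift;

    // Long form: align up, divide by the quantum, subtract one.
    constexpr size_t BracketIndexLong(size_t size) {
      return (size + kQuantum - 1) / kQuantum - 1;
    }

    // Shorter but equivalent form used by the stub: (size - 1) >> shift.
    constexpr size_t BracketIndexShort(size_t size) {
      return (size - 1) >> kQuantumShift;
    }

    int main() {
      // A 40-byte request: align to 48, 48 / 16 = 3, 3 - 1 = 2; (40 - 1) >> 4 = 2 as well.
      static_assert(BracketIndexLong(40) == 2 && BracketIndexShort(40) == 2, "");
      for (size_t size = 1; size <= 512; ++size) {
        assert(BracketIndexLong(size) == BracketIndexShort(size));
      }
      return 0;
    }
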
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 37eff83..867522f 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -25,12 +25,19 @@
 namespace art {
 namespace x86_64 {
 
+static constexpr uint32_t kX86_64CalleeSaveAlwaysSpills =
+    (1 << art::x86_64::kNumberOfCpuRegisters);  // Fake return address callee save.
 static constexpr uint32_t kX86_64CalleeSaveRefSpills =
     (1 << art::x86_64::RBX) | (1 << art::x86_64::RBP) | (1 << art::x86_64::R12) |
     (1 << art::x86_64::R13) | (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
 static constexpr uint32_t kX86_64CalleeSaveArgSpills =
     (1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) |
     (1 << art::x86_64::R8) | (1 << art::x86_64::R9);
+static constexpr uint32_t kX86_64CalleeSaveEverythingSpills =
+    (1 << art::x86_64::RAX) | (1 << art::x86_64::RCX) | (1 << art::x86_64::RDX) |
+    (1 << art::x86_64::RSI) | (1 << art::x86_64::RDI) | (1 << art::x86_64::R8) |
+    (1 << art::x86_64::R9) | (1 << art::x86_64::R10) | (1 << art::x86_64::R11);
+
 static constexpr uint32_t kX86_64CalleeSaveFpArgSpills =
     (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
     (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
@@ -38,16 +45,24 @@
 static constexpr uint32_t kX86_64CalleeSaveFpSpills =
     (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) |
     (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15);
+static constexpr uint32_t kX86_64CalleeSaveFpEverythingSpills =
+    (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) |
+    (1 << art::x86_64::XMM2) | (1 << art::x86_64::XMM3) |
+    (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
+    (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7) |
+    (1 << art::x86_64::XMM8) | (1 << art::x86_64::XMM9) |
+    (1 << art::x86_64::XMM10) | (1 << art::x86_64::XMM11);
 
 constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kX86_64CalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
-      (1 << art::x86_64::kNumberOfCpuRegisters);  // fake return address callee save;
+  return kX86_64CalleeSaveAlwaysSpills | kX86_64CalleeSaveRefSpills |
+      (type == Runtime::kSaveRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveEverything ? kX86_64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kX86_64CalleeSaveFpSpills |
-      (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0) |
+      (type == Runtime::kSaveEverything ? kX86_64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
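
As on x86, the x86-64 spill helpers above simply OR together per-type masks: every frame keeps the fake return-address slot and the reference-holding callee-saves, kSaveRefsAndArgs adds the argument registers, and the new kSaveEverything adds the remaining caller-save GPRs and XMM registers. A small sketch of the composition pattern (the enum and the register bits are placeholders, not ART's definitions):

    #include <bitset>
    #include <cstddef>
    #include <cstdint>

    enum class CalleeSaveType { kSaveAllCalleeSaves, kSaveRefsOnly, kSaveRefsAndArgs, kSaveEverything };

    // Placeholder bit masks standing in for the kX86_64CalleeSave*Spills constants.
    constexpr uint32_t kAlwaysSpills     = 1u << 16;              // fake return-address slot
    constexpr uint32_t kRefSpills        = (1u << 3) | (1u << 5); // callee-saves holding references
    constexpr uint32_t kArgSpills        = (1u << 1) | (1u << 2); // argument registers
    constexpr uint32_t kEverythingSpills = (1u << 0) | (1u << 6); // remaining caller-saves

    constexpr uint32_t CoreSpills(CalleeSaveType type) {
      return kAlwaysSpills | kRefSpills |
             (type == CalleeSaveType::kSaveRefsAndArgs ? kArgSpills : 0u) |
             (type == CalleeSaveType::kSaveEverything ? kEverythingSpills : 0u);
    }

    // The frame layout then follows from how many bits are set in the chosen mask.
    inline size_t NumCoreSpills(CalleeSaveType type) {
      return std::bitset<32>(CoreSpills(type)).count();
    }
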
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 2421246..1659f33 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -322,11 +322,11 @@
     return "<runtime internal resolution method>";
   } else if (this == runtime->GetImtConflictMethod()) {
     return "<runtime internal imt conflict method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveAll)) {
+  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves)) {
     return "<runtime internal callee-save all registers method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kRefsOnly)) {
+  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly)) {
     return "<runtime internal callee-save reference registers method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs)) {
+  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs)) {
     return "<runtime internal callee-save reference and argument registers method>";
   } else {
     return "<unknown runtime internal method>";
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 0619af8..d4cee44 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -20,6 +20,7 @@
 #if defined(__cplusplus)
 #include "art_method.h"
 #include "gc/allocator/rosalloc.h"
+#include "gc/heap.h"
 #include "jit/jit.h"
 #include "lock_word.h"
 #include "mirror/class.h"
@@ -174,10 +175,17 @@
 #define MIRROR_CLASS_OBJECT_SIZE_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
+#define MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET (104 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET,
+            art::mirror::Class::PrimitiveTypeOffset().Int32Value())
 #define MIRROR_CLASS_STATUS_OFFSET (112 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
 
+#define PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT 16
+ADD_TEST_EQ(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT,
+            static_cast<int>(art::mirror::Class::kPrimitiveTypeSizeShiftShift))
+
 // Array offsets.
 #define MIRROR_ARRAY_LENGTH_OFFSET      MIRROR_OBJECT_HEADER_SIZE
 ADD_TEST_EQ(MIRROR_ARRAY_LENGTH_OFFSET, art::mirror::Array::LengthOffset().Int32Value())
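
The new MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET / PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT pair lets assembly stubs derive an array class's element-size shift directly from the 32-bit primitive_type field. A hedged sketch of that extraction, assuming the packing the constant implies (upper 16 bits hold the size shift):

    #include <cstdint>

    // Assumption: the top 16 bits of the packed primitive_type word carry the
    // log2 of the component size (PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT above).
    inline uint32_t ExtractSizeShift(uint32_t packed_primitive_type) {
      return packed_primitive_type >> 16;  // e.g. 3 for an 8-byte component
    }
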
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index cea7046..e48eca9 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -52,7 +52,6 @@
   kAllocatorTagMonitorList,
   kAllocatorTagClassTable,
   kAllocatorTagInternTable,
-  kAllocatorTagLambdaBoxTable,
   kAllocatorTagMaps,
   kAllocatorTagLOS,
   kAllocatorTagSafeMap,
diff --git a/runtime/base/array_slice.h b/runtime/base/array_slice.h
index 19ad302..32283d0 100644
--- a/runtime/base/array_slice.h
+++ b/runtime/base/array_slice.h
@@ -129,6 +129,10 @@
     return element_size_;
   }
 
+  bool Contains(const T* element) const {
+    return &AtUnchecked(0) <= element && element < &AtUnchecked(size_);
+  }
+
  private:
   T& AtUnchecked(size_t index) {
     return *reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array_) + index * element_size_);
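
The added Contains() is a raw address-range test over the slice's backing storage; the class_linker.cc hunk further down relies on it to ask whether an ArtMethod* lives inside a class's virtual-method array. A standalone analogue of the check, with InRange as a hypothetical stand-in (the comparison is only meaningful when ptr really points into the same allocation):

    #include <cstddef>
    #include <cstdint>

    // True iff ptr falls inside [base, base + count * stride).
    inline bool InRange(const void* base, size_t count, size_t stride, const void* ptr) {
      const uintptr_t b = reinterpret_cast<uintptr_t>(base);
      const uintptr_t p = reinterpret_cast<uintptr_t>(ptr);
      return b <= p && p < b + count * stride;
    }
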
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 6f689d7..264a530 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -62,7 +62,6 @@
 Mutex* Locks::thread_suspend_count_lock_ = nullptr;
 Mutex* Locks::trace_lock_ = nullptr;
 Mutex* Locks::unexpected_signal_lock_ = nullptr;
-Mutex* Locks::lambda_table_lock_ = nullptr;
 Uninterruptible Roles::uninterruptible_;
 
 struct AllMutexData {
@@ -963,7 +962,6 @@
     DCHECK(thread_suspend_count_lock_ != nullptr);
     DCHECK(trace_lock_ != nullptr);
     DCHECK(unexpected_signal_lock_ != nullptr);
-    DCHECK(lambda_table_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
     LockLevel current_lock_level = kInstrumentEntrypointsLock;
@@ -1074,10 +1072,6 @@
     DCHECK(reference_queue_soft_references_lock_ == nullptr);
     reference_queue_soft_references_lock_ = new Mutex("ReferenceQueue soft references lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kLambdaTableLock);
-    DCHECK(lambda_table_lock_ == nullptr);
-    lambda_table_lock_ = new Mutex("lambda table lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
     DCHECK(abort_lock_ == nullptr);
     abort_lock_ = new Mutex("abort lock", current_lock_level, true);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3d7624d..d0dc886 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -60,7 +60,6 @@
   kUnexpectedSignalLock,
   kThreadSuspendCountLock,
   kAbortLock,
-  kLambdaTableLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
   kRosAllocGlobalLock,
@@ -88,7 +87,6 @@
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
   kDeoptimizedMethodsLock,
-  kJitCodeCacheLock,
   kClassLoaderClassesLock,
   kDefaultMutexLevel,
   kMarkSweepLargeObjectLock,
@@ -99,6 +97,7 @@
   kMonitorPoolLock,
   kMethodVerifiersLock,
   kClassLinkerClassesLock,  // TODO rename.
+  kJitCodeCacheLock,
   kBreakpointLock,
   kMonitorLock,
   kMonitorListLock,
@@ -690,10 +689,6 @@
 
   // Have an exclusive logging thread.
   static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_);
-
-  // Allow reader-writer mutual exclusion on the boxed table of lambda objects.
-  // TODO: this should be a RW mutex lock, except that ConditionVariables don't work with it.
-  static Mutex* lambda_table_lock_ ACQUIRED_AFTER(mutator_lock_);
 };
 
 class Roles {
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 6f0e125..48e3ceb 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -132,14 +132,14 @@
 }
 
 bool FdFile::Open(const std::string& path, int flags, mode_t mode) {
+  static_assert(O_RDONLY == 0, "Readonly flag has unexpected value.");
   CHECK_EQ(fd_, -1) << path;
-  read_only_mode_ = (flags & O_RDONLY) != 0;
+  read_only_mode_ = ((flags & O_ACCMODE) == O_RDONLY);
   fd_ = TEMP_FAILURE_RETRY(open(path.c_str(), flags, mode));
   if (fd_ == -1) {
     return false;
   }
   file_path_ = path;
-  static_assert(O_RDONLY == 0, "Readonly flag has unexpected value.");
   if (kCheckSafeUsage && (flags & (O_RDWR | O_CREAT | O_WRONLY)) != 0) {
     // Start in the base state (not flushed, not closed).
     guard_state_ = GuardState::kBase;
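
The relocated static_assert documents why this change is needed: O_RDONLY is 0, so the old test `(flags & O_RDONLY) != 0` could never be true and no file was ever considered read-only. The access-mode bits have to be isolated with O_ACCMODE before comparing. A minimal standalone illustration (not ART code):

    #include <fcntl.h>
    #include <cassert>

    static bool IsReadOnly(int flags) {
      // O_RDONLY == 0, so masking with it alone always yields 0; mask the
      // whole access-mode field instead and compare.
      return (flags & O_ACCMODE) == O_RDONLY;
    }

    int main() {
      assert(IsReadOnly(O_RDONLY));
      assert(!IsReadOnly(O_WRONLY));
      assert(!IsReadOnly(O_RDWR | O_CREAT));
      return 0;
    }
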
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index db3a44f..99ef6f7 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -53,12 +53,14 @@
   ASSERT_TRUE(file.IsOpened());
   EXPECT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
+  EXPECT_FALSE(file.ReadOnlyMode());
   EXPECT_EQ(0, file.Flush());
   EXPECT_EQ(0, file.Close());
   EXPECT_EQ(-1, file.Fd());
   EXPECT_FALSE(file.IsOpened());
-  FdFile file2(good_path,  O_RDONLY, true);
+  FdFile file2(good_path, O_RDONLY, true);
   EXPECT_TRUE(file2.IsOpened());
+  EXPECT_TRUE(file2.ReadOnlyMode());
   EXPECT_GE(file2.Fd(), 0);
 
   ASSERT_EQ(file2.Close(), 0);
@@ -70,6 +72,7 @@
   art::ScratchFile tmp;
   FdFile file(tmp.GetFilename(), O_RDONLY, false);
   ASSERT_TRUE(file.IsOpened());
+  EXPECT_TRUE(file.ReadOnlyMode());
   EXPECT_GE(file.Fd(), 0);
   uint8_t buffer[16];
   EXPECT_FALSE(file.ReadFully(&buffer, 4));
@@ -86,6 +89,7 @@
   FdFile file(tmp.GetFilename(), O_RDWR, false);
   ASSERT_TRUE(file.IsOpened());
   EXPECT_GE(file.Fd(), 0);
+  EXPECT_FALSE(file.ReadOnlyMode());
 
   char ignore_prefix[20] = {'a', };
   NullTerminateCharArray(ignore_prefix);
@@ -114,6 +118,7 @@
   FdFile file(tmp.GetFilename(), O_RDWR, false);
   ASSERT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
+  EXPECT_FALSE(file.ReadOnlyMode());
 
   const char* test_string = "This is a test string";
   size_t length = strlen(test_string) + 1;
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 3c64c81..46722ec 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1501,11 +1501,8 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(error_msg != nullptr);
   std::unique_ptr<const DexFile> dex_file;
-  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(location, nullptr);
+  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(location, nullptr, error_msg);
   if (oat_dex_file == nullptr) {
-    *error_msg = StringPrintf("Failed finding oat dex file for %s %s",
-                              oat_file->GetLocation().c_str(),
-                              location);
     return std::unique_ptr<const DexFile>();
   }
   std::string inner_error_msg;
@@ -3563,32 +3560,40 @@
     }
     LOG(INFO) << "Loaded class " << descriptor << source;
   }
-  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  mirror::ClassLoader* const class_loader = klass->GetClassLoader();
-  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
-  mirror::Class* existing = class_table->Lookup(descriptor, hash);
-  if (existing != nullptr) {
-    return existing;
-  }
-  if (kIsDebugBuild &&
-      !klass->IsTemp() &&
-      class_loader == nullptr &&
-      dex_cache_boot_image_class_lookup_required_) {
-    // Check a class loaded with the system class loader matches one in the image if the class
-    // is in the image.
-    existing = LookupClassFromBootImage(descriptor);
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    mirror::ClassLoader* const class_loader = klass->GetClassLoader();
+    ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+    mirror::Class* existing = class_table->Lookup(descriptor, hash);
     if (existing != nullptr) {
-      CHECK_EQ(klass, existing);
+      return existing;
+    }
+    if (kIsDebugBuild &&
+        !klass->IsTemp() &&
+        class_loader == nullptr &&
+        dex_cache_boot_image_class_lookup_required_) {
+      // Check a class loaded with the system class loader matches one in the image if the class
+      // is in the image.
+      existing = LookupClassFromBootImage(descriptor);
+      if (existing != nullptr) {
+        CHECK_EQ(klass, existing);
+      }
+    }
+    VerifyObject(klass);
+    class_table->InsertWithHash(klass, hash);
+    if (class_loader != nullptr) {
+      // This is necessary because we need to have the card dirtied for remembered sets.
+      Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+    }
+    if (log_new_class_table_roots_) {
+      new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
     }
   }
-  VerifyObject(klass);
-  class_table->InsertWithHash(klass, hash);
-  if (class_loader != nullptr) {
-    // This is necessary because we need to have the card dirtied for remembered sets.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
-  }
-  if (log_new_class_table_roots_) {
-    new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
+  if (kIsDebugBuild) {
+    // Test that copied methods correctly can find their holder.
+    for (ArtMethod& method : klass->GetCopiedMethods(image_pointer_size_)) {
+      CHECK_EQ(GetHoldingClassOfCopiedMethod(&method), klass);
+    }
   }
   return nullptr;
 }
@@ -8108,19 +8113,27 @@
 
 void ClassLinker::CleanupClassLoaders() {
   Thread* const self = Thread::Current();
-  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
-    const ClassLoaderData& data = *it;
-    // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
-    auto* const class_loader = down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
-    if (class_loader != nullptr) {
-      ++it;
-    } else {
-      VLOG(class_linker) << "Freeing class loader";
-      DeleteClassLoader(self, data);
-      it = class_loaders_.erase(it);
+  std::vector<ClassLoaderData> to_delete;
+  // Do the delete outside the lock to avoid lock violation in jit code cache.
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
+      const ClassLoaderData& data = *it;
+      // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
+      auto* const class_loader =
+          down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
+      if (class_loader != nullptr) {
+        ++it;
+      } else {
+        VLOG(class_linker) << "Freeing class loader";
+        to_delete.push_back(data);
+        it = class_loaders_.erase(it);
+      }
     }
   }
+  for (ClassLoaderData& data : to_delete) {
+    DeleteClassLoader(self, data);
+  }
 }
 
 std::set<DexCacheResolvedClasses> ClassLinker::GetResolvedClasses(bool ignore_boot_classes) {
@@ -8239,6 +8252,33 @@
   return ret;
 }
 
+class ClassLinker::FindVirtualMethodHolderVisitor : public ClassVisitor {
+ public:
+  FindVirtualMethodHolderVisitor(const ArtMethod* method, PointerSize pointer_size)
+      : method_(method),
+        pointer_size_(pointer_size) {}
+
+  bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) OVERRIDE {
+    if (klass->GetVirtualMethodsSliceUnchecked(pointer_size_).Contains(method_)) {
+      holder_ = klass;
+    }
+    // Return false to stop searching if holder_ is not null.
+    return holder_ == nullptr;
+  }
+
+  mirror::Class* holder_ = nullptr;
+  const ArtMethod* const method_;
+  const PointerSize pointer_size_;
+};
+
+mirror::Class* ClassLinker::GetHoldingClassOfCopiedMethod(ArtMethod* method) {
+  ScopedTrace trace(__FUNCTION__);  // Since this function is slow, have a trace to notify people.
+  CHECK(method->IsCopied());
+  FindVirtualMethodHolderVisitor visitor(method, image_pointer_size_);
+  VisitClasses(&visitor);
+  return visitor.holder_;
+}
+
 // Instantiate ResolveMethod.
 template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kForceICCECheck>(
     const DexFile& dex_file,
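
The CleanupClassLoaders rewrite and the relaxed lock annotation on DeleteClassLoader (see the class_linker.h hunk below) are two halves of one fix: tearing down a class loader can reach into the JIT code cache, whose kJitCodeCacheLock now sits just above kClassLinkerClassesLock in the LockLevel list (see the mutex.h hunk earlier), so victims are collected while classlinker_classes_lock_ is held but destroyed only after it is released. A generic sketch of that collect-then-delete pattern, not ART code:

    #include <list>
    #include <mutex>
    #include <vector>

    struct LoaderData { int id; bool dead; };

    std::mutex table_lock;          // stands in for classlinker_classes_lock_
    std::list<LoaderData> loaders;  // stands in for class_loaders_

    void ExpensiveTeardown(const LoaderData&) {
      // In the real code this may take other locks (e.g. the JIT code cache
      // lock), so it must not run while table_lock is held.
    }

    void Cleanup() {
      std::vector<LoaderData> to_delete;
      {
        std::lock_guard<std::mutex> lock(table_lock);
        for (auto it = loaders.begin(); it != loaders.end();) {
          if (it->dead) {
            to_delete.push_back(*it);  // remember the victim...
            it = loaders.erase(it);    // ...and unlink it under the lock
          } else {
            ++it;
          }
        }
      }
      for (const LoaderData& data : to_delete) {
        ExpensiveTeardown(data);       // heavy work happens lock-free
      }
    }
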
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index fcc6b23..c3ab8c5 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -648,6 +648,10 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
+  // Get the actual holding class for a copied method. Pretty slow, don't call often.
+  mirror::Class* GetHoldingClassOfCopiedMethod(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   struct DexCacheData {
     // Weak root to the DexCache. Note: Do not decode this unnecessarily or else class unloading may
     // not work properly.
@@ -676,7 +680,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static void DeleteClassLoader(Thread* self, const ClassLoaderData& data)
-      REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void VisitClassLoaders(ClassLoaderVisitor* visitor) const
@@ -1168,6 +1171,7 @@
   // Image pointer size.
   PointerSize image_pointer_size_;
 
+  class FindVirtualMethodHolderVisitor;
   friend class ImageDumper;  // for DexLock
   friend class ImageWriter;  // for GetClassRoots
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 912a74a..99732c6 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -640,14 +640,6 @@
                      "Attempt to write to null array");
       break;
     }
-    case Instruction::INVOKE_LAMBDA:
-    case Instruction::BOX_LAMBDA:
-    case Instruction::UNBOX_LAMBDA:
-    case Instruction::LIBERATE_VARIABLE: {
-      ThrowException("Ljava/lang/NullPointerException;", nullptr,
-                     "Using a null lambda");
-      break;
-    }
     case Instruction::MONITOR_ENTER:
     case Instruction::MONITOR_EXIT: {
       ThrowException("Ljava/lang/NullPointerException;", nullptr,
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 9f3ff3f..2a5198b 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1454,6 +1454,15 @@
   }
 }
 
+static size_t GetMethodNumArgRegistersIncludingThis(ArtMethod* method)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t num_registers = ArtMethod::NumArgRegisters(method->GetShorty());
+  if (!method->IsStatic()) {
+    ++num_registers;
+  }
+  return num_registers;
+}
+
 /*
  * Circularly shifts registers so that arguments come last. Reverts
  * slots to dex style argument placement.
@@ -1465,7 +1474,7 @@
     // We should not get here for a method without code (native, proxy or abstract). Log it and
     // return the slot as is since all registers are arguments.
     LOG(WARNING) << "Trying to demangle slot for method without code " << PrettyMethod(m);
-    uint16_t vreg_count = ArtMethod::NumArgRegisters(m->GetShorty());
+    uint16_t vreg_count = GetMethodNumArgRegistersIncludingThis(m);
     if (slot < vreg_count) {
       *error = JDWP::ERR_NONE;
       return slot;
@@ -1637,8 +1646,7 @@
 
   // arg_count considers doubles and longs to take 2 units.
   // variable_count considers everything to take 1 unit.
-  std::string shorty(m->GetShorty());
-  expandBufAdd4BE(pReply, ArtMethod::NumArgRegisters(shorty));
+  expandBufAdd4BE(pReply, GetMethodNumArgRegistersIncludingThis(m));
 
   // We don't know the total number of variables yet, so leave a blank and update it later.
   size_t variable_count_offset = expandBufGetLength(pReply);
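
Both JDWP paths now share GetMethodNumArgRegistersIncludingThis, so the implicit this register of instance methods is counted the same way when demangling slots and when reporting the argument count. A hedged sketch of the counting rule, assuming NumArgRegisters skips the return type (shorty[0]) and charges two vregs for J/D and one otherwise:

    #include <cstddef>

    // Illustrative helper, not the ArtMethod API. An instance method with
    // shorty "VJI" (void return, long + int parameters) yields
    // 1 (this) + 2 (J) + 1 (I) = 4.
    inline size_t NumArgRegsIncludingThis(const char* shorty, bool is_static) {
      size_t n = is_static ? 0 : 1;  // implicit this for instance methods
      for (const char* p = shorty + 1; *p != '\0'; ++p) {
        n += (*p == 'J' || *p == 'D') ? 2 : 1;
      }
      return n;
    }
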
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 16087a5..a6eb5f6 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -63,7 +63,9 @@
   {'0', '3', '5', '\0'},
   // Dex version 036 skipped because of an old dalvik bug on some versions of android where dex
   // files with that version number would erroneously be accepted and run.
-  {'0', '3', '7', '\0'}
+  {'0', '3', '7', '\0'},
+  // Dex version 038: Android "O" and beyond.
+  {'0', '3', '8', '\0'}
 };
 
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
@@ -336,6 +338,11 @@
     *error_code = ZipOpenErrorCode::kEntryNotFound;
     return nullptr;
   }
+  if (zip_entry->GetUncompressedLength() == 0) {
+    *error_msg = StringPrintf("Dex file '%s' has zero length", location.c_str());
+    *error_code = ZipOpenErrorCode::kDexFileError;
+    return nullptr;
+  }
   std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(location.c_str(), entry_name, error_msg));
   if (map.get() == nullptr) {
     *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", entry_name, location.c_str(),
@@ -433,6 +440,8 @@
                                                    MemMap* mem_map,
                                                    const OatDexFile* oat_dex_file,
                                                    std::string* error_msg) {
+  DCHECK(base != nullptr);
+  DCHECK_NE(size, 0U);
   CHECK_ALIGNED(base, 4);  // various dex file structures must be word aligned
   std::unique_ptr<DexFile> dex_file(
       new DexFile(base, size, location, location_checksum, mem_map, oat_dex_file));
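
Two independent hardening changes land in dex_file.cc: version 038 joins the accepted magic list (with the matching kNumDexVersions bump in dex_file.h below), and a zero-length classes.dex entry inside a zip is rejected before any attempt to extract and map it, which keeps the new DCHECKs in OpenMemory from firing. A minimal sketch of the version gate, mirroring the kDexMagicVersions table above:

    #include <cstring>

    // Accepts exactly "035", "037" and "038"; anything else (e.g. "039") is
    // rejected, matching the Version39Rejected test further down.
    inline bool IsAcceptedDexVersion(const char version[4]) {
      static const char kAccepted[][4] = {
          {'0', '3', '5', '\0'},
          {'0', '3', '7', '\0'},
          {'0', '3', '8', '\0'},
      };
      for (const auto& v : kAccepted) {
        if (std::memcmp(version, v, 4) == 0) {
          return true;
        }
      }
      return false;
    }
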
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 3dffe4b..2eca495 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -63,7 +63,7 @@
   static const uint32_t kClassDefinitionOrderEnforcedVersion = 37;
 
   static const uint8_t kDexMagic[];
-  static constexpr size_t kNumDexVersions = 2;
+  static constexpr size_t kNumDexVersions = 3;
   static constexpr size_t kDexVersionLen = 4;
   static const uint8_t kDexMagicVersions[kNumDexVersions][kDexVersionLen];
 
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 4f8e6f1..2704d8a 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -133,8 +133,46 @@
   "AAACAAAAQAEAAAEgAAACAAAAVAEAAAYgAAACAAAAiAEAAAEQAAABAAAAqAEAAAIgAAAPAAAArgEA"
   "AAMgAAACAAAAiAIAAAQgAAADAAAAlAIAAAAgAAACAAAAqwIAAAAQAAABAAAAxAIAAA==";
 
-static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
-                                                        const char* location) {
+// kRawDex38 and 39 are dex'ed versions of the following Java source:
+//
+// public class Main {
+//     public static void main(String[] foo) {
+//     }
+// }
+//
+// The dex file was manually edited to change its dex version code to 38
+// or 39, respectively.
+static const char kRawDex38[] =
+  "ZGV4CjAzOAC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+  "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+  "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+  "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+  "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+  "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+  "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+  "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+  "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+  "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
+static const char kRawDex39[] =
+  "ZGV4CjAzOQC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+  "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+  "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+  "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+  "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+  "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+  "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+  "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+  "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+  "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
+static const char kRawDexZeroLength[] =
+  "UEsDBAoAAAAAAOhxAkkAAAAAAAAAAAAAAAALABwAY2xhc3Nlcy5kZXhVVAkAA2QNoVdnDaFXdXgL"
+  "AAEE5AMBAASIEwAAUEsBAh4DCgAAAAAA6HECSQAAAAAAAAAAAAAAAAsAGAAAAAAAAAAAAKCBAAAA"
+  "AGNsYXNzZXMuZGV4VVQFAANkDaFXdXgLAAEE5AMBAASIEwAAUEsFBgAAAAABAAEAUQAAAEUAAAAA"
+  "AA==";
+
+static void DecodeAndWriteDexFile(const char* base64, const char* location) {
   // decode base64
   CHECK(base64 != nullptr);
   size_t length;
@@ -150,7 +188,11 @@
   if (file->FlushCloseOrErase() != 0) {
     PLOG(FATAL) << "Could not flush and close test file.";
   }
-  file.reset();
+}
+
+static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
+                                                        const char* location) {
+  DecodeAndWriteDexFile(base64, location);
 
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
@@ -197,6 +239,39 @@
   EXPECT_EQ(header.checksum_, raw->GetLocationChecksum());
 }
 
+TEST_F(DexFileTest, Version38Accepted) {
+  ScratchFile tmp;
+  std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kRawDex38, tmp.GetFilename().c_str()));
+  ASSERT_TRUE(raw.get() != nullptr);
+
+  const DexFile::Header& header = raw->GetHeader();
+  EXPECT_EQ(38u, header.GetVersion());
+}
+
+TEST_F(DexFileTest, Version39Rejected) {
+  ScratchFile tmp;
+  const char* location = tmp.GetFilename().c_str();
+  DecodeAndWriteDexFile(kRawDex39, location);
+
+  ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
+  std::string error_msg;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files));
+}
+
+TEST_F(DexFileTest, ZeroLengthDexRejected) {
+  ScratchFile tmp;
+  const char* location = tmp.GetFilename().c_str();
+  DecodeAndWriteDexFile(kRawDexZeroLength, location);
+
+  ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
+  std::string error_msg;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files));
+}
+
 TEST_F(DexFileTest, GetLocationChecksum) {
   ScopedObjectAccess soa(Thread::Current());
   std::unique_ptr<const DexFile> raw(OpenTestDexFile("Main"));
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index e160a10..3d0fea0 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -49,6 +49,8 @@
     case k32x: return true;
     case k35c: return true;
     case k3rc: return true;
+    case k45cc: return true;
+    case k4rcc: return true;
     case k51l: return true;
     default: return false;
   }
@@ -79,6 +81,8 @@
     case k32x: return VRegA_32x();
     case k35c: return VRegA_35c();
     case k3rc: return VRegA_3rc();
+    case k45cc: return VRegA_45cc();
+    case k4rcc: return VRegA_4rcc();
     case k51l: return VRegA_51l();
     default:
       LOG(FATAL) << "Tried to access vA of instruction " << Name() << " which has no A operand.";
@@ -206,6 +210,16 @@
   return InstAA(inst_data);
 }
 
+inline uint4_t Instruction::VRegA_45cc(uint16_t inst_data) const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return InstB(inst_data);  // This is labeled A in the spec.
+}
+
+inline uint8_t Instruction::VRegA_4rcc(uint16_t inst_data) const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return InstAA(inst_data);
+}
+
 //------------------------------------------------------------------------------
 // VRegB
 //------------------------------------------------------------------------------
@@ -223,13 +237,14 @@
     case k22t: return true;
     case k22x: return true;
     case k23x: return true;
-    case k25x: return true;
     case k31c: return true;
     case k31i: return true;
     case k31t: return true;
     case k32x: return true;
     case k35c: return true;
     case k3rc: return true;
+    case k45cc: return true;
+    case k4rcc: return true;
     case k51l: return true;
     default: return false;
   }
@@ -253,13 +268,14 @@
     case k22t: return VRegB_22t();
     case k22x: return VRegB_22x();
     case k23x: return VRegB_23x();
-    case k25x: return VRegB_25x();
     case k31c: return VRegB_31c();
     case k31i: return VRegB_31i();
     case k31t: return VRegB_31t();
     case k32x: return VRegB_32x();
     case k35c: return VRegB_35c();
     case k3rc: return VRegB_3rc();
+    case k45cc: return VRegB_45cc();
+    case k4rcc: return VRegB_4rcc();
     case k51l: return VRegB_51l();
     default:
       LOG(FATAL) << "Tried to access vB of instruction " << Name() << " which has no B operand.";
@@ -331,12 +347,6 @@
   return static_cast<uint8_t>(Fetch16(1) & 0xff);
 }
 
-// Number of additional registers in this instruction. # of var arg registers = this value + 1.
-inline uint4_t Instruction::VRegB_25x() const {
-  DCHECK_EQ(FormatOf(Opcode()), k25x);
-  return InstB(Fetch16(0));
-}
-
 inline uint32_t Instruction::VRegB_31c() const {
   DCHECK_EQ(FormatOf(Opcode()), k31c);
   return Fetch32(1);
@@ -367,6 +377,16 @@
   return Fetch16(1);
 }
 
+inline uint16_t Instruction::VRegB_45cc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return Fetch16(1);
+}
+
+inline uint16_t Instruction::VRegB_4rcc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return Fetch16(1);
+}
+
 inline uint64_t Instruction::VRegB_51l() const {
   DCHECK_EQ(FormatOf(Opcode()), k51l);
   uint64_t vB_wide = Fetch32(1) | ((uint64_t) Fetch32(3) << 32);
@@ -383,9 +403,10 @@
     case k22s: return true;
     case k22t: return true;
     case k23x: return true;
-    case k25x: return true;
     case k35c: return true;
     case k3rc: return true;
+    case k45cc: return true;
+    case k4rcc: return true;
     default: return false;
   }
 }
@@ -397,9 +418,10 @@
     case k22s: return VRegC_22s();
     case k22t: return VRegC_22t();
     case k23x: return VRegC_23x();
-    case k25x: return VRegC_25x();
     case k35c: return VRegC_35c();
     case k3rc: return VRegC_3rc();
+    case k45cc: return VRegC_45cc();
+    case k4rcc: return VRegC_4rcc();
     default:
       LOG(FATAL) << "Tried to access vC of instruction " << Name() << " which has no C operand.";
       exit(EXIT_FAILURE);
@@ -431,11 +453,6 @@
   return static_cast<uint8_t>(Fetch16(1) >> 8);
 }
 
-inline uint4_t Instruction::VRegC_25x() const {
-  DCHECK_EQ(FormatOf(Opcode()), k25x);
-  return static_cast<uint4_t>(Fetch16(1) & 0xf);
-}
-
 inline uint4_t Instruction::VRegC_35c() const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
   return static_cast<uint4_t>(Fetch16(2) & 0x0f);
@@ -446,79 +463,51 @@
   return Fetch16(2);
 }
 
-inline bool Instruction::HasVarArgs35c() const {
-  return FormatOf(Opcode()) == k35c;
+inline uint4_t Instruction::VRegC_45cc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return static_cast<uint4_t>(Fetch16(2) & 0x0f);
 }
 
-inline bool Instruction::HasVarArgs25x() const {
-  return FormatOf(Opcode()) == k25x;
+inline uint16_t Instruction::VRegC_4rcc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return Fetch16(2);
 }
 
-// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+2.
-inline void Instruction::GetAllArgs25x(uint32_t (&arg)[kMaxVarArgRegs25x]) const {
-  DCHECK_EQ(FormatOf(Opcode()), k25x);
-
-  /*
-   * The opcode looks like this:
-   *   op vC, {vD, vE, vF, vG}
-   *
-   *  and vB is the (implicit) register count (0-4) which denotes how far from vD to vG to read.
-   *
-   *  vC is always present, so with "op vC, {}" the register count will be 0 even though vC
-   *  is valid.
-   *
-   *  The exact semantic meanings of vC:vG is up to the instruction using the format.
-   *
-   *  Encoding drawing as a bit stream:
-   *  (Note that each uint16 is little endian, and each register takes up 4 bits)
-   *
-   *       uint16  |||   uint16
-   *   7-0     15-8    7-0   15-8
-   *  |------|-----|||-----|-----|
-   *  |opcode|vB|vG|||vD|vC|vF|vE|
-   *  |------|-----|||-----|-----|
-   */
-  uint16_t reg_list = Fetch16(1);
-  uint4_t count = VRegB_25x();
-  DCHECK_LE(count, 4U) << "Invalid arg count in 25x (" << count << ")";
-
-  /*
-   * TODO(iam): Change instruction encoding to one of:
-   *
-   * - (X) vA = args count, vB = closure register, {vC..vG} = args (25x)
-   * - (Y) vA = args count, vB = method index, {vC..vG} = args (35x)
-   *
-   * (do this in conjunction with adding verifier support for invoke-lambda)
-   */
-
-  /*
-   * Copy the argument registers into the arg[] array, and
-   * also copy the first argument into vC. (The
-   * DecodedInstruction structure doesn't have separate
-   * fields for {vD, vE, vF, vG}, so there's no need to make
-   * copies of those.) Note that all cases fall-through.
-   */
-  switch (count) {
-    case 4:
-      arg[5] = (Fetch16(0) >> 8) & 0x0f;  // vG
-      FALLTHROUGH_INTENDED;
-    case 3:
-      arg[4] = (reg_list >> 12) & 0x0f;  // vF
-      FALLTHROUGH_INTENDED;
-    case 2:
-      arg[3] = (reg_list >> 8) & 0x0f;  // vE
-      FALLTHROUGH_INTENDED;
-    case 1:
-      arg[2] = (reg_list >> 4) & 0x0f;  // vD
-      FALLTHROUGH_INTENDED;
-    default:  // case 0
-      // The required lambda 'this' is actually a pair, but the pair is implicit.
-      arg[0] = VRegC_25x();  // vC
-      arg[1] = arg[0] + 1;   // vC + 1
-      break;
+//------------------------------------------------------------------------------
+// VRegH
+//------------------------------------------------------------------------------
+inline bool Instruction::HasVRegH() const {
+  switch (FormatOf(Opcode())) {
+    case k45cc: return true;
+    case k4rcc: return true;
+    default : return false;
   }
 }
 
+inline int32_t Instruction::VRegH() const {
+  switch (FormatOf(Opcode())) {
+    case k45cc: return VRegH_45cc();
+    case k4rcc: return VRegH_4rcc();
+    default :
+      LOG(FATAL) << "Tried to access vH of instruction " << Name() << " which has no H operand.";
+      exit(EXIT_FAILURE);
+  }
+}
+
+inline uint16_t Instruction::VRegH_45cc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return Fetch16(3);
+}
+
+inline uint16_t Instruction::VRegH_4rcc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return Fetch16(3);
+}
+
+inline bool Instruction::HasVarArgs() const {
+  return FormatOf(Opcode()) == k35c;
+}
+
 inline void Instruction::GetVarArgs(uint32_t arg[kMaxVarArgRegs], uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
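
The new k45cc/k4rcc accessors all decode fixed fields out of the instruction's four 16-bit code units. A self-contained sketch of the 45cc case, mirroring the accessors above (Decoded45cc/Decode45cc are illustrative names, not ART APIs):

    #include <cstdint>

    // Operands of a 45cc instruction ("A|G|op BBBB F|E|D|C HHHH"); units[0..3]
    // are the four little-endian code units.
    struct Decoded45cc {
      uint8_t vreg_a;   // argument count (high nibble of the first unit)
      uint16_t vreg_b;  // method index
      uint8_t vreg_c;   // first argument register (low nibble of the third unit)
      uint16_t vreg_h;  // proto index
    };

    inline Decoded45cc Decode45cc(const uint16_t units[4]) {
      Decoded45cc d;
      d.vreg_a = static_cast<uint8_t>(units[0] >> 12);   // VRegA_45cc
      d.vreg_b = units[1];                               // VRegB_45cc
      d.vreg_c = static_cast<uint8_t>(units[2] & 0x0f);  // VRegC_45cc
      d.vreg_h = units[3];                               // VRegH_45cc
      return d;
    }
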
 
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 300e618..c31d236 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -28,7 +28,7 @@
 namespace art {
 
 const char* const Instruction::kInstructionNames[] = {
-#define INSTRUCTION_NAME(o, c, pname, f, r, i, a, v) pname,
+#define INSTRUCTION_NAME(o, c, pname, f, i, a, v) pname,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
 #undef DEX_INSTRUCTION_LIST
@@ -36,7 +36,7 @@
 };
 
 Instruction::Format const Instruction::kInstructionFormats[] = {
-#define INSTRUCTION_FORMAT(o, c, p, format, r, i, a, v) format,
+#define INSTRUCTION_FORMAT(o, c, p, format, i, a, v) format,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_FORMAT)
 #undef DEX_INSTRUCTION_LIST
@@ -44,7 +44,7 @@
 };
 
 Instruction::IndexType const Instruction::kInstructionIndexTypes[] = {
-#define INSTRUCTION_INDEX_TYPE(o, c, p, f, r, index, a, v) index,
+#define INSTRUCTION_INDEX_TYPE(o, c, p, f, index, a, v) index,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_INDEX_TYPE)
 #undef DEX_INSTRUCTION_LIST
@@ -52,7 +52,7 @@
 };
 
 int const Instruction::kInstructionFlags[] = {
-#define INSTRUCTION_FLAGS(o, c, p, f, r, i, flags, v) flags,
+#define INSTRUCTION_FLAGS(o, c, p, f, i, flags, v) flags,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_FLAGS)
 #undef DEX_INSTRUCTION_LIST
@@ -60,7 +60,7 @@
 };
 
 int const Instruction::kInstructionVerifyFlags[] = {
-#define INSTRUCTION_VERIFY_FLAGS(o, c, p, f, r, i, a, vflags) vflags,
+#define INSTRUCTION_VERIFY_FLAGS(o, c, p, f, i, a, vflags) vflags,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_VERIFY_FLAGS)
 #undef DEX_INSTRUCTION_LIST
@@ -68,12 +68,13 @@
 };
 
 int const Instruction::kInstructionSizeInCodeUnits[] = {
-#define INSTRUCTION_SIZE(opcode, c, p, format, r, i, a, v) \
-    (((opcode) == NOP)                        ? -1 :       \
-     (((format) >= k10x) && ((format) <= k10t)) ?  1 :     \
-     (((format) >= k20t) && ((format) <= k25x)) ?  2 :     \
-     (((format) >= k32x) && ((format) <= k3rc)) ?  3 :     \
-      ((format) == k51l)                      ?  5 : -1),
+#define INSTRUCTION_SIZE(opcode, c, p, format, i, a, v) \
+    (((opcode) == NOP) ? -1 : \
+     (((format) >= k10x) && ((format) <= k10t)) ?  1 : \
+     (((format) >= k20t) && ((format) <= k22c)) ?  2 : \
+     (((format) >= k32x) && ((format) <= k3rc)) ?  3 : \
+     (((format) >= k45cc) && ((format) <= k4rcc)) ? 4 : \
+      ((format) == k51l) ?  5 : -1),
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_SIZE)
 #undef DEX_INSTRUCTION_LIST
@@ -241,14 +242,6 @@
             break;
           }
           FALLTHROUGH_INTENDED;
-        case CREATE_LAMBDA:
-          if (file != nullptr) {
-            uint32_t method_idx = VRegB_21c();
-            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << PrettyMethod(method_idx, *file, true)
-               << " // method@" << method_idx;
-            break;
-          }
-          FALLTHROUGH_INTENDED;
         default:
           os << StringPrintf("%s v%d, thing@%d", opcode, VRegA_21c(), VRegB_21c());
           break;
@@ -329,26 +322,6 @@
       }
       break;
     }
-    case k25x: {
-      if (Opcode() == INVOKE_LAMBDA) {
-        uint32_t arg[kMaxVarArgRegs25x];
-        GetAllArgs25x(arg);
-        const size_t num_extra_var_args = VRegB_25x();
-        DCHECK_LE(num_extra_var_args + 2, arraysize(arg));
-
-        // invoke-lambda vC, {vD, vE, vF, vG}
-        os << opcode << " v" << arg[0] << ", {";
-        for (size_t i = 0; i < num_extra_var_args; ++i) {
-          if (i != 0) {
-            os << ", ";
-          }
-          os << "v" << arg[i + 2];  // Don't print the pair of vC registers. Pair is implicit.
-        }
-        os << "}";
-        break;
-      }
-      FALLTHROUGH_INTENDED;
-    }
     case k32x:  os << StringPrintf("%s v%d, v%d", opcode, VRegA_32x(), VRegB_32x()); break;
     case k30t:  os << StringPrintf("%s %+d", opcode, VRegA_30t()); break;
     case k31t:  os << StringPrintf("%s v%d, %+d", opcode, VRegA_31t(), VRegB_31t()); break;
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 89c3db6..f437fde 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -80,7 +80,7 @@
   };
 
   enum Code {  // private marker to avoid generate-operator-out.py from processing.
-#define INSTRUCTION_ENUM(opcode, cname, p, f, r, i, a, v) cname = (opcode),
+#define INSTRUCTION_ENUM(opcode, cname, p, f, i, a, v) cname = (opcode),
 #include "dex_instruction_list.h"
     DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM)
 #undef DEX_INSTRUCTION_LIST
@@ -105,7 +105,6 @@
     k22t,  // op vA, vB, +CCCC
     k22s,  // op vA, vB, #+CCCC
     k22c,  // op vA, vB, thing@CCCC
-    k25x,  // op vC, {vD, vE, vF, vG} (B: count)
     k32x,  // op vAAAA, vBBBB
     k30t,  // op +AAAAAAAA
     k31t,  // op vAA, +BBBBBBBB
@@ -113,6 +112,15 @@
     k31c,  // op vAA, thing@BBBBBBBB
     k35c,  // op {vC, vD, vE, vF, vG}, thing@BBBB (B: count, A: vG)
     k3rc,  // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
+
+    // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH (A: count)
+    // format: AG op BBBB FEDC HHHH
+    k45cc,
+
+    // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH (AA: count)
+    // format: AA op BBBB CCCC HHHH
+    k4rcc,
+
     k51l,  // op vAA, #+BBBBBBBBBBBBBBBB
   };
 
@@ -180,12 +188,9 @@
     kVerifyVarArgRangeNonZero = 0x100000,
     kVerifyRuntimeOnly        = 0x200000,
     kVerifyError              = 0x400000,
-    kVerifyRegCString         = 0x800000,
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
-  static constexpr uint32_t kMaxVarArgRegs25x = 6;  // lambdas are 2 registers.
-  static constexpr uint32_t kLambdaVirtualRegisterWidth = 2;
 
   // Returns the size (in 2 byte code units) of this instruction.
   size_t SizeInCodeUnits() const {
@@ -221,7 +226,7 @@
 
   // Returns a pointer to the instruction after this 2xx instruction in the stream.
   const Instruction* Next_2xx() const {
-    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k25x);
+    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k22c);
     return RelativeAt(2);
   }
 
@@ -231,6 +236,12 @@
     return RelativeAt(3);
   }
 
+  // Returns a pointer to the instruction after this 4xx instruction in the stream.
+  const Instruction* Next_4xx() const {
+    DCHECK(FormatOf(Opcode()) >= k45cc && FormatOf(Opcode()) <= k4rcc);
+    return RelativeAt(4);
+  }
+
   // Returns a pointer to the instruction after this 51l instruction in the stream.
   const Instruction* Next_51l() const {
     DCHECK(FormatOf(Opcode()) == k51l);
@@ -317,6 +328,12 @@
   uint8_t VRegA_51l() const {
     return VRegA_51l(Fetch16(0));
   }
+  uint4_t VRegA_45cc() const {
+    return VRegA_45cc(Fetch16(0));
+  }
+  uint8_t VRegA_4rcc() const {
+    return VRegA_4rcc(Fetch16(0));
+  }
 
   // The following methods return the vA operand for various instruction formats. The "inst_data"
   // parameter holds the first 16 bits of instruction which the returned value is decoded from.
@@ -341,6 +358,8 @@
   uint4_t VRegA_35c(uint16_t inst_data) const;
   uint8_t VRegA_3rc(uint16_t inst_data) const;
   uint8_t VRegA_51l(uint16_t inst_data) const;
+  uint4_t VRegA_45cc(uint16_t inst_data) const;
+  uint8_t VRegA_4rcc(uint16_t inst_data) const;
 
   // VRegB
   bool HasVRegB() const;
@@ -371,7 +390,6 @@
   }
   uint16_t VRegB_22x() const;
   uint8_t VRegB_23x() const;
-  uint4_t VRegB_25x() const;
   uint32_t VRegB_31c() const;
   int32_t VRegB_31i() const;
   int32_t VRegB_31t() const;
@@ -379,6 +397,8 @@
   uint16_t VRegB_35c() const;
   uint16_t VRegB_3rc() const;
   uint64_t VRegB_51l() const;  // vB_wide
+  uint16_t VRegB_45cc() const;
+  uint16_t VRegB_4rcc() const;
 
   // The following methods return the vB operand for all instruction formats where it is encoded in
   // the first 16 bits of instruction. The "inst_data" parameter holds these 16 bits. The returned
@@ -398,20 +418,24 @@
   int16_t VRegC_22s() const;
   int16_t VRegC_22t() const;
   uint8_t VRegC_23x() const;
-  uint4_t VRegC_25x() const;
   uint4_t VRegC_35c() const;
   uint16_t VRegC_3rc() const;
+  uint4_t VRegC_45cc() const;
+  uint16_t VRegC_4rcc() const;
+
+
+  // VRegH
+  bool HasVRegH() const;
+  int32_t VRegH() const;
+  uint16_t VRegH_45cc() const;
+  uint16_t VRegH_4rcc() const;
 
   // Fills the given array with the 'arg' array of the instruction.
-  bool HasVarArgs35c() const;
-  bool HasVarArgs25x() const;
-
-  // TODO(iam): Make this name more consistent with GetAllArgs25x by including the opcode format.
+  bool HasVarArgs() const;
   void GetVarArgs(uint32_t args[kMaxVarArgRegs], uint16_t inst_data) const;
   void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const {
     return GetVarArgs(args, Fetch16(0));
   }
-  void GetAllArgs25x(uint32_t (&args)[kMaxVarArgRegs25x]) const;
 
   // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
   // 16 bits of instruction.
@@ -539,7 +563,7 @@
 
   int GetVerifyTypeArgumentC() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField |
-        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide | kVerifyRegCString));
+        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
   }
 
   int GetVerifyExtraFlags() const {
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index 9d7e0c4..e974932 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -17,264 +17,265 @@
 #ifndef ART_RUNTIME_DEX_INSTRUCTION_LIST_H_
 #define ART_RUNTIME_DEX_INSTRUCTION_LIST_H_
 
+// V(opcode, instruction_code, name, format, index, flags, verifier_flags);
 #define DEX_INSTRUCTION_LIST(V) \
-  V(0x00, NOP, "nop", k10x, false, kIndexNone, kContinue, kVerifyNone) \
-  V(0x01, MOVE, "move", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x02, MOVE_FROM16, "move/from16", k22x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x03, MOVE_16, "move/16", k32x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x04, MOVE_WIDE, "move-wide", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x05, MOVE_WIDE_FROM16, "move-wide/from16", k22x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x06, MOVE_WIDE_16, "move-wide/16", k32x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x07, MOVE_OBJECT, "move-object", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x08, MOVE_OBJECT_FROM16, "move-object/from16", k22x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x09, MOVE_OBJECT_16, "move-object/16", k32x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x0A, MOVE_RESULT, "move-result", k11x, true, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0B, MOVE_RESULT_WIDE, "move-result-wide", k11x, true, kIndexNone, kContinue, kVerifyRegAWide) \
-  V(0x0C, MOVE_RESULT_OBJECT, "move-result-object", k11x, true, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0D, MOVE_EXCEPTION, "move-exception", k11x, true, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0E, RETURN_VOID, "return-void", k10x, false, kIndexNone, kReturn, kVerifyNone) \
-  V(0x0F, RETURN, "return", k11x, false, kIndexNone, kReturn, kVerifyRegA) \
-  V(0x10, RETURN_WIDE, "return-wide", k11x, false, kIndexNone, kReturn, kVerifyRegAWide) \
-  V(0x11, RETURN_OBJECT, "return-object", k11x, false, kIndexNone, kReturn, kVerifyRegA) \
-  V(0x12, CONST_4, "const/4", k11n, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x13, CONST_16, "const/16", k21s, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x14, CONST, "const", k31i, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x15, CONST_HIGH16, "const/high16", k21h, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x18, CONST_WIDE, "const-wide", k51l, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x1A, CONST_STRING, "const-string", k21c, true, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
-  V(0x1B, CONST_STRING_JUMBO, "const-string/jumbo", k31c, true, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
-  V(0x1C, CONST_CLASS, "const-class", k21c, true, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
-  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, false, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
-  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, false, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
-  V(0x1F, CHECK_CAST, "check-cast", k21c, true, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
-  V(0x20, INSTANCE_OF, "instance-of", k22c, true, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
-  V(0x21, ARRAY_LENGTH, "array-length", k12x, true, kIndexNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0x22, NEW_INSTANCE, "new-instance", k21c, true, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegBNewInstance) \
-  V(0x23, NEW_ARRAY, "new-array", k22c, true, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
-  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, false, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArg) \
-  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, false, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArgRange) \
-  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, false, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyArrayData) \
-  V(0x27, THROW, "throw", k11x, false, kIndexNone, kThrow, kVerifyRegA) \
-  V(0x28, GOTO, "goto", k10t, false, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x29, GOTO_16, "goto/16", k20t, false, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x2A, GOTO_32, "goto/32", k30t, false, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x2B, PACKED_SWITCH, "packed-switch", k31t, false, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
-  V(0x2C, SPARSE_SWITCH, "sparse-switch", k31t, false, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
-  V(0x2D, CMPL_FLOAT, "cmpl-float", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x2E, CMPG_FLOAT, "cmpg-float", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x2F, CMPL_DOUBLE, "cmpl-double", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x30, CMPG_DOUBLE, "cmpg-double", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x31, CMP_LONG, "cmp-long", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x32, IF_EQ, "if-eq", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x33, IF_NE, "if-ne", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x34, IF_LT, "if-lt", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x35, IF_GE, "if-ge", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x36, IF_GT, "if-gt", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x37, IF_LE, "if-le", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x38, IF_EQZ, "if-eqz", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x39, IF_NEZ, "if-nez", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3A, IF_LTZ, "if-ltz", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3B, IF_GEZ, "if-gez", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3C, IF_GTZ, "if-gtz", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3D, IF_LEZ, "if-lez", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3E, UNUSED_3E, "unused-3e", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x3F, UNUSED_3F, "unused-3f", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x40, UNUSED_40, "unused-40", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x41, UNUSED_41, "unused-41", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x42, UNUSED_42, "unused-42", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x43, UNUSED_43, "unused-43", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x44, AGET, "aget", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x45, AGET_WIDE, "aget-wide", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x46, AGET_OBJECT, "aget-object", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x48, AGET_BYTE, "aget-byte", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x49, AGET_CHAR, "aget-char", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4A, AGET_SHORT, "aget-short", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4B, APUT, "aput", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4C, APUT_WIDE, "aput-wide", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x4D, APUT_OBJECT, "aput-object", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4F, APUT_BYTE, "aput-byte", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x50, APUT_CHAR, "aput-char", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x51, APUT_SHORT, "aput-short", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x52, IGET, "iget", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x53, IGET_WIDE, "iget-wide", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x54, IGET_OBJECT, "iget-object", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x56, IGET_BYTE, "iget-byte", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x57, IGET_CHAR, "iget-char", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x58, IGET_SHORT, "iget-short", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x59, IPUT, "iput", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5A, IPUT_WIDE, "iput-wide", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x5B, IPUT_OBJECT, "iput-object", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5D, IPUT_BYTE, "iput-byte", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5E, IPUT_CHAR, "iput-char", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5F, IPUT_SHORT, "iput-short", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x60, SGET, "sget", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x61, SGET_WIDE, "sget-wide", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
-  V(0x62, SGET_OBJECT, "sget-object", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x64, SGET_BYTE, "sget-byte", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x65, SGET_CHAR, "sget-char", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x66, SGET_SHORT, "sget-short", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x67, SPUT, "sput", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x68, SPUT_WIDE, "sput-wide", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
-  V(0x69, SPUT_OBJECT, "sput-object", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6B, SPUT_BYTE, "sput-byte", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6C, SPUT_CHAR, "sput-char", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6D, SPUT_SHORT, "sput-short", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x71, INVOKE_STATIC, "invoke-static", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
-  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x73, RETURN_VOID_NO_BARRIER, "return-void-no-barrier", k10x, false, kIndexNone, kReturn, kVerifyNone) \
-  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
-  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x79, UNUSED_79, "unused-79", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x7A, UNUSED_7A, "unused-7a", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x7B, NEG_INT, "neg-int", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x7C, NOT_INT, "not-int", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x7D, NEG_LONG, "neg-long", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x7E, NOT_LONG, "not-long", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x7F, NEG_FLOAT, "neg-float", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x80, NEG_DOUBLE, "neg-double", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x81, INT_TO_LONG, "int-to-long", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x84, LONG_TO_INT, "long-to-int", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x90, ADD_INT, "add-int", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x91, SUB_INT, "sub-int", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x92, MUL_INT, "mul-int", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x93, DIV_INT, "div-int", k23x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x94, REM_INT, "rem-int", k23x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x95, AND_INT, "and-int", k23x, true, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x96, OR_INT, "or-int", k23x, true, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x97, XOR_INT, "xor-int", k23x, true, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x98, SHL_INT, "shl-int", k23x, true, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x99, SHR_INT, "shr-int", k23x, true, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9A, USHR_INT, "ushr-int", k23x, true, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9B, ADD_LONG, "add-long", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9C, SUB_LONG, "sub-long", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9D, MUL_LONG, "mul-long", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9E, DIV_LONG, "div-long", k23x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9F, REM_LONG, "rem-long", k23x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA0, AND_LONG, "and-long", k23x, true, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA1, OR_LONG, "or-long", k23x, true, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA2, XOR_LONG, "xor-long", k23x, true, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA3, SHL_LONG, "shl-long", k23x, true, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA4, SHR_LONG, "shr-long", k23x, true, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA5, USHR_LONG, "ushr-long", k23x, true, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA6, ADD_FLOAT, "add-float", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA7, SUB_FLOAT, "sub-float", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA8, MUL_FLOAT, "mul-float", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA9, DIV_FLOAT, "div-float", k23x, true, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAA, REM_FLOAT, "rem-float", k23x, true, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAB, ADD_DOUBLE, "add-double", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAC, SUB_DOUBLE, "sub-double", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAD, MUL_DOUBLE, "mul-double", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAE, DIV_DOUBLE, "div-double", k23x, true, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAF, REM_DOUBLE, "rem-double", k23x, true, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
-  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
-  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
-  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB) \
-  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB) \
-  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, true, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB) \
-  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, true, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB) \
-  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, true, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB) \
-  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, true, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB) \
-  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, true, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB) \
-  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, true, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB) \
-  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, true, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, true, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, true, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, true, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, true, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, true, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
-  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
-  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
-  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, true, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB) \
-  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, true, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB) \
-  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, true, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, true, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, true, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD1, RSUB_INT, "rsub-int", k22s, true, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, true, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, true, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, true, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, true, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, true, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, true, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, true, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, true, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, true, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, true, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, true, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, true, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, true, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, true, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, true, kIndexNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, true, kIndexNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, true, kIndexNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE3, IGET_QUICK, "iget-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \
-  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \
-  V(0xEB, IPUT_BOOLEAN_QUICK, "iput-boolean-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEC, IPUT_BYTE_QUICK, "iput-byte-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xED, IPUT_CHAR_QUICK, "iput-char-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEE, IPUT_SHORT_QUICK, "iput-short-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEF, IGET_BOOLEAN_QUICK, "iget-boolean-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kIndexNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \
-  V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xF5, CAPTURE_VARIABLE, "capture-variable", k21c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegBString) \
-  /* TODO(iam): get rid of the unused 'false' column */ \
-  V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kIndexMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \
-  V(0xF7, LIBERATE_VARIABLE, "liberate-variable", k22c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCString) \
-  V(0xF8, BOX_LAMBDA, "box-lambda", k22x, true, kIndexNone, kContinue | kExperimental, kVerifyRegA | kVerifyRegB) \
-  V(0xF9, UNBOX_LAMBDA, "unbox-lambda", k22c, true, kIndexTypeRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
-  V(0xFA, UNUSED_FA, "unused-fa", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFB, UNUSED_FB, "unused-fb", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFC, UNUSED_FC, "unused-fc", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFD, UNUSED_FD, "unused-fd", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFE, UNUSED_FE, "unused-fe", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFF, UNUSED_FF, "unused-ff", k10x, false, kIndexUnknown, 0, kVerifyError)
+  V(0x00, NOP, "nop", k10x, kIndexNone, kContinue, kVerifyNone) \
+  V(0x01, MOVE, "move", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x02, MOVE_FROM16, "move/from16", k22x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x03, MOVE_16, "move/16", k32x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x04, MOVE_WIDE, "move-wide", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x05, MOVE_WIDE_FROM16, "move-wide/from16", k22x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x06, MOVE_WIDE_16, "move-wide/16", k32x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x07, MOVE_OBJECT, "move-object", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x08, MOVE_OBJECT_FROM16, "move-object/from16", k22x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x09, MOVE_OBJECT_16, "move-object/16", k32x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x0A, MOVE_RESULT, "move-result", k11x, kIndexNone, kContinue, kVerifyRegA) \
+  V(0x0B, MOVE_RESULT_WIDE, "move-result-wide", k11x, kIndexNone, kContinue, kVerifyRegAWide) \
+  V(0x0C, MOVE_RESULT_OBJECT, "move-result-object", k11x, kIndexNone, kContinue, kVerifyRegA) \
+  V(0x0D, MOVE_EXCEPTION, "move-exception", k11x, kIndexNone, kContinue, kVerifyRegA) \
+  V(0x0E, RETURN_VOID, "return-void", k10x, kIndexNone, kReturn, kVerifyNone) \
+  V(0x0F, RETURN, "return", k11x, kIndexNone, kReturn, kVerifyRegA) \
+  V(0x10, RETURN_WIDE, "return-wide", k11x, kIndexNone, kReturn, kVerifyRegAWide) \
+  V(0x11, RETURN_OBJECT, "return-object", k11x, kIndexNone, kReturn, kVerifyRegA) \
+  V(0x12, CONST_4, "const/4", k11n, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x13, CONST_16, "const/16", k21s, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x14, CONST, "const", k31i, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x15, CONST_HIGH16, "const/high16", k21h, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x18, CONST_WIDE, "const-wide", k51l, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x1A, CONST_STRING, "const-string", k21c, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
+  V(0x1B, CONST_STRING_JUMBO, "const-string/jumbo", k31c, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
+  V(0x1C, CONST_CLASS, "const-class", k21c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
+  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
+  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
+  V(0x1F, CHECK_CAST, "check-cast", k21c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
+  V(0x20, INSTANCE_OF, "instance-of", k22c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
+  V(0x21, ARRAY_LENGTH, "array-length", k12x, kIndexNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
+  V(0x22, NEW_INSTANCE, "new-instance", k21c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegBNewInstance) \
+  V(0x23, NEW_ARRAY, "new-array", k22c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
+  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArg) \
+  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArgRange) \
+  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyArrayData) \
+  V(0x27, THROW, "throw", k11x, kIndexNone, kThrow, kVerifyRegA) \
+  V(0x28, GOTO, "goto", k10t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
+  V(0x29, GOTO_16, "goto/16", k20t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
+  V(0x2A, GOTO_32, "goto/32", k30t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
+  V(0x2B, PACKED_SWITCH, "packed-switch", k31t, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
+  V(0x2C, SPARSE_SWITCH, "sparse-switch", k31t, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
+  V(0x2D, CMPL_FLOAT, "cmpl-float", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x2E, CMPG_FLOAT, "cmpg-float", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x2F, CMPL_DOUBLE, "cmpl-double", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x30, CMPG_DOUBLE, "cmpg-double", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x31, CMP_LONG, "cmp-long", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x32, IF_EQ, "if-eq", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x33, IF_NE, "if-ne", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x34, IF_LT, "if-lt", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x35, IF_GE, "if-ge", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x36, IF_GT, "if-gt", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x37, IF_LE, "if-le", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x38, IF_EQZ, "if-eqz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x39, IF_NEZ, "if-nez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3A, IF_LTZ, "if-ltz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3B, IF_GEZ, "if-gez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3C, IF_GTZ, "if-gtz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3D, IF_LEZ, "if-lez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3E, UNUSED_3E, "unused-3e", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x3F, UNUSED_3F, "unused-3f", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x40, UNUSED_40, "unused-40", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x41, UNUSED_41, "unused-41", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x42, UNUSED_42, "unused-42", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x43, UNUSED_43, "unused-43", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x44, AGET, "aget", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x45, AGET_WIDE, "aget-wide", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x46, AGET_OBJECT, "aget-object", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x48, AGET_BYTE, "aget-byte", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x49, AGET_CHAR, "aget-char", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4A, AGET_SHORT, "aget-short", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4B, APUT, "aput", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4C, APUT_WIDE, "aput-wide", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x4D, APUT_OBJECT, "aput-object", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4F, APUT_BYTE, "aput-byte", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x50, APUT_CHAR, "aput-char", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x51, APUT_SHORT, "aput-short", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x52, IGET, "iget", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x53, IGET_WIDE, "iget-wide", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x54, IGET_OBJECT, "iget-object", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x56, IGET_BYTE, "iget-byte", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x57, IGET_CHAR, "iget-char", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x58, IGET_SHORT, "iget-short", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x59, IPUT, "iput", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5A, IPUT_WIDE, "iput-wide", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x5B, IPUT_OBJECT, "iput-object", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5D, IPUT_BYTE, "iput-byte", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5E, IPUT_CHAR, "iput-char", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5F, IPUT_SHORT, "iput-short", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x60, SGET, "sget", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x61, SGET_WIDE, "sget-wide", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
+  V(0x62, SGET_OBJECT, "sget-object", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x64, SGET_BYTE, "sget-byte", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x65, SGET_CHAR, "sget-char", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x66, SGET_SHORT, "sget-short", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x67, SPUT, "sput", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x68, SPUT_WIDE, "sput-wide", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
+  V(0x69, SPUT_OBJECT, "sput-object", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6B, SPUT_BYTE, "sput-byte", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6C, SPUT_CHAR, "sput-char", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6D, SPUT_SHORT, "sput-short", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x71, INVOKE_STATIC, "invoke-static", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
+  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x73, RETURN_VOID_NO_BARRIER, "return-void-no-barrier", k10x, kIndexNone, kReturn, kVerifyNone) \
+  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
+  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x79, UNUSED_79, "unused-79", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x7A, UNUSED_7A, "unused-7a", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x7B, NEG_INT, "neg-int", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x7C, NOT_INT, "not-int", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x7D, NEG_LONG, "neg-long", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x7E, NOT_LONG, "not-long", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x7F, NEG_FLOAT, "neg-float", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x80, NEG_DOUBLE, "neg-double", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x81, INT_TO_LONG, "int-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x84, LONG_TO_INT, "long-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x90, ADD_INT, "add-int", k23x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x91, SUB_INT, "sub-int", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x92, MUL_INT, "mul-int", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x93, DIV_INT, "div-int", k23x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x94, REM_INT, "rem-int", k23x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x95, AND_INT, "and-int", k23x, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x96, OR_INT, "or-int", k23x, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x97, XOR_INT, "xor-int", k23x, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x98, SHL_INT, "shl-int", k23x, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x99, SHR_INT, "shr-int", k23x, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9A, USHR_INT, "ushr-int", k23x, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9B, ADD_LONG, "add-long", k23x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9C, SUB_LONG, "sub-long", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9D, MUL_LONG, "mul-long", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9E, DIV_LONG, "div-long", k23x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9F, REM_LONG, "rem-long", k23x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA0, AND_LONG, "and-long", k23x, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA1, OR_LONG, "or-long", k23x, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA2, XOR_LONG, "xor-long", k23x, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA3, SHL_LONG, "shl-long", k23x, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA4, SHR_LONG, "shr-long", k23x, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA5, USHR_LONG, "ushr-long", k23x, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA6, ADD_FLOAT, "add-float", k23x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA7, SUB_FLOAT, "sub-float", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA8, MUL_FLOAT, "mul-float", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA9, DIV_FLOAT, "div-float", k23x, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAA, REM_FLOAT, "rem-float", k23x, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAB, ADD_DOUBLE, "add-double", k23x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAC, SUB_DOUBLE, "sub-double", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAD, MUL_DOUBLE, "mul-double", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAE, DIV_DOUBLE, "div-double", k23x, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAF, REM_DOUBLE, "rem-double", k23x, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB) \
+  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB) \
+  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB) \
+  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB) \
+  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB) \
+  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB) \
+  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD1, RSUB_INT, "rsub-int", k22s, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, kIndexNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, kIndexNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, kIndexNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE3, IGET_QUICK, "iget-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE6, IPUT_QUICK, "iput-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \
+  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \
+  V(0xEB, IPUT_BOOLEAN_QUICK, "iput-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEC, IPUT_BYTE_QUICK, "iput-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xED, IPUT_CHAR_QUICK, "iput-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEE, IPUT_SHORT_QUICK, "iput-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEF, IGET_BOOLEAN_QUICK, "iget-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF3, UNUSED_F3, "unused-f3", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF4, UNUSED_F4, "unused-f4", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF5, UNUSED_F5, "unused-f5", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF6, UNUSED_F6, "unused-f6", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF7, UNUSED_F7, "unused-f7", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF8, UNUSED_F8, "unused-f8", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, kVerifyError) \
+  /* TODO(narayan): The following two entries are placeholders. */ \
+  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexUnknown, 0, kVerifyError) \
+  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexUnknown, 0, kVerifyError) \
+  V(0xFC, UNUSED_FC, "unused-fc", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xFD, UNUSED_FD, "unused-fd", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xFF, UNUSED_FF, "unused-ff", k10x, kIndexUnknown, 0, kVerifyError)
 
 #define DEX_INSTRUCTION_FORMAT_LIST(V) \
   V(k10x) \
@@ -293,7 +294,6 @@
   V(k22t) \
   V(k22s) \
   V(k22c) \
-  V(k25x) \
   V(k32x) \
   V(k30t) \
   V(k31t) \
diff --git a/runtime/dex_instruction_test.cc b/runtime/dex_instruction_test.cc
index 671ac0e..00c8e07 100644
--- a/runtime/dex_instruction_test.cc
+++ b/runtime/dex_instruction_test.cc
@@ -28,4 +28,96 @@
   EXPECT_EQ(Instruction::kVerifyNone, Instruction::VerifyFlagsOf(nop));
 }
 
+static void Build45cc(uint8_t num_args, uint16_t method_idx, uint16_t proto_idx,
+                      uint16_t arg_regs, uint16_t* out) {
+  // A = num argument registers
+  // B = method_idx
+  // C - G = argument registers
+  // H = proto_idx
+  //
+  // op = 0xFA
+  //
+  // format:
+  // AG op BBBB FEDC HHHH
+  out[0] = 0;
+  out[0] |= (num_args << 12);
+  out[0] |= 0x00FA;
+
+  out[1] = method_idx;
+  out[2] = arg_regs;
+  out[3] = proto_idx;
+}
+
+static void Build4rcc(uint16_t num_args, uint16_t method_idx, uint16_t proto_idx,
+                      uint16_t arg_regs_start, uint16_t* out) {
+  // A = num argument registers
+  // B = method_idx
+  // C = first argument register
+  // H = proto_idx
+  //
+  // op = 0xFB
+  //
+  // format:
+  // AA op BBBB CCCC HHHH
+  out[0] = 0;
+  out[0] |= (num_args << 8);
+  out[0] |= 0x00FB;
+
+  out[1] = method_idx;
+  out[2] = arg_regs_start;
+  out[3] = proto_idx;
+}
+
+TEST(Instruction, PropertiesOf45cc) {
+  uint16_t instruction[4];
+  Build45cc(4u /* num_vregs */, 16u /* method_idx */, 32u /* proto_idx */,
+            0xcafe /* arg_regs */, instruction);
+
+  const Instruction* ins = Instruction::At(instruction);
+  ASSERT_EQ(4u, ins->SizeInCodeUnits());
+
+  ASSERT_TRUE(ins->HasVRegA());
+  ASSERT_EQ(4, ins->VRegA());
+  ASSERT_EQ(4u, ins->VRegA_45cc());
+  ASSERT_EQ(4u, ins->VRegA_45cc(instruction[0]));
+
+  ASSERT_TRUE(ins->HasVRegB());
+  ASSERT_EQ(16, ins->VRegB());
+  ASSERT_EQ(16u, ins->VRegB_45cc());
+
+  ASSERT_TRUE(ins->HasVRegC());
+  ASSERT_EQ(0xe, ins->VRegC());
+  ASSERT_EQ(0xe, ins->VRegC_45cc());
+
+  ASSERT_TRUE(ins->HasVRegH());
+  ASSERT_EQ(32, ins->VRegH());
+  ASSERT_EQ(32, ins->VRegH_45cc());
+}
+
+TEST(Instruction, PropertiesOf4rcc) {
+  uint16_t instruction[4];
+  Build4rcc(4u /* num_vregs */, 16u /* method_idx */, 32u /* proto_idx */,
+            0xcafe /* arg_regs */, instruction);
+
+  const Instruction* ins = Instruction::At(instruction);
+  ASSERT_EQ(4u, ins->SizeInCodeUnits());
+
+  ASSERT_TRUE(ins->HasVRegA());
+  ASSERT_EQ(4, ins->VRegA());
+  ASSERT_EQ(4u, ins->VRegA_4rcc());
+  ASSERT_EQ(4u, ins->VRegA_4rcc(instruction[0]));
+
+  ASSERT_TRUE(ins->HasVRegB());
+  ASSERT_EQ(16, ins->VRegB());
+  ASSERT_EQ(16u, ins->VRegB_4rcc());
+
+  ASSERT_TRUE(ins->HasVRegC());
+  ASSERT_EQ(0xcafe, ins->VRegC());
+  ASSERT_EQ(0xcafe, ins->VRegC_4rcc());
+
+  ASSERT_TRUE(ins->HasVRegH());
+  ASSERT_EQ(32, ins->VRegH());
+  ASSERT_EQ(32, ins->VRegH_4rcc());
+}
+
 }  // namespace art
diff --git a/runtime/dex_instruction_visitor.h b/runtime/dex_instruction_visitor.h
index 795b95b..42af6a9 100644
--- a/runtime/dex_instruction_visitor.h
+++ b/runtime/dex_instruction_visitor.h
@@ -32,7 +32,7 @@
     while (i < size_in_code_units) {
       const Instruction* inst = Instruction::At(&code[i]);
       switch (inst->Opcode()) {
-#define INSTRUCTION_CASE(o, cname, p, f, r, i, a, v)  \
+#define INSTRUCTION_CASE(o, cname, p, f, i, a, v)  \
         case Instruction::cname: {                    \
           derived->Do_ ## cname(inst);                \
           break;                                      \
@@ -50,7 +50,7 @@
 
  private:
   // Specific handlers for each instruction.
-#define INSTRUCTION_VISITOR(o, cname, p, f, r, i, a, v)    \
+#define INSTRUCTION_VISITOR(o, cname, p, f, i, a, v)    \
   void Do_ ## cname(const Instruction* inst) {             \
     T* derived = static_cast<T*>(this);                    \
     derived->Do_Default(inst);                             \
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index b12b118..c045e84 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -31,7 +31,7 @@
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveVerifyAndClinit(type_idx, caller, self, true, false);
 }
 
@@ -39,7 +39,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveVerifyAndClinit(type_idx, caller, self, false, false);
 }
 
@@ -48,14 +48,14 @@
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveVerifyAndClinit(type_idx, caller, self, false, true);
 }
 
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveStringFromCode(caller, string_idx);
 }
 
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index b5e560f..82d5467 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -56,7 +56,7 @@
   CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
                                      << self->GetException()->Dump();
   // Compute address of return PC and sanity check that it currently holds 0.
-  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly);
+  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsOnly);
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
                                                       return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 9678079..c67379a 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -46,7 +46,7 @@
   static constexpr size_t kBytesStackArgLocation = 4;
   // Frame size in bytes of a callee-save frame for RefsAndArgs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize =
-      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
+      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kSaveRefsAndArgs);
 #if defined(__arm__)
   // The callee save frame is pointed to by SP.
   // | argN       |  |
@@ -75,11 +75,11 @@
   static constexpr size_t kNumQuickFprArgs = kArm32QuickCodeUseSoftFloat ? 0 : 16;
   static constexpr bool kGprFprLockstep = false;
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
-      arm::ArmCalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
+      arm::ArmCalleeSaveFpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
-      arm::ArmCalleeSaveGpr1Offset(Runtime::kRefsAndArgs);  // Offset of first GPR arg.
+      arm::ArmCalleeSaveGpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
-      arm::ArmCalleeSaveLrOffset(Runtime::kRefsAndArgs);  // Offset of return address.
+      arm::ArmCalleeSaveLrOffset(Runtime::kSaveRefsAndArgs);  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -113,11 +113,11 @@
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr bool kGprFprLockstep = false;
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
-      arm64::Arm64CalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
+      arm64::Arm64CalleeSaveFpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
-      arm64::Arm64CalleeSaveGpr1Offset(Runtime::kRefsAndArgs);  // Offset of first GPR arg.
+      arm64::Arm64CalleeSaveGpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
-      arm64::Arm64CalleeSaveLrOffset(Runtime::kRefsAndArgs);  // Offset of return address.
+      arm64::Arm64CalleeSaveLrOffset(Runtime::kSaveRefsAndArgs);  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -307,7 +307,7 @@
 
   static ArtMethod* GetCallingMethod(ArtMethod** sp) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK((*sp)->IsCalleeSaveMethod());
-    return GetCalleeSaveMethodCaller(sp, Runtime::kRefsAndArgs);
+    return GetCalleeSaveMethodCaller(sp, Runtime::kSaveRefsAndArgs);
   }
 
   static ArtMethod* GetOuterMethod(ArtMethod** sp) SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -319,7 +319,7 @@
 
   static uint32_t GetCallingDexPc(ArtMethod** sp) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK((*sp)->IsCalleeSaveMethod());
-    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
+    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kSaveRefsAndArgs);
     ArtMethod** caller_sp = reinterpret_cast<ArtMethod**>(
         reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
     uintptr_t outer_pc = QuickArgumentVisitor::GetCallingPc(sp);
@@ -2054,7 +2054,7 @@
 static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, Thread* self,
                                      ArtMethod** sp) {
   ScopedQuickEntrypointChecks sqec(self);
-  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
+  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
   ArtMethod* caller_method = QuickArgumentVisitor::GetCallingMethod(sp);
   ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check, type);
   if (UNLIKELY(method == nullptr)) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 01e22a4..553c092 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -80,10 +80,16 @@
 // This test ensures that kQuickCalleeSaveFrame_RefAndArgs_FrameSize is correct.
 TEST_F(QuickTrampolineEntrypointsTest, FrameSize) {
   // We have to use a define here as the callee_save_frame.h functions are constexpr.
-#define CHECK_FRAME_SIZE(isa)                                                                     \
-  CheckFrameSize(isa, Runtime::kRefsAndArgs, GetCalleeSaveFrameSize(isa, Runtime::kRefsAndArgs)); \
-  CheckFrameSize(isa, Runtime::kRefsOnly, GetCalleeSaveFrameSize(isa, Runtime::kRefsOnly));       \
-  CheckFrameSize(isa, Runtime::kSaveAll, GetCalleeSaveFrameSize(isa, Runtime::kSaveAll))
+#define CHECK_FRAME_SIZE(isa)                                                 \
+  CheckFrameSize(isa,                                                         \
+                 Runtime::kSaveRefsAndArgs,                                   \
+                 GetCalleeSaveFrameSize(isa, Runtime::kSaveRefsAndArgs));     \
+  CheckFrameSize(isa,                                                         \
+                 Runtime::kSaveRefsOnly,                                      \
+                 GetCalleeSaveFrameSize(isa, Runtime::kSaveRefsOnly));        \
+  CheckFrameSize(isa,                                                         \
+                 Runtime::kSaveAllCalleeSaves,                                \
+                 GetCalleeSaveFrameSize(isa, Runtime::kSaveAllCalleeSaves))
 
   CHECK_FRAME_SIZE(kArm);
   CHECK_FRAME_SIZE(kArm64);
@@ -108,12 +114,12 @@
   // Ensure that the computation in callee_save_frame.h is correct.
   // Note: we can only check against the kRuntimeISA, because the ArtMethod computation uses
   // sizeof(void*), which is wrong when the target bitwidth is not the same as the host's.
-  CheckPCOffset(kRuntimeISA, Runtime::kRefsAndArgs,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsAndArgs));
-  CheckPCOffset(kRuntimeISA, Runtime::kRefsOnly,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly));
-  CheckPCOffset(kRuntimeISA, Runtime::kSaveAll,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveAll));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveRefsAndArgs,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsAndArgs));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveRefsOnly,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsOnly));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveAllCalleeSaves,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveAllCalleeSaves));
 }
 
 }  // namespace art
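
Note: the hunks above and below are part of one mechanical rename of the runtime's callee-save
frame types (kSaveAll -> kSaveAllCalleeSaves, kRefsOnly -> kSaveRefsOnly, kRefsAndArgs ->
kSaveRefsAndArgs) plus a new kSaveEverything entry, which also gains its own image method slot
(ImageHeader::kSaveEverythingMethod) and runtime offset (0x18 in asm_support_gen.h below). The
sketch that follows is only an illustration of the resulting enum shape and of the 8-byte stride
implied by the generated offsets; the real enum is Runtime::CalleeSaveType, not this declaration.

// Illustrative sketch, not the actual ART declaration.
enum CalleeSaveType {
  kSaveAllCalleeSaves,  // previously kSaveAll      -> offset 0x00
  kSaveRefsOnly,        // previously kRefsOnly     -> offset 0x08
  kSaveRefsAndArgs,     // previously kRefsAndArgs  -> offset 0x10
  kSaveEverything,      // new in this change       -> offset 0x18
  kLastCalleeSaveType
};
// Consistent with the 8-byte stride of the RUNTIME_*_METHOD_OFFSET values generated below.
static_assert(static_cast<int>(kSaveRefsAndArgs) * 8 == 0x10,
              "callee-save methods appear to be stored as 64-bit slots");
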
diff --git a/runtime/experimental_flags.h b/runtime/experimental_flags.h
index 198f3fa..fde1a5f 100644
--- a/runtime/experimental_flags.h
+++ b/runtime/experimental_flags.h
@@ -26,7 +26,6 @@
   // The actual flag values.
   enum {
     kNone           = 0x0000,
-    kLambdas        = 0x0001,
   };
 
   constexpr ExperimentalFlags() : value_(0x0000) {}
@@ -62,15 +61,9 @@
   uint32_t value_;
 };
 
-inline std::ostream& operator<<(std::ostream& stream, const ExperimentalFlags& e) {
-  bool started = false;
-  if (e & ExperimentalFlags::kLambdas) {
-    stream << (started ? "|" : "") << "kLambdas";
-    started = true;
-  }
-  if (!started) {
-    stream << "kNone";
-  }
+inline std::ostream& operator<<(std::ostream& stream,
+                                const ExperimentalFlags& e ATTRIBUTE_UNUSED) {
+  stream << "kNone";
   return stream;
 }
 
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 4019a5b..fb774a4 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -154,11 +154,30 @@
 }
 
 inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
+  mirror::Object* ret;
+  // TODO: Delete the GetMarkBit check once all of the callers properly check the bit. The
+  // remaining caller is array allocation.
+  if (from_ref == nullptr || from_ref->GetMarkBit()) {
+    return from_ref;
+  }
   // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
   if (UNLIKELY(mark_from_read_barrier_measurements_)) {
-    return MarkFromReadBarrierWithMeasurements(from_ref);
+    ret = MarkFromReadBarrierWithMeasurements(from_ref);
+  } else {
+    ret = Mark(from_ref);
   }
-  return Mark(from_ref);
+  // Only set the mark bit for the Baker read barrier.
+  if (kUseBakerReadBarrier && LIKELY(!rb_mark_bit_stack_full_ && ret->AtomicSetMarkBit(0, 1))) {
+    // If the mark stack is full, the bit may temporarily flip to marked and back to unmarked.
+    // Seeing either value is OK since the only race is doing an unnecessary Mark.
+    if (!rb_mark_bit_stack_->AtomicPushBack(ret)) {
+      // Mark stack is full, set the bit back to zero.
+      CHECK(ret->AtomicSetMarkBit(1, 0));
+      // Set rb_mark_bit_stack_full_; this is racy but OK since AtomicPushBack is thread safe.
+      rb_mark_bit_stack_full_ = true;
+    }
+  }
+  return ret;
 }
 
 inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
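
Note: MarkFromReadBarrier now short-circuits on objects whose lock-word mark bit is already set,
and records newly marked objects on a bounded rb_mark_bit_stack_ so the bits can be cleared again
at the end of the GC (see the EmptyRBMarkBitStack loop further down). The following is a minimal,
self-contained sketch of that pattern using toy stand-ins; it is not ART's mirror::Object /
accounting::ObjectStack API and it omits the measurement path.

#include <atomic>
#include <cstddef>
#include <vector>

struct Obj {
  std::atomic<int> mark_bit{0};
  bool AtomicSetMarkBit(int expected, int desired) {
    return mark_bit.compare_exchange_strong(expected, desired);
  }
};

struct BoundedStack {  // toy; the real ObjectStack push is atomic
  std::vector<Obj*> slots;
  size_t capacity;
  explicit BoundedStack(size_t cap) : capacity(cap) {}
  bool AtomicPushBack(Obj* obj) {
    if (slots.size() >= capacity) return false;
    slots.push_back(obj);
    return true;
  }
};

Obj* MarkFromReadBarrierSketch(Obj* ref, BoundedStack* rb_stack, bool* stack_full) {
  if (ref == nullptr || ref->mark_bit.load() != 0) {
    return ref;  // fast path: already marked, skip the slow-path Mark call
  }
  Obj* ret = ref;  // the real code calls Mark(from_ref) here
  if (!*stack_full && ret->AtomicSetMarkBit(0, 1)) {
    if (!rb_stack->AtomicPushBack(ret)) {
      // Stack overflowed: undo the bit, since only objects on the stack get
      // their bit cleared at the end of the GC.
      ret->AtomicSetMarkBit(1, 0);
      *stack_full = true;
    }
  }
  return ret;
}
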
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d7221e4..071537d 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -42,9 +42,6 @@
 namespace collector {
 
 static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
-// If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
-// pages.
-static constexpr bool kGrayDirtyImmuneObjects = true;
 // If kFilterModUnionCards then we attempt to filter cards that don't need to be dirty in the mod
 // union table. Disabled since it does not seem to help the pause much.
 static constexpr bool kFilterModUnionCards = kIsDebugBuild;
@@ -52,6 +49,9 @@
 // ConcurrentCopying::Scan. May be used to diagnose possibly unnecessary read barriers.
 // Only enabled for kIsDebugBuild to avoid performance hit.
 static constexpr bool kDisallowReadBarrierDuringScan = kIsDebugBuild;
+// Slow-path mark stack size; increase this if the stack is getting full and it is causing
+// performance problems.
+static constexpr size_t kReadBarrierMarkStackSize = 512 * KB;
 
 ConcurrentCopying::ConcurrentCopying(Heap* heap,
                                      const std::string& name_prefix,
@@ -63,6 +63,10 @@
       gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack",
                                                      kDefaultGcMarkStackSize,
                                                      kDefaultGcMarkStackSize)),
+      rb_mark_bit_stack_(accounting::ObjectStack::Create("rb copying gc mark stack",
+                                                         kReadBarrierMarkStackSize,
+                                                         kReadBarrierMarkStackSize)),
+      rb_mark_bit_stack_full_(false),
       mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
       thread_running_gc_(nullptr),
       is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
@@ -187,6 +191,7 @@
     CHECK(false_gray_stack_.empty());
   }
 
+  rb_mark_bit_stack_full_ = false;
   mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
   if (measure_read_barrier_slow_path_) {
     rb_slow_path_ns_.StoreRelaxed(0);
@@ -914,9 +919,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     if (kUseBakerReadBarrier) {
-      CHECK(ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
           << "Ref " << ref << " " << PrettyTypeOf(ref)
-          << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
+          << " has non-white rb_ptr ";
     }
   }
 
@@ -982,7 +987,7 @@
     VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
-      CHECK(obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+      CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
           << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
     }
   }
@@ -2243,6 +2248,15 @@
         }
       }
     }
+    if (kUseBakerReadBarrier) {
+      TimingLogger::ScopedTiming split("EmptyRBMarkBitStack", GetTimings());
+      DCHECK(rb_mark_bit_stack_.get() != nullptr);
+      const auto* limit = rb_mark_bit_stack_->End();
+      for (StackReference<mirror::Object>* it = rb_mark_bit_stack_->Begin(); it != limit; ++it) {
+        CHECK(it->AsMirrorPtr()->AtomicSetMarkBit(1, 0));
+      }
+      rb_mark_bit_stack_->Reset();
+    }
   }
   if (measure_read_barrier_slow_path_) {
     MutexLock mu(self, rb_slow_path_histogram_lock_);
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 72112fa..a862802 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -57,6 +57,9 @@
   static constexpr bool kEnableFromSpaceAccountingCheck = kIsDebugBuild;
   // Enable verbose mode.
   static constexpr bool kVerboseMode = false;
+  // If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
+  // pages.
+  static constexpr bool kGrayDirtyImmuneObjects = true;
 
   ConcurrentCopying(Heap* heap,
                     const std::string& name_prefix = "",
@@ -230,6 +233,8 @@
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
   std::unique_ptr<accounting::ObjectStack> gc_mark_stack_;
+  std::unique_ptr<accounting::ObjectStack> rb_mark_bit_stack_;
+  bool rb_mark_bit_stack_full_;
   std::vector<mirror::Object*> false_gray_stack_ GUARDED_BY(mark_stack_lock_);
   Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<accounting::ObjectStack*> revoked_mark_stacks_
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index a92cb24..88fbf78 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -257,6 +257,7 @@
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
+  CHECK_GE(large_object_threshold, kMinLargeObjectThreshold);
   ScopedTrace trace(__FUNCTION__);
   Runtime* const runtime = Runtime::Current();
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
@@ -2538,6 +2539,17 @@
   AddSpace(zygote_space_);
   non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   AddSpace(non_moving_space_);
+  if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+    // Treat all of the objects in the zygote as marked to avoid unnecessary dirty pages. This is
+    // safe since we mark all of the objects that may reference non-immune objects as gray.
+    zygote_space_->GetLiveBitmap()->VisitMarkedRange(
+        reinterpret_cast<uintptr_t>(zygote_space_->Begin()),
+        reinterpret_cast<uintptr_t>(zygote_space_->Limit()),
+        [](mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+      CHECK(obj->AtomicSetMarkBit(0, 1));
+    });
+  }
+
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
       new accounting::ModUnionTableCardCache("zygote space mod-union table", this,
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index bb0d11a..be8ed40 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -132,7 +132,8 @@
   static constexpr double kDefaultTargetUtilization = 0.5;
   static constexpr double kDefaultHeapGrowthMultiplier = 2.0;
   // Primitive arrays larger than this size are put in the large object space.
-  static constexpr size_t kDefaultLargeObjectThreshold = 3 * kPageSize;
+  static constexpr size_t kMinLargeObjectThreshold = 3 * kPageSize;
+  static constexpr size_t kDefaultLargeObjectThreshold = kMinLargeObjectThreshold;
   // Whether or not parallel GC is enabled. If not, then we never create the thread pool.
   static constexpr bool kDefaultEnableParallelGC = false;
 
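
Note: the new kMinLargeObjectThreshold, the CHECK_GE added to the Heap() constructor above, and
the MIN_LARGE_OBJECT_THRESHOLD constant exported in asm_support_gen.h (0x3000) together suggest
that the large object threshold may no longer be configured below three pages, presumably so that
generated allocation code can rely on that lower bound. A small consistency check, assuming a
4 KiB kPageSize (which the generated 0x3000 value implies):

#include <cstddef>

// Assumed values for illustration; ART defines kPageSize in runtime/globals.h and the
// thresholds in runtime/gc/heap.h.
constexpr size_t kAssumedPageSize = 4096;
constexpr size_t kAssumedMinLargeObjectThreshold = 3 * kAssumedPageSize;
static_assert(kAssumedMinLargeObjectThreshold == 0x3000,
              "matches MIN_LARGE_OBJECT_THRESHOLD in asm_support_gen.h");
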
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index d140b75..c2e2a1e 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1430,12 +1430,14 @@
              image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
     CHECK_EQ(runtime->GetImtUnimplementedMethod(),
              image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
-    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveAll),
-             image_header->GetImageMethod(ImageHeader::kCalleeSaveMethod));
-    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kRefsOnly),
-             image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod));
-    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs),
-             image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod));
+    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves),
+             image_header->GetImageMethod(ImageHeader::kSaveAllCalleeSavesMethod));
+    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly),
+             image_header->GetImageMethod(ImageHeader::kSaveRefsOnlyMethod));
+    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs),
+             image_header->GetImageMethod(ImageHeader::kSaveRefsAndArgsMethod));
+    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveEverything),
+             image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod));
   } else if (!runtime->HasResolutionMethod()) {
     runtime->SetInstructionSet(space->oat_file_non_owned_->GetOatHeader().GetInstructionSet());
     runtime->SetResolutionMethod(image_header->GetImageMethod(ImageHeader::kResolutionMethod));
@@ -1443,11 +1445,15 @@
     runtime->SetImtUnimplementedMethod(
         image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
     runtime->SetCalleeSaveMethod(
-        image_header->GetImageMethod(ImageHeader::kCalleeSaveMethod), Runtime::kSaveAll);
+        image_header->GetImageMethod(ImageHeader::kSaveAllCalleeSavesMethod),
+        Runtime::kSaveAllCalleeSaves);
     runtime->SetCalleeSaveMethod(
-        image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod), Runtime::kRefsOnly);
+        image_header->GetImageMethod(ImageHeader::kSaveRefsOnlyMethod), Runtime::kSaveRefsOnly);
     runtime->SetCalleeSaveMethod(
-        image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod), Runtime::kRefsAndArgs);
+        image_header->GetImageMethod(ImageHeader::kSaveRefsAndArgsMethod),
+        Runtime::kSaveRefsAndArgs);
+    runtime->SetCalleeSaveMethod(
+        image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod), Runtime::kSaveEverything);
   }
 
   VLOG(image) << "ImageSpace::Init exiting " << *space.get();
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 3734bcc..0304d0d 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -195,7 +195,8 @@
     return root_.IsNull();
   }
 
-  ALWAYS_INLINE GcRoot(MirrorType* ref = nullptr) SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE GcRoot() {}
+  explicit ALWAYS_INLINE GcRoot(MirrorType* ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
   // Root visitors take pointers to root_ and place them in CompressedReference** arrays. We use a
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 5d62b59..9692472 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -26,12 +26,14 @@
 DEFINE_CHECK_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE), (static_cast<size_t>(sizeof(art::mirror::CompressedReference<art::mirror::Object>))))
 #define COMPRESSED_REFERENCE_SIZE_SHIFT 0x2
 DEFINE_CHECK_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE_SHIFT), (static_cast<size_t>(art::WhichPowerOf2(sizeof(art::mirror::CompressedReference<art::mirror::Object>)))))
-#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
-DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveAll))))
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 0x8
-DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kRefsOnly))))
-#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 0x10
-DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kRefsAndArgs))))
+#define RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveAllCalleeSaves))))
+#define RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET 0x8
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveRefsOnly))))
+#define RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET 0x10
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveRefsAndArgs))))
+#define RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET 0x18
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveEverything))))
 #define THREAD_FLAGS_OFFSET 0
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_FLAGS_OFFSET), (static_cast<int32_t>(art::Thread:: ThreadFlagsOffset<art::kRuntimePointerSize>().Int32Value())))
 #define THREAD_ID_OFFSET 12
@@ -68,18 +70,30 @@
 DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k32).Int32Value())))
 #define ART_METHOD_QUICK_CODE_OFFSET_64 48
 DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).Int32Value())))
+#define MIN_LARGE_OBJECT_THRESHOLD 0x3000
+DEFINE_CHECK_EQ(static_cast<size_t>(MIN_LARGE_OBJECT_THRESHOLD), (static_cast<size_t>(art::gc::Heap::kMinLargeObjectThreshold)))
 #define LOCK_WORD_STATE_SHIFT 30
 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kStateShift)))
 #define LOCK_WORD_STATE_MASK 0xc0000000
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kStateMaskShifted)))
 #define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_READ_BARRIER_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kReadBarrierStateShift)))
-#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000
+#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x10000000
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShifted)))
-#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xcfffffff
+#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xefffffff
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
 #define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShiftedToggled)))
+#define LOCK_WORD_GC_STATE_SHIFT 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_GC_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kGCStateShift)))
+#define LOCK_WORD_MARK_BIT_SHIFT 29
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_MARK_BIT_SHIFT), (static_cast<int32_t>(art::LockWord::kMarkBitStateShift)))
+#define LOCK_WORD_MARK_BIT_MASK_SHIFTED 0x20000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_MARK_BIT_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kMarkBitStateMaskShifted)))
 #define OBJECT_ALIGNMENT_MASK 0x7
 DEFINE_CHECK_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), (static_cast<size_t>(art::kObjectAlignment - 1)))
 #define OBJECT_ALIGNMENT_MASK_TOGGLED 0xfffffff8
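
Note: the changed lock-word constants indicate that the Baker read-barrier state shrinks from two
bits to a single bit at position 28, and that bit 29 becomes the new mark bit used by the
read-barrier fast path above; together they form the two-bit "GC state" field. The snippet below
only re-derives the masks from the shifts to show they are internally consistent; it is not ART's
LockWord class.

#include <cstdint>

constexpr uint32_t kRbStateShift = 28;                           // LOCK_WORD_READ_BARRIER_STATE_SHIFT
constexpr uint32_t kMarkBitShift = 29;                           // LOCK_WORD_MARK_BIT_SHIFT
constexpr uint32_t kRbStateMask  = 1u << kRbStateShift;          // 0x10000000
constexpr uint32_t kMarkBitMask  = 1u << kMarkBitShift;          // 0x20000000
constexpr uint32_t kGcStateMask  = kRbStateMask | kMarkBitMask;  // 0x30000000
static_assert(~kGcStateMask == 0xcfffffffu, "LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED");
static_assert(~kRbStateMask == 0xefffffffu, "LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED");
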
diff --git a/runtime/globals.h b/runtime/globals.h
index 0b44c47..9045d40 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -47,7 +47,8 @@
 }
 
 // Required object alignment
-static constexpr size_t kObjectAlignment = 8;
+static constexpr size_t kObjectAlignmentShift = 3;
+static constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift;
 static constexpr size_t kLargeObjectAlignment = kPageSize;
 
 // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
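
Note: kObjectAlignment keeps its value of 8; it is merely re-expressed via a new
kObjectAlignmentShift so code (and presumably the generated assembly constants) can use the shift
directly. A trivial illustration:

#include <cstddef>

constexpr size_t kObjectAlignmentShift = 3;
constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift;
static_assert(kObjectAlignment == 8, "value unchanged");
static_assert(kObjectAlignment - 1 == 0x7, "matches OBJECT_ALIGNMENT_MASK in asm_support_gen.h");
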
diff --git a/runtime/image.h b/runtime/image.h
index a98cea1..9ff18d6 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -183,9 +183,10 @@
     kResolutionMethod,
     kImtConflictMethod,
     kImtUnimplementedMethod,
-    kCalleeSaveMethod,
-    kRefsOnlySaveMethod,
-    kRefsAndArgsSaveMethod,
+    kSaveAllCalleeSavesMethod,
+    kSaveRefsOnlyMethod,
+    kSaveRefsAndArgsMethod,
+    kSaveEverythingMethod,
     kImageMethodsCount,  // Number of elements in enum.
   };
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 61ffe44..4a86e36 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -356,7 +356,7 @@
             LOG(INFO) << "  Removing exit stub in " << DescribeLocation();
           }
           if (instrumentation_frame.interpreter_entry_) {
-            CHECK(m == Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
+            CHECK(m == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
           } else {
             CHECK(m == instrumentation_frame.method_) << PrettyMethod(m);
           }
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 11b7ef4..ac146b3 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -30,9 +30,6 @@
 namespace art {
 namespace interpreter {
 
-// All lambda closures have to be a consecutive pair of virtual registers.
-static constexpr size_t kLambdaVirtualRegisterWidth = 2;
-
 void ThrowNullPointerExceptionFromInterpreter() {
   ThrowNullPointerExceptionFromDexPC();
 }
@@ -732,7 +729,6 @@
 
     // Fast path: no extra checks.
     if (is_range) {
-      // TODO: Implement the range version of invoke-lambda
       uint16_t first_src_reg = vregC;
 
       for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
@@ -772,34 +768,6 @@
 }
 
 template<bool is_range, bool do_assignability_check>
-bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
-                  const Instruction* inst, uint16_t inst_data ATTRIBUTE_UNUSED, JValue* result) {
-  const uint4_t num_additional_registers = inst->VRegB_25x();
-  // Argument word count.
-  const uint16_t number_of_inputs = num_additional_registers + kLambdaVirtualRegisterWidth;
-  // The lambda closure register is always present and is not encoded in the count.
-  // Furthermore, the lambda closure register is always wide, so it counts as 2 inputs.
-
-  // TODO: find a cleaner way to separate non-range and range information without duplicating
-  //       code.
-  uint32_t arg[Instruction::kMaxVarArgRegs25x];  // only used in invoke-XXX.
-  uint32_t vregC = 0;   // only used in invoke-XXX-range.
-  if (is_range) {
-    vregC = inst->VRegC_3rc();
-  } else {
-    // TODO(iam): See if it's possible to remove inst_data dependency from 35x to avoid this path
-    inst->GetAllArgs25x(arg);
-  }
-
-  // TODO: if there's an assignability check, throw instead?
-  DCHECK(called_method->IsStatic());
-
-  return DoCallCommon<is_range, do_assignability_check>(
-      called_method, self, shadow_frame,
-      result, number_of_inputs, arg, vregC);
-}
-
-template<bool is_range, bool do_assignability_check>
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result) {
   // Argument word count.
@@ -947,20 +915,6 @@
 EXPLICIT_DO_CALL_TEMPLATE_DECL(true, true);
 #undef EXPLICIT_DO_CALL_TEMPLATE_DECL
 
-// Explicit DoLambdaCall template function declarations.
-#define EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(_is_range, _do_assignability_check)               \
-  template SHARED_REQUIRES(Locks::mutator_lock_)                                                \
-  bool DoLambdaCall<_is_range, _do_assignability_check>(ArtMethod* method, Thread* self,        \
-                                                        ShadowFrame& shadow_frame,              \
-                                                        const Instruction* inst,                \
-                                                        uint16_t inst_data,                     \
-                                                        JValue* result)
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, false);
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, true);
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, false);
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, true);
-#undef EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL
-
 // Explicit DoFilledNewArray template function declarations.
 #define EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(_is_range_, _check, _transaction_active)       \
   template SHARED_REQUIRES(Locks::mutator_lock_)                                                  \
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 174d4e0..4fd1514 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -36,14 +36,7 @@
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
 #include "jit/jit.h"
-#include "lambda/art_lambda_method.h"
-#include "lambda/box_table.h"
-#include "lambda/closure.h"
-#include "lambda/closure_builder-inl.h"
-#include "lambda/leaking_allocator.h"
-#include "lambda/shorty_field_type.h"
 #include "mirror/class-inl.h"
-#include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
@@ -142,488 +135,7 @@
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result);
 
-// Invokes the given lambda closure. This is part of the invocation support and is used by
-// DoLambdaInvoke functions.
-// Returns true on success, otherwise throws an exception and returns false.
-template<bool is_range, bool do_assignability_check>
-bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
-                  const Instruction* inst, uint16_t inst_data, JValue* result);
-
-// Validates that the art method corresponding to a lambda method target
-// is semantically valid:
-//
-// Must be ACC_STATIC and ACC_LAMBDA. Must be a concrete managed implementation
-// (i.e. not native, not proxy, not abstract, ...).
-//
-// If the validation fails, return false and raise an exception.
-static inline bool IsValidLambdaTargetOrThrow(ArtMethod* called_method)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  bool success = false;
-
-  if (UNLIKELY(called_method == nullptr)) {
-    // The shadow frame should already be pushed, so we don't need to update it.
-  } else if (UNLIKELY(!called_method->IsInvokable())) {
-    called_method->ThrowInvocationTimeError();
-    // We got an error.
-    // TODO(iam): Also handle the case when the method is non-static, what error do we throw?
-    // TODO(iam): Also make sure that ACC_LAMBDA is set.
-  } else if (UNLIKELY(called_method->GetCodeItem() == nullptr)) {
-    // Method could be native, proxy method, etc. Lambda targets have to be concrete impls,
-    // so don't allow this.
-  } else {
-    success = true;
-  }
-
-  return success;
-}
-
-// Write out the 'Closure*' into vreg and vreg+1, as if it was a jlong.
-static inline void WriteLambdaClosureIntoVRegs(ShadowFrame& shadow_frame,
-                                               const lambda::Closure& lambda_closure,
-                                               uint32_t vreg) {
-  // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers.
-  uint32_t closure_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&lambda_closure));
-  uint32_t closure_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(&lambda_closure)
-                                                    >> BitSizeOf<uint32_t>());
-  // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit.
-  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-
-  DCHECK_NE(closure_lo | closure_hi, 0u);
-
-  shadow_frame.SetVReg(vreg, closure_lo);
-  shadow_frame.SetVReg(vreg + 1, closure_hi);
-}
-
-// Handles create-lambda instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-//
-// The closure must be allocated big enough to hold the data, and should not be
-// pre-initialized. It is initialized with the actual captured variables as a side-effect,
-// although this should be unimportant to the caller since this function also handles storing it to
-// the ShadowFrame.
-//
-// As a work-in-progress implementation, this shoves the ArtMethod object corresponding
-// to the target dex method index into the target register vA and vA + 1.
-template<bool do_access_check>
-static inline bool DoCreateLambda(Thread* self,
-                                  const Instruction* inst,
-                                  /*inout*/ShadowFrame& shadow_frame,
-                                  /*inout*/lambda::ClosureBuilder* closure_builder,
-                                  /*inout*/lambda::Closure* uninitialized_closure) {
-  DCHECK(closure_builder != nullptr);
-  DCHECK(uninitialized_closure != nullptr);
-  DCHECK_ALIGNED(uninitialized_closure, alignof(lambda::Closure));
-
-  using lambda::ArtLambdaMethod;
-  using lambda::LeakingAllocator;
-
-  /*
-   * create-lambda is opcode 0x21c
-   * - vA is the target register where the closure will be stored into
-   *   (also stores into vA + 1)
-   * - vB is the method index which will be the target for a later invoke-lambda
-   */
-  const uint32_t method_idx = inst->VRegB_21c();
-  mirror::Object* receiver = nullptr;  // Always static. (see 'kStatic')
-  ArtMethod* sf_method = shadow_frame.GetMethod();
-  ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>(
-      method_idx, &receiver, sf_method, self);
-
-  uint32_t vreg_dest_closure = inst->VRegA_21c();
-
-  if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
-    CHECK(self->IsExceptionPending());
-    shadow_frame.SetVReg(vreg_dest_closure, 0u);
-    shadow_frame.SetVReg(vreg_dest_closure + 1, 0u);
-    return false;
-  }
-
-  ArtLambdaMethod* initialized_lambda_method;
-  // Initialize the ArtLambdaMethod with the right data.
-  {
-    // Allocate enough memory to store a well-aligned ArtLambdaMethod.
-    // This is not the final type yet since the data starts out uninitialized.
-    LeakingAllocator::AlignedMemoryStorage<ArtLambdaMethod>* uninitialized_lambda_method =
-            LeakingAllocator::AllocateMemory<ArtLambdaMethod>(self);
-
-    std::string captured_variables_shorty = closure_builder->GetCapturedVariableShortyTypes();
-    std::string captured_variables_long_type_desc;
-
-    // Synthesize a long type descriptor from the short one.
-    for (char shorty : captured_variables_shorty) {
-      lambda::ShortyFieldType shorty_field_type(shorty);
-      if (shorty_field_type.IsObject()) {
-        // Not the true type, but good enough until we implement verifier support.
-        captured_variables_long_type_desc += "Ljava/lang/Object;";
-        UNIMPLEMENTED(FATAL) << "create-lambda with an object captured variable";
-      } else if (shorty_field_type.IsLambda()) {
-        // Not the true type, but good enough until we implement verifier support.
-        captured_variables_long_type_desc += "Ljava/lang/Runnable;";
-        UNIMPLEMENTED(FATAL) << "create-lambda with a lambda captured variable";
-      } else {
-        // The primitive types have the same length shorty or not, so this is always correct.
-        DCHECK(shorty_field_type.IsPrimitive());
-        captured_variables_long_type_desc += shorty_field_type;
-      }
-    }
-
-    // Copy strings to dynamically allocated storage. This leaks, but that's ok. Fix it later.
-    // TODO: Strings need to come from the DexFile, so they won't need their own allocations.
-    char* captured_variables_type_desc = LeakingAllocator::MakeFlexibleInstance<char>(
-        self,
-        captured_variables_long_type_desc.size() + 1);
-    strcpy(captured_variables_type_desc, captured_variables_long_type_desc.c_str());
-    char* captured_variables_shorty_copy = LeakingAllocator::MakeFlexibleInstance<char>(
-        self,
-        captured_variables_shorty.size() + 1);
-    strcpy(captured_variables_shorty_copy, captured_variables_shorty.c_str());
-
-    // After initialization, the object at the storage is well-typed. Use strong type going forward.
-    initialized_lambda_method =
-        new (uninitialized_lambda_method) ArtLambdaMethod(called_method,
-                                                          captured_variables_type_desc,
-                                                          captured_variables_shorty_copy,
-                                                          true);  // innate lambda
-  }
-
-  // Write all the closure captured variables and the closure header into the closure.
-  lambda::Closure* initialized_closure =
-      closure_builder->CreateInPlace(uninitialized_closure, initialized_lambda_method);
-
-  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, *initialized_closure, vreg_dest_closure);
-  return true;
-}
-
-// Reads out the 'ArtMethod*' stored inside of vreg and vreg+1
-//
-// Validates that the art method points to a valid lambda function, otherwise throws
-// an exception and returns null.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-static inline lambda::Closure* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame,
-                                                                 uint32_t vreg)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  // Lambda closures take up a consecutive pair of 2 virtual registers.
-  // On 32-bit the high bits are always 0.
-  uint32_t vc_value_lo = shadow_frame.GetVReg(vreg);
-  uint32_t vc_value_hi = shadow_frame.GetVReg(vreg + 1);
-
-  uint64_t vc_value_ptr = (static_cast<uint64_t>(vc_value_hi) << BitSizeOf<uint32_t>())
-                           | vc_value_lo;
-
-  // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit.
-  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-  lambda::Closure* const lambda_closure = reinterpret_cast<lambda::Closure*>(vc_value_ptr);
-  DCHECK_ALIGNED(lambda_closure, alignof(lambda::Closure));
-
-  // Guard against the user passing a null closure, which is odd but (sadly) semantically valid.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    ThrowNullPointerExceptionFromInterpreter();
-    return nullptr;
-  } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(lambda_closure->GetTargetMethod()))) {
-    // Sanity check against data corruption.
-    return nullptr;
-  }
-
-  return lambda_closure;
-}
-
-// Forward declaration for lock annotations. See below for documentation.
-template <bool do_access_check>
-static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
-                                                               uint32_t string_idx)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
-// Find the c-string data corresponding to a dex file's string index.
-// Otherwise, returns null if not found and throws a VerifyError.
-//
-// Note that with do_access_check=false, we never return null because the verifier
-// must guard against invalid string indices.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-template <bool do_access_check>
-static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
-                                                               uint32_t string_idx) {
-  ArtMethod* method = shadow_frame.GetMethod();
-  const DexFile* dex_file = method->GetDexFile();
-
-  mirror::Class* declaring_class = method->GetDeclaringClass();
-  if (!do_access_check) {
-    // MethodVerifier refuses methods with string_idx out of bounds.
-    DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings());
-  } else {
-    // Access checks enabled: perform string index bounds ourselves.
-    if (string_idx >= dex_file->GetHeader().string_ids_size_) {
-      ThrowVerifyError(declaring_class, "String index '%" PRIu32 "' out of bounds",
-                       string_idx);
-      return nullptr;
-    }
-  }
-
-  const char* type_string = dex_file->StringDataByIdx(string_idx);
-
-  if (UNLIKELY(type_string == nullptr)) {
-    CHECK_EQ(false, do_access_check)
-        << " verifier should've caught invalid string index " << string_idx;
-    CHECK_EQ(true, do_access_check)
-        << " string idx size check should've caught invalid string index " << string_idx;
-  }
-
-  return type_string;
-}
-
-// Handles capture-variable instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-template<bool do_access_check>
-static inline bool DoCaptureVariable(Thread* self,
-                                     const Instruction* inst,
-                                     /*inout*/ShadowFrame& shadow_frame,
-                                     /*inout*/lambda::ClosureBuilder* closure_builder) {
-  DCHECK(closure_builder != nullptr);
-  using lambda::ShortyFieldType;
-  /*
-   * capture-variable is opcode 0xf6, fmt 0x21c
-   * - vA is the source register of the variable that will be captured
-   * - vB is the string ID of the variable's type that will be captured
-   */
-  const uint32_t source_vreg = inst->VRegA_21c();
-  const uint32_t string_idx = inst->VRegB_21c();
-  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
-
-  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
-                                                                                  string_idx);
-  if (UNLIKELY(type_string == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  char type_first_letter = type_string[0];
-  ShortyFieldType shorty_type;
-  if (do_access_check &&
-      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
-    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "capture-variable vB must be a valid type");
-    return false;
-  } else {
-    // Already verified that the type is valid.
-    shorty_type = ShortyFieldType(type_first_letter);
-  }
-
-  const size_t captured_variable_count = closure_builder->GetCaptureCount();
-
-  // Note: types are specified explicitly so that the closure is packed tightly.
-  switch (shorty_type) {
-    case ShortyFieldType::kBoolean: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<bool>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kByte: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<int8_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kChar: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<uint16_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kShort: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<int16_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kInt: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<int32_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kDouble: {
-      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegDouble(source_vreg));
-      break;
-    }
-    case ShortyFieldType::kFloat: {
-      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegFloat(source_vreg));
-      break;
-    }
-    case ShortyFieldType::kLambda: {
-      UNIMPLEMENTED(FATAL) << " capture-variable with type kLambda";
-      // TODO: Capturing lambdas recursively will be done at a later time.
-      UNREACHABLE();
-    }
-    case ShortyFieldType::kLong: {
-      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegLong(source_vreg));
-      break;
-    }
-    case ShortyFieldType::kObject: {
-      closure_builder->CaptureVariableObject(shadow_frame.GetVRegReference(source_vreg));
-      UNIMPLEMENTED(FATAL) << " capture-variable with type kObject";
-      // TODO: finish implementing this. disabled for now since we can't track lambda refs for GC.
-      UNREACHABLE();
-    }
-
-    default:
-      LOG(FATAL) << "Invalid shorty type value " << shorty_type;
-      UNREACHABLE();
-  }
-
-  DCHECK_EQ(captured_variable_count + 1, closure_builder->GetCaptureCount());
-
-  return true;
-}
-
-// Handles capture-variable instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-template<bool do_access_check>
-static inline bool DoLiberateVariable(Thread* self,
-                                     const Instruction* inst,
-                                     size_t captured_variable_index,
-                                     /*inout*/ShadowFrame& shadow_frame) {
-  using lambda::ShortyFieldType;
-  /*
-   * liberate-variable is opcode 0xf7, fmt 0x22c
-   * - vA is the destination register
-   * - vB is the register with the lambda closure in it
-   * - vC is the string ID which needs to be a valid field type descriptor
-   */
-
-  const uint32_t dest_vreg = inst->VRegA_22c();
-  const uint32_t closure_vreg = inst->VRegB_22c();
-  const uint32_t string_idx = inst->VRegC_22c();
-  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
-
-
-  // Synthesize a long type descriptor from a shorty type descriptor list.
-  // TODO: Fix the dex encoding to contain the long and short type descriptors.
-  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
-                                                                                  string_idx);
-  if (UNLIKELY(do_access_check && type_string == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    shadow_frame.SetVReg(dest_vreg, 0);
-    return false;
-  }
-
-  char type_first_letter = type_string[0];
-  ShortyFieldType shorty_type;
-  if (do_access_check &&
-      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
-    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "liberate-variable vC must be a valid type");
-    shadow_frame.SetVReg(dest_vreg, 0);
-    return false;
-  } else {
-    // Already verified that the type is valid.
-    shorty_type = ShortyFieldType(type_first_letter);
-  }
-
-  // Check for closure being null *after* the type check.
-  // This way we can access the type info in case we fail later, to know how many vregs to clear.
-  const lambda::Closure* lambda_closure =
-      ReadLambdaClosureFromVRegsOrThrow(/*inout*/shadow_frame, closure_vreg);
-
-  // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    CHECK(self->IsExceptionPending());
-
-    // Clear the destination vreg(s) to be safe.
-    shadow_frame.SetVReg(dest_vreg, 0);
-    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
-      shadow_frame.SetVReg(dest_vreg + 1, 0);
-    }
-    return false;
-  }
-
-  if (do_access_check &&
-      UNLIKELY(captured_variable_index >= lambda_closure->GetNumberOfCapturedVariables())) {
-    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "liberate-variable captured variable index %zu out of bounds",
-                     lambda_closure->GetNumberOfCapturedVariables());
-    // Clear the destination vreg(s) to be safe.
-    shadow_frame.SetVReg(dest_vreg, 0);
-    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
-      shadow_frame.SetVReg(dest_vreg + 1, 0);
-    }
-    return false;
-  }
-
-  // Verify that the runtime type of the captured-variable matches the requested dex type.
-  if (do_access_check) {
-    ShortyFieldType actual_type = lambda_closure->GetCapturedShortyType(captured_variable_index);
-    if (actual_type != shorty_type) {
-      ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "cannot liberate-variable of runtime type '%c' to dex type '%c'",
-                     static_cast<char>(actual_type),
-                     static_cast<char>(shorty_type));
-
-      shadow_frame.SetVReg(dest_vreg, 0);
-      if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
-        shadow_frame.SetVReg(dest_vreg + 1, 0);
-      }
-      return false;
-    }
-
-    if (actual_type.IsLambda() || actual_type.IsObject()) {
-      UNIMPLEMENTED(FATAL) << "liberate-variable type checks needs to "
-                           << "parse full type descriptor for objects and lambdas";
-    }
-  }
-
-  // Unpack the captured variable from the closure into the correct type, then save it to the vreg.
-  if (shorty_type.IsPrimitiveNarrow()) {
-    uint32_t primitive_narrow_value =
-        lambda_closure->GetCapturedPrimitiveNarrow(captured_variable_index);
-    shadow_frame.SetVReg(dest_vreg, primitive_narrow_value);
-  } else if (shorty_type.IsPrimitiveWide()) {
-      uint64_t primitive_wide_value =
-          lambda_closure->GetCapturedPrimitiveWide(captured_variable_index);
-      shadow_frame.SetVRegLong(dest_vreg, static_cast<int64_t>(primitive_wide_value));
-  } else if (shorty_type.IsObject()) {
-    mirror::Object* unpacked_object =
-        lambda_closure->GetCapturedObject(captured_variable_index);
-    shadow_frame.SetVRegReference(dest_vreg, unpacked_object);
-
-    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack objects yet";
-  } else if (shorty_type.IsLambda()) {
-    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack lambdas yet";
-  } else {
-    LOG(FATAL) << "unreachable";
-    UNREACHABLE();
-  }
-
-  return true;
-}
-
-template<bool do_access_check>
-static inline bool DoInvokeLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
-                                  uint16_t inst_data, JValue* result) {
-  /*
-   * invoke-lambda is opcode 0x25
-   *
-   * - vC is the closure register (both vC and vC + 1 will be used to store the closure).
-   * - vB is the number of additional registers up to |{vD,vE,vF,vG}| (4)
-   * - the rest of the registers are always var-args
-   *
-   * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB)
-   */
-  uint32_t vreg_closure = inst->VRegC_25x();
-  const lambda::Closure* lambda_closure =
-      ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vreg_closure);
-
-  // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    result->SetJ(0);
-    return false;
-  }
-
-  ArtMethod* const called_method = lambda_closure->GetTargetMethod();
-  // Invoke a non-range lambda
-  return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
-                                              result);
-}
-
-// Handles invoke-XXX/range instructions (other than invoke-lambda[-range]).
+// Handles invoke-XXX/range instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
 static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
@@ -904,74 +416,6 @@
   return 3;
 }
 
-template <bool _do_check>
-static inline bool DoBoxLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
-                               uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) {
-  /*
-   * box-lambda vA, vB /// opcode 0xf8, format 22x
-   * - vA is the target register where the Object representation of the closure will be stored into
-   * - vB is a closure (made by create-lambda)
-   *   (also reads vB + 1)
-   */
-  uint32_t vreg_target_object = inst->VRegA_22x(inst_data);
-  uint32_t vreg_source_closure = inst->VRegB_22x();
-
-  lambda::Closure* lambda_closure = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
-                                                                      vreg_source_closure);
-
-  // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  mirror::Object* closure_as_object =
-      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(lambda_closure);
-
-  // Failed to box the lambda, an exception was raised.
-  if (UNLIKELY(closure_as_object == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  shadow_frame.SetVRegReference(vreg_target_object, closure_as_object);
-  return true;
-}
-
-template <bool _do_check> SHARED_REQUIRES(Locks::mutator_lock_)
-static inline bool DoUnboxLambda(Thread* self,
-                                 ShadowFrame& shadow_frame,
-                                 const Instruction* inst,
-                                 uint16_t inst_data) {
-  /*
-   * unbox-lambda vA, vB, [type id] /// opcode 0xf9, format 22c
-   * - vA is the target register where the closure will be written into
-   *   (also writes vA + 1)
-   * - vB is the Object representation of the closure (made by box-lambda)
-   */
-  uint32_t vreg_target_closure = inst->VRegA_22c(inst_data);
-  uint32_t vreg_source_object = inst->VRegB_22c();
-
-  // Raise NullPointerException if object is null
-  mirror::Object* boxed_closure_object = shadow_frame.GetVRegReference(vreg_source_object);
-  if (UNLIKELY(boxed_closure_object == nullptr)) {
-    ThrowNullPointerExceptionFromInterpreter();
-    return false;
-  }
-
-  lambda::Closure* unboxed_closure = nullptr;
-  // Raise an exception if unboxing fails.
-  if (!Runtime::Current()->GetLambdaBoxTable()->UnboxLambda(boxed_closure_object,
-                                                            /*out*/&unboxed_closure)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  DCHECK(unboxed_closure != nullptr);
-  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, *unboxed_closure, vreg_target_closure);
-  return true;
-}
-
 uint32_t FindNextInstructionFollowingException(Thread* self, ShadowFrame& shadow_frame,
     uint32_t dex_pc, const instrumentation::Instrumentation* instrumentation)
         SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1058,72 +502,6 @@
 EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true);   // invoke-virtual-quick-range.
 #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK
 
-// Explicitly instantiate all DoCreateLambda functions.
-#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check)                                                 \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                                    \
-bool DoCreateLambda<_do_check>(Thread* self,                                                      \
-                               const Instruction* inst,                                           \
-                               /*inout*/ShadowFrame& shadow_frame,                                \
-                               /*inout*/lambda::ClosureBuilder* closure_builder,                  \
-                               /*inout*/lambda::Closure* uninitialized_closure);
-
-EXPLICIT_DO_CREATE_LAMBDA_DECL(false);  // create-lambda
-EXPLICIT_DO_CREATE_LAMBDA_DECL(true);   // create-lambda
-#undef EXPLICIT_DO_CREATE_LAMBDA_DECL
-
-// Explicitly instantiate all DoInvokeLambda functions.
-#define EXPLICIT_DO_INVOKE_LAMBDA_DECL(_do_check)                                    \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                 \
-bool DoInvokeLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
-                               uint16_t inst_data, JValue* result);
-
-EXPLICIT_DO_INVOKE_LAMBDA_DECL(false);  // invoke-lambda
-EXPLICIT_DO_INVOKE_LAMBDA_DECL(true);   // invoke-lambda
-#undef EXPLICIT_DO_INVOKE_LAMBDA_DECL
-
-// Explicitly instantiate all DoBoxLambda functions.
-#define EXPLICIT_DO_BOX_LAMBDA_DECL(_do_check)                                                \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
-bool DoBoxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
-                            uint16_t inst_data);
-
-EXPLICIT_DO_BOX_LAMBDA_DECL(false);  // box-lambda
-EXPLICIT_DO_BOX_LAMBDA_DECL(true);   // box-lambda
-#undef EXPLICIT_DO_BOX_LAMBDA_DECL
-
-// Explicitly instantiate all DoUnBoxLambda functions.
-#define EXPLICIT_DO_UNBOX_LAMBDA_DECL(_do_check)                                                \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                            \
-bool DoUnboxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
-                              uint16_t inst_data);
-
-EXPLICIT_DO_UNBOX_LAMBDA_DECL(false);  // unbox-lambda
-EXPLICIT_DO_UNBOX_LAMBDA_DECL(true);   // unbox-lambda
-#undef EXPLICIT_DO_BOX_LAMBDA_DECL
-
-// Explicitly instantiate all DoCaptureVariable functions.
-#define EXPLICIT_DO_CAPTURE_VARIABLE_DECL(_do_check)                                    \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
-bool DoCaptureVariable<_do_check>(Thread* self,                                         \
-                                  const Instruction* inst,                              \
-                                  ShadowFrame& shadow_frame,                            \
-                                  lambda::ClosureBuilder* closure_builder);
-
-EXPLICIT_DO_CAPTURE_VARIABLE_DECL(false);  // capture-variable
-EXPLICIT_DO_CAPTURE_VARIABLE_DECL(true);   // capture-variable
-#undef EXPLICIT_DO_CREATE_LAMBDA_DECL
-
-// Explicitly instantiate all DoLiberateVariable functions.
-#define EXPLICIT_DO_LIBERATE_VARIABLE_DECL(_do_check)                                   \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
-bool DoLiberateVariable<_do_check>(Thread* self,                                        \
-                                   const Instruction* inst,                             \
-                                   size_t captured_variable_index,                      \
-                                   ShadowFrame& shadow_frame);                          \
-
-EXPLICIT_DO_LIBERATE_VARIABLE_DECL(false);  // liberate-variable
-EXPLICIT_DO_LIBERATE_VARIABLE_DECL(true);   // liberate-variable
-#undef EXPLICIT_DO_LIBERATE_LAMBDA_DECL
 }  // namespace interpreter
 }  // namespace art
 
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index f03036b..43b2778 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -18,14 +18,11 @@
 // Clang 3.4 fails to build the goto interpreter implementation.
 
 
-#include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
 #include "safe_math.h"
 
-#include <memory>  // std::unique_ptr
-
 namespace art {
 namespace interpreter {
 
@@ -93,16 +90,6 @@
 #define HANDLE_INSTRUCTION_START(opcode) op_##opcode:  // NOLINT(whitespace/labels)
 #define HANDLE_INSTRUCTION_END() UNREACHABLE_CODE_CHECK()
 
-// Use with instructions labeled with kExperimental flag:
-#define HANDLE_EXPERIMENTAL_INSTRUCTION_START(opcode)                                             \
-  HANDLE_INSTRUCTION_START(opcode);                                                               \
-  DCHECK(inst->IsExperimental());                                                                 \
-  if (Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas)) {
-#define HANDLE_EXPERIMENTAL_INSTRUCTION_END()                                                     \
-  } else {                                                                                        \
-      UnexpectedOpcode(inst, shadow_frame);                                                       \
-  } HANDLE_INSTRUCTION_END();
-
 #define HANDLE_MONITOR_CHECKS()                                                                   \
   if (!DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame)) {                       \
     HANDLE_PENDING_EXCEPTION();                                                                   \
@@ -163,14 +150,14 @@
   static const void* const handlersTable[instrumentation::kNumHandlerTables][kNumPackedOpcodes] = {
     {
     // Main handler table.
-#define INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) &&op_##code,
+#define INSTRUCTION_HANDLER(o, code, n, f, i, a, v) &&op_##code,
 #include "dex_instruction_list.h"
       DEX_INSTRUCTION_LIST(INSTRUCTION_HANDLER)
 #undef DEX_INSTRUCTION_LIST
 #undef INSTRUCTION_HANDLER
     }, {
     // Alternative handler table.
-#define INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) &&alt_op_##code,
+#define INSTRUCTION_HANDLER(o, code, n, f, i, a, v) &&alt_op_##code,
 #include "dex_instruction_list.h"
       DEX_INSTRUCTION_LIST(INSTRUCTION_HANDLER)
 #undef DEX_INSTRUCTION_LIST
@@ -190,8 +177,6 @@
   uint16_t inst_data;
   const void* const* currentHandlersTable;
   UPDATE_HANDLER_TABLE();
-  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
-  size_t lambda_captured_variable_index = 0;
   const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
   ArtMethod* method = shadow_frame.GetMethod();
   jit::Jit* jit = Runtime::Current()->GetJit();
@@ -1668,14 +1653,6 @@
   }
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(INVOKE_LAMBDA) {
-    bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
-                                                   &result_register);
-    UPDATE_HANDLER_TABLE();
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(NEG_INT)
     shadow_frame.SetVReg(
         inst->VRegA_12x(inst_data), -shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
@@ -2457,62 +2434,6 @@
     ADVANCE(2);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) {
-    if (lambda_closure_builder == nullptr) {
-      // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
-      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-    }
-
-    // TODO: these allocations should not leak, and the lambda method should not be local.
-    lambda::Closure* lambda_closure =
-        reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
-    bool success = DoCreateLambda<do_access_check>(self,
-                                                   inst,
-                                                   /*inout*/shadow_frame,
-                                                   /*inout*/lambda_closure_builder.get(),
-                                                   /*inout*/lambda_closure);
-    lambda_closure_builder.reset(nullptr);  // reset state of variables captured
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(BOX_LAMBDA) {
-    bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(UNBOX_LAMBDA) {
-    bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CAPTURE_VARIABLE) {
-    if (lambda_closure_builder == nullptr) {
-      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-    }
-
-    bool success = DoCaptureVariable<do_access_check>(self,
-                                                      inst,
-                                                      /*inout*/shadow_frame,
-                                                      /*inout*/lambda_closure_builder.get());
-
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(LIBERATE_VARIABLE) {
-    bool success = DoLiberateVariable<do_access_check>(self,
-                                                           inst,
-                                                           lambda_captured_variable_index,
-                                                           /*inout*/shadow_frame);
-    // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
-    lambda_captured_variable_index++;
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_3E)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2545,10 +2466,34 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_INSTRUCTION_START(UNUSED_F3)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_F4)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_INSTRUCTION_START(UNUSED_F5)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F6)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F7)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F8)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F9)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_FA)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2597,7 +2542,7 @@
 // Note: we do not use the kReturn instruction flag here (to test the instruction is a return). The
 // compiler seems to not evaluate "(Instruction::FlagsOf(Instruction::code) & kReturn) != 0" to
 // a constant condition that would remove the "if" statement so the test is free.
-#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                        \
+#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, i, a, v)                        \
   alt_op_##code: {                                                                            \
     if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
       Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 8bfc10c..a6349fc 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -15,14 +15,11 @@
  */
 
 #include "base/enums.h"
-#include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
 #include "safe_math.h"
 
-#include <memory>  // std::unique_ptr
-
 namespace art {
 namespace interpreter {
 
@@ -92,11 +89,6 @@
     }                                                                                          \
   } while (false)
 
-static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
-  DCHECK(inst->IsExperimental());
-  return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
-}
-
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
                          ShadowFrame& shadow_frame, JValue result_register,
@@ -116,10 +108,6 @@
   ArtMethod* method = shadow_frame.GetMethod();
   jit::Jit* jit = Runtime::Current()->GetJit();
 
-  // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
-  // to keep this live for the scope of the entire function call.
-  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
-  size_t lambda_captured_variable_index = 0;
   do {
     dex_pc = inst->GetDexPc(insns);
     shadow_frame.SetDexPC(dex_pc);
@@ -2333,105 +2321,13 @@
                              (inst->VRegC_22b() & 0x1f));
         inst = inst->Next_2xx();
         break;
-      case Instruction::INVOKE_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
-                                                       &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::CAPTURE_VARIABLE: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        if (lambda_closure_builder == nullptr) {
-          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-        }
-
-        PREAMBLE();
-        bool success = DoCaptureVariable<do_access_check>(self,
-                                                          inst,
-                                                          /*inout*/shadow_frame,
-                                                          /*inout*/lambda_closure_builder.get());
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::CREATE_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-
-        if (lambda_closure_builder == nullptr) {
-          // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
-          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-        }
-
-        // TODO: these allocations should not leak, and the lambda method should not be local.
-        lambda::Closure* lambda_closure =
-            reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
-        bool success = DoCreateLambda<do_access_check>(self,
-                                                       inst,
-                                                       /*inout*/shadow_frame,
-                                                       /*inout*/lambda_closure_builder.get(),
-                                                       /*inout*/lambda_closure);
-        lambda_closure_builder.reset(nullptr);  // reset state of variables captured
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::LIBERATE_VARIABLE: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoLiberateVariable<do_access_check>(self,
-                                                           inst,
-                                                           lambda_captured_variable_index,
-                                                           /*inout*/shadow_frame);
-        // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
-        lambda_captured_variable_index++;
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::UNUSED_F4: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        CHECK(false);  // TODO(iam): Implement opcodes for lambdas
-        break;
-      }
-      case Instruction::BOX_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::UNBOX_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
       case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-      case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
+      case Instruction::UNUSED_F3 ... Instruction::UNUSED_F9:
+      case Instruction::UNUSED_FC ... Instruction::UNUSED_FF:
       case Instruction::UNUSED_79:
       case Instruction::UNUSED_7A:
+      case Instruction::INVOKE_POLYMORPHIC:
+      case Instruction::INVOKE_POLYMORPHIC_RANGE:
         UnexpectedOpcode(inst, shadow_frame);
     }
   } while (!interpret_one_instruction);
diff --git a/runtime/interpreter/mterp/arm/binopLit8.S b/runtime/interpreter/mterp/arm/binopLit8.S
index b8f0d92..7c9c631 100644
--- a/runtime/interpreter/mterp/arm/binopLit8.S
+++ b/runtime/interpreter/mterp/arm/binopLit8.S
@@ -1,10 +1,14 @@
-%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+%default {"extract":"asr     r1, r3, #8", "result":"r0", "chkzero":"0"}
     /*
      * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = r0 op r1".
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -17,14 +21,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    $extract                            @ optional; typically r1<- ssssssCC (sign extended)
     .if $chkzero
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    $preinstr                           @ optional op; may set condition codes
     $instr                              @ $result<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG $result, r9                @ vAA<- $result
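
The binopLit8.S rewrite above replaces the separate "preinstr" mask with an overridable "extract" step, so plain ALU ops can fold the sign-extending `asr #8` directly into the instruction and the shift ops can pull only the low 5 bits with `ubfx`. A minimal C++ sketch of the 22b literal handling this template implements (helper names Lit8SignExtended, Lit8ShiftAmount and AddIntLit8 are illustrative, not ART functions; the second code unit is assumed fetched sign-extended, as FETCH_S does for these handlers):

    // Second 16-bit code unit of a */lit8 instruction packs BB in the low
    // byte and a signed CC literal in the high byte (ssssCCBB once
    // sign-extended into a register).
    #include <cstdint>

    inline int32_t Lit8SignExtended(uint16_t unit) {
      // Widen as signed, then arithmetic-shift: ssssssCC,
      // equivalent to "asr r1, r3, #8" on the sign-extended fetch.
      return static_cast<int32_t>(static_cast<int16_t>(unit)) >> 8;
    }

    inline uint32_t Lit8ShiftAmount(uint16_t unit) {
      // Shift ops only use the low 5 bits of CC,
      // equivalent to "ubfx r1, r3, #8, #5".
      return (static_cast<uint32_t>(unit) >> 8) & 0x1f;
    }

    inline int32_t AddIntLit8(int32_t vBB, uint16_t unit) {
      // add-int/lit8 vAA, vBB, #+CC; commutative ops like this can keep the
      // shift fused in the ALU op ("add r0, r0, r3, asr #8"), so "extract"
      // is overridden to be empty in those op_*.S files.
      return vBB + Lit8SignExtended(unit);
    }
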
diff --git a/runtime/interpreter/mterp/arm/op_add_int_lit8.S b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
index b84684a..035510d 100644
--- a/runtime/interpreter/mterp/arm/op_add_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"add     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"add     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_lit8.S b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
index d5783e5..af746b5 100644
--- a/runtime/interpreter/mterp/arm/op_and_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"and     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"and     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_lit8.S b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
index 2d85038..9882bfc 100644
--- a/runtime/interpreter/mterp/arm/op_or_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"orr     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"orr     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
index 2ee11e1..dc953dc 100644
--- a/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"rsb     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"rsb     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
index 6a48bfc..60a1498 100644
--- a/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
index 60fe5fc..c2f6cb0 100644
--- a/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
index 40a4435..5554eb0 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
index 46bb712..97d0b9e 100644
--- a/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"eor     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"eor     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/binopLit8.S b/runtime/interpreter/mterp/arm64/binopLit8.S
index 0b7c68a..dfa3169 100644
--- a/runtime/interpreter/mterp/arm64/binopLit8.S
+++ b/runtime/interpreter/mterp/arm64/binopLit8.S
@@ -1,10 +1,14 @@
-%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+%default {"extract": "asr     w1, w3, #8", "preinstr":"", "result":"w0", "chkzero":"0"}
     /*
      * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = w0 op w1".
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -17,7 +21,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    $extract                            // optional; typically w1<- ssssssCC (sign extended)
     .if $chkzero
     cbz     w1, common_errDivideByZero
     .endif
diff --git a/runtime/interpreter/mterp/arm64/op_add_int_lit8.S b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
index 196ea99..2dfb8b9 100644
--- a/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"add     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"add     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_int_lit8.S b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
index 167b40e..495b5cd 100644
--- a/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"and     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"and     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int_lit8.S b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
index 51675f8..7cb26b7 100644
--- a/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"orr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"orr     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
index 17f57f9..9c19b55 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"lsl     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
index 274080c..c7b61df 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"asr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
index ff30e1f..555ed4e 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"lsr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
index 6d187b5..1d3d93e 100644
--- a/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"eor     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"eor     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/config_arm b/runtime/interpreter/mterp/config_arm
index 436dcd2..b6caf11 100644
--- a/runtime/interpreter/mterp/config_arm
+++ b/runtime/interpreter/mterp/config_arm
@@ -279,13 +279,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
index 6427ead..c5e06c7 100644
--- a/runtime/interpreter/mterp/config_arm64
+++ b/runtime/interpreter/mterp/config_arm64
@@ -277,13 +277,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/config_mips b/runtime/interpreter/mterp/config_mips
index c6292c3..515cb0b 100644
--- a/runtime/interpreter/mterp/config_mips
+++ b/runtime/interpreter/mterp/config_mips
@@ -279,13 +279,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/config_mips64 b/runtime/interpreter/mterp/config_mips64
index c40c007..aafd248 100644
--- a/runtime/interpreter/mterp/config_mips64
+++ b/runtime/interpreter/mterp/config_mips64
@@ -279,13 +279,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
index f1501e1..64d8ee8 100644
--- a/runtime/interpreter/mterp/config_x86
+++ b/runtime/interpreter/mterp/config_x86
@@ -283,13 +283,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/config_x86_64 b/runtime/interpreter/mterp/config_x86_64
index 1d7eb03..7c357db 100644
--- a/runtime/interpreter/mterp/config_x86_64
+++ b/runtime/interpreter/mterp/config_x86_64
@@ -283,13 +283,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f3.S b/runtime/interpreter/mterp/mips64/op_unused_f3.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f3.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f5.S b/runtime/interpreter/mterp/mips64/op_unused_f5.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f5.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f6.S b/runtime/interpreter/mterp/mips64/op_unused_f6.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f6.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f7.S b/runtime/interpreter/mterp/mips64/op_unused_f7.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f7.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f8.S b/runtime/interpreter/mterp/mips64/op_unused_f8.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f8.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f9.S b/runtime/interpreter/mterp/mips64/op_unused_f9.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f9.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index df25767..c33df6d 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -6473,6 +6473,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6485,15 +6489,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    add     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6511,6 +6514,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6523,15 +6530,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    rsb     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    rsb     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6550,6 +6556,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6562,14 +6572,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    asr     r1, r3, #8                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
     mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -6657,6 +6666,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6669,15 +6682,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    and     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6695,6 +6707,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6707,15 +6723,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    orr     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6733,6 +6748,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6745,15 +6764,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    eor     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6771,6 +6789,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6783,14 +6805,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    and     r1, r1, #31                           @ optional op; may set condition codes
     mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -6809,6 +6830,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6821,14 +6846,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    and     r1, r1, #31                           @ optional op; may set condition codes
     mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -6847,6 +6871,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6859,14 +6887,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    and     r1, r1, #31                           @ optional op; may set condition codes
     mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -7201,9 +7228,13 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f3: /* 0xf3 */
+/* File: arm/op_unused_f3.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
@@ -7219,37 +7250,57 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f5: /* 0xf5 */
+/* File: arm/op_unused_f5.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f6: /* 0xf6 */
+/* File: arm/op_unused_f6.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f7: /* 0xf7 */
+/* File: arm/op_unused_f7.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f8: /* 0xf8 */
+/* File: arm/op_unused_f8.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f9: /* 0xf9 */
+/* File: arm/op_unused_f9.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
@@ -11564,7 +11615,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11598,7 +11649,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11615,7 +11666,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11632,7 +11683,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11649,7 +11700,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11666,7 +11717,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index de37e07..c7303b9 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -6044,6 +6044,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6056,13 +6060,13 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    add     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6080,6 +6084,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6092,7 +6100,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6117,6 +6125,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6129,7 +6141,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6153,6 +6165,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6165,7 +6181,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 1
     cbz     w1, common_errDivideByZero
     .endif
@@ -6189,6 +6205,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6201,7 +6221,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 1
     cbz     w1, common_errDivideByZero
     .endif
@@ -6225,6 +6245,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6237,13 +6261,13 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    and     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6261,6 +6285,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6273,13 +6301,13 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    orr     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6297,6 +6325,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6309,13 +6341,13 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    eor     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6333,6 +6365,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6345,7 +6381,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    ubfx    w1, w3, #8, #5                            // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6369,6 +6405,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6381,7 +6421,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    ubfx    w1, w3, #8, #5                            // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6405,6 +6445,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6417,7 +6461,7 @@
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    ubfx    w1, w3, #8, #5                            // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6741,9 +6785,13 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f3: /* 0xf3 */
+/* File: arm64/op_unused_f3.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
@@ -6759,37 +6807,57 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f5: /* 0xf5 */
+/* File: arm64/op_unused_f5.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f6: /* 0xf6 */
+/* File: arm64/op_unused_f6.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f7: /* 0xf7 */
+/* File: arm64/op_unused_f7.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f8: /* 0xf8 */
+/* File: arm64/op_unused_f8.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f9: /* 0xf9 */
+/* File: arm64/op_unused_f9.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
@@ -11332,7 +11400,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11366,7 +11434,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11383,7 +11451,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11400,7 +11468,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11417,7 +11485,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11434,7 +11502,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index 5e0c19f..fef7dc6 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -7547,9 +7547,14 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f3: /* 0xf3 */
+/* File: mips/op_unused_f3.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
@@ -7564,33 +7569,58 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f5: /* 0xf5 */
+/* File: mips/op_unused_f5.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f6: /* 0xf6 */
+/* File: mips/op_unused_f6.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f7: /* 0xf7 */
+/* File: mips/op_unused_f7.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f8: /* 0xf8 */
+/* File: mips/op_unused_f8.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f9: /* 0xf9 */
+/* File: mips/op_unused_f9.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
@@ -12381,7 +12411,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12417,7 +12447,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12435,7 +12465,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12453,7 +12483,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12471,7 +12501,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12489,7 +12519,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index 35fbe94..a061f1e 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -7003,10 +7003,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f3: /* 0xf3 */
+/* File: mips64/op_unused_f3.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
 .L_op_unused_f4: /* 0xf4 */
@@ -7020,34 +7025,59 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f5: /* 0xf5 */
+/* File: mips64/op_unused_f5.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f6: /* 0xf6 */
+/* File: mips64/op_unused_f6.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f7: /* 0xf7 */
+/* File: mips64/op_unused_f7.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f8: /* 0xf8 */
+/* File: mips64/op_unused_f8.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f9: /* 0xf9 */
+/* File: mips64/op_unused_f9.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
 .L_op_unused_fa: /* 0xfa */
@@ -11799,7 +11829,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11837,7 +11867,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11856,7 +11886,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11875,7 +11905,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11894,7 +11924,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11913,7 +11943,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 5caaa80..29ee248 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -6201,8 +6201,12 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f3: /* 0xf3 */
+/* File: x86/op_unused_f3.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
@@ -6219,36 +6223,56 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f5: /* 0xf5 */
+/* File: x86/op_unused_f5.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f6: /* 0xf6 */
+/* File: x86/op_unused_f6.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f7: /* 0xf7 */
+/* File: x86/op_unused_f7.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f8: /* 0xf8 */
+/* File: x86/op_unused_f8.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f9: /* 0xf9 */
+/* File: x86/op_unused_f9.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
@@ -12178,7 +12202,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12226,7 +12250,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12250,7 +12274,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12274,7 +12298,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12298,7 +12322,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12322,7 +12346,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index 2f7b854..bc1abcc 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -5966,8 +5966,12 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f3: /* 0xf3 */
+/* File: x86_64/op_unused_f3.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
@@ -5984,36 +5988,56 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f5: /* 0xf5 */
+/* File: x86_64/op_unused_f5.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f6: /* 0xf6 */
+/* File: x86_64/op_unused_f6.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f7: /* 0xf7 */
+/* File: x86_64/op_unused_f7.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f8: /* 0xf8 */
+/* File: x86_64/op_unused_f8.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f9: /* 0xf9 */
+/* File: x86_64/op_unused_f9.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
@@ -11457,7 +11481,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11501,7 +11525,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11523,7 +11547,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11545,7 +11569,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11567,7 +11591,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11589,7 +11613,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/x86/op_unused_f3.S b/runtime/interpreter/mterp/x86/op_unused_f3.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f3.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f5.S b/runtime/interpreter/mterp/x86/op_unused_f5.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f5.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f6.S b/runtime/interpreter/mterp/x86/op_unused_f6.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f6.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f7.S b/runtime/interpreter/mterp/x86/op_unused_f7.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f7.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f8.S b/runtime/interpreter/mterp/x86/op_unused_f8.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f8.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f9.S b/runtime/interpreter/mterp/x86/op_unused_f9.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f9.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f3.S b/runtime/interpreter/mterp/x86_64/op_unused_f3.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f3.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f5.S b/runtime/interpreter/mterp/x86_64/op_unused_f5.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f5.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f6.S b/runtime/interpreter/mterp/x86_64/op_unused_f6.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f6.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f7.S b/runtime/interpreter/mterp/x86_64/op_unused_f7.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f7.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f8.S b/runtime/interpreter/mterp/x86_64/op_unused_f8.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f8.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f9.S b/runtime/interpreter/mterp/x86_64/op_unused_f9.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f9.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 57443f1..a0e0e62 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -41,6 +41,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/class.h"
 #include "mirror/field-inl.h"
+#include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index d52030f..cff2354 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -692,9 +692,6 @@
   DCHECK(this_object != nullptr);
   ProfilingInfo* info = caller->GetProfilingInfo(kRuntimePointerSize);
   if (info != nullptr) {
-    // Since the instrumentation is marked from the declaring class we need to mark the card so
-    // that mod-union tables and card rescanning know about the update.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
     info->AddInvokeInfo(dex_pc, this_object->GetClass());
   }
 }
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 6dc1578..1938221 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -146,7 +146,6 @@
   // Remove all methods in our cache that were allocated by 'alloc'.
   void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
       REQUIRES(!lock_)
-      REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ClearGcRootsInInlineCaches(Thread* self) REQUIRES(!lock_);
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 07c8051..216df2f 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -25,10 +25,33 @@
 
 namespace art {
 
+ProfilingInfo::ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()),
+        method_(method),
+        is_method_being_compiled_(false),
+        is_osr_method_being_compiled_(false),
+        current_inline_uses_(0),
+        saved_entry_point_(nullptr) {
+  memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
+  for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+    cache_[i].dex_pc_ = entries[i];
+  }
+  if (method->IsCopied()) {
+    // GetHoldingClassOfCopiedMethod is expensive, but creating a profiling info for a copied method
+    // appears to happen very rarely in practice.
+    holding_class_ = GcRoot<mirror::Class>(
+        Runtime::Current()->GetClassLinker()->GetHoldingClassOfCopiedMethod(method));
+  } else {
+    holding_class_ = GcRoot<mirror::Class>(method->GetDeclaringClass());
+  }
+  DCHECK(!holding_class_.IsNull());
+}
+
 bool ProfilingInfo::Create(Thread* self, ArtMethod* method, bool retry_allocation) {
   // Walk over the dex instructions of the method and keep track of
   // instructions we are interested in profiling.
   DCHECK(!method->IsNative());
+
   const DexFile::CodeItem& code_item = *method->GetCodeItem();
   const uint16_t* code_ptr = code_item.insns_;
   const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
@@ -93,6 +116,14 @@
         --i;
       } else {
         // We successfully set `cls`, just return.
+        // Since the instrumentation is marked from the declaring class we need to mark the card so
+        // that mod-union tables and card rescanning know about the update.
+        // Note that the declaring class is not necessarily the holding class if the method is
+        // copied. The card mark must be on the holding class because that is the class from
+        // which the profiling info is visited.
+        if (!holding_class_.IsNull()) {
+          Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(holding_class_.Read());
+        }
         return;
       }
     }
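
The hunk above is the heart of this fix: the inline-cache write barrier that jit.cc used to issue at the call site now lives inside ProfilingInfo, where it can target the holding class rather than the declaring class. As a reading aid only, here is a small, self-contained toy model of why that distinction matters for a card-marking GC. Every name in it is invented for this sketch; none of it is ART code.

#include <cassert>
#include <string>
#include <vector>

struct ToyClass;

// Stand-in for an ArtMethod with a freshly updated inline cache.
struct ToyMethod {
  ToyClass* declaring_class;   // Class that originally declared the method.
  bool has_new_inline_cache;   // Stands in for a newly written GC reference.
};

// Stand-in for a class plus its card-table entry.
struct ToyClass {
  std::string name;
  std::vector<ToyMethod*> methods;  // Methods the class *holds* (may be copies).
  bool card_dirty;
};

// A "concurrent GC" that only revisits classes whose card is dirty.
int CountMissedUpdates(const std::vector<ToyClass*>& heap) {
  int missed = 0;
  for (ToyClass* klass : heap) {
    if (klass->card_dirty) {
      continue;  // Dirty cards get rescanned, so nothing is missed here.
    }
    for (ToyMethod* method : klass->methods) {
      if (method->has_new_inline_cache) {
        ++missed;  // An update is hiding behind a clean card.
      }
    }
  }
  return missed;
}

int main() {
  ToyClass iface{"Interface", {}, false};       // Declares a default method.
  ToyClass impl{"Implementation", {}, false};   // Holds a *copy* of that method.
  ToyMethod copied{&iface, false};
  impl.methods.push_back(&copied);
  std::vector<ToyClass*> heap = {&iface, &impl};

  // Simulate ProfilingInfo::AddInvokeInfo storing a new class reference.
  copied.has_new_inline_cache = true;

  // Old behaviour: mark the card of the declaring class (what jit.cc used to do).
  copied.declaring_class->card_dirty = true;
  assert(CountMissedUpdates(heap) == 1);  // Update sits behind Implementation's clean card.

  // Fixed behaviour (the hunk above): mark the holding class instead.
  iface.card_dirty = false;
  impl.card_dirty = true;
  assert(CountMissedUpdates(heap) == 0);  // The GC now rescans the right class.
  return 0;
}
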
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index d04d2de..a890fbb 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -105,6 +105,7 @@
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
   template<typename RootVisitorType>
   void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
+    visitor.VisitRootIfNonNull(holding_class_.AddressWithoutBarrier());
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
       InlineCache* cache = &cache_[i];
       for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
@@ -166,18 +167,7 @@
   }
 
  private:
-  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
-      : number_of_inline_caches_(entries.size()),
-        method_(method),
-        is_method_being_compiled_(false),
-        is_osr_method_being_compiled_(false),
-        current_inline_uses_(0),
-        saved_entry_point_(nullptr) {
-    memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
-    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      cache_[i].dex_pc_ = entries[i];
-    }
-  }
+  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries);
 
   // Number of instructions we are profiling in the ArtMethod.
   const uint32_t number_of_inline_caches_;
@@ -185,6 +175,9 @@
   // Method this profiling info is for.
   ArtMethod* const method_;
 
+  // Holding class for the method in case method is a copied method.
+  GcRoot<mirror::Class> holding_class_;
+
   // Whether the ArtMethod is currently being compiled. This flag
   // is implicitly guarded by the JIT code cache lock.
   // TODO: Make the JIT code cache lock global.
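
The header change above adds holding_class_ as a GC root and reports it through the duck-typed VisitRoots template. Purely to illustrate that visitor pattern, here is a minimal compilable sketch; all of the types below are stand-ins invented for this example, not ART's real GcRoot, mirror::Class, or ProfilingInfo.

#include <cassert>
#include <cstddef>
#include <vector>

struct FakeClass {};  // Stand-in for mirror::Class.

// Stand-in for GcRoot<T>: just a pointer plus an address accessor
// (the analogue of AddressWithoutBarrier() in the hunk above).
template <typename T>
struct FakeGcRoot {
  T* ref = nullptr;
  T** Address() { return &ref; }
};

struct FakeProfilingInfo {
  FakeGcRoot<FakeClass> holding_class;
  std::vector<FakeGcRoot<FakeClass>> cache;  // Stand-in for the inline caches.

  // Duck-typed visitor: the visitor only needs a matching
  // VisitRootIfNonNull member, mirroring the shape of VisitRoots above.
  template <typename RootVisitorType>
  void VisitRoots(RootVisitorType& visitor) {
    visitor.VisitRootIfNonNull(holding_class.Address());  // The newly added root.
    for (FakeGcRoot<FakeClass>& entry : cache) {
      visitor.VisitRootIfNonNull(entry.Address());
    }
  }
};

struct CountingVisitor {
  std::size_t non_null = 0;
  void VisitRootIfNonNull(FakeClass** root) {
    if (*root != nullptr) {
      ++non_null;
    }
  }
};

int main() {
  FakeClass holder;
  FakeProfilingInfo info;
  info.holding_class.ref = &holder;
  info.cache.resize(2);  // Two empty inline-cache slots.

  CountingVisitor visitor;
  info.VisitRoots(visitor);
  assert(visitor.non_null == 1);  // Only the holding class is a live root here.
  return 0;
}
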
diff --git a/runtime/lambda/art_lambda_method.cc b/runtime/lambda/art_lambda_method.cc
deleted file mode 100644
index 6f9f8bb..0000000
--- a/runtime/lambda/art_lambda_method.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/art_lambda_method.h"
-
-#include "base/logging.h"
-#include "lambda/shorty_field_type.h"
-
-namespace art {
-namespace lambda {
-
-ArtLambdaMethod::ArtLambdaMethod(ArtMethod* target_method,
-                                 const char* captured_variables_type_descriptor,
-                                 const char* captured_variables_shorty,
-                                 bool innate_lambda)
-    : method_(target_method),
-      captured_variables_type_descriptor_(captured_variables_type_descriptor),
-      captured_variables_shorty_(captured_variables_shorty),
-      innate_lambda_(innate_lambda) {
-  DCHECK(target_method != nullptr);
-  DCHECK(captured_variables_type_descriptor != nullptr);
-  DCHECK(captured_variables_shorty != nullptr);
-
-  // Calculate the static closure size from the captured variables.
-  size_t size = sizeof(ArtLambdaMethod*);  // Initial size is just this method.
-  bool static_size = true;
-  const char* shorty = captured_variables_shorty_;
-  while (shorty != nullptr && *shorty != '\0') {
-    // Each captured variable also appends to the size.
-    ShortyFieldType shorty_field{*shorty};  // NOLINT [readability/braces] [4]
-    size += shorty_field.GetStaticSize();
-    static_size &= shorty_field.IsStaticSize();
-    ++shorty;
-  }
-  closure_size_ = size;
-
-  // We determine whether or not the size is dynamic by checking for nested lambdas.
-  //
-  // This is conservative, since in theory an optimization could determine the size
-  // of the nested lambdas recursively. In practice it's probably better to flatten out
-  // nested lambdas and inline all their code if they are known statically.
-  dynamic_size_ = !static_size;
-
-  if (kIsDebugBuild) {
-    // Double check that the number of captured variables match in both strings.
-    size_t shorty_count = strlen(captured_variables_shorty);
-
-    size_t long_count = 0;
-    const char* long_type = captured_variables_type_descriptor;
-    ShortyFieldType out;
-    while ((long_type = ShortyFieldType::ParseFromFieldTypeDescriptor(long_type, &out))
-           != nullptr) {
-      ++long_count;
-    }
-
-    DCHECK_EQ(shorty_count, long_count)
-        << "number of captured variables in long type '" << captured_variables_type_descriptor
-        << "' (" << long_count << ")" << " did not match short type '"
-        << captured_variables_shorty << "' (" << shorty_count << ")";
-  }
-}
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/art_lambda_method.h b/runtime/lambda/art_lambda_method.h
deleted file mode 100644
index ea13eb7..0000000
--- a/runtime/lambda/art_lambda_method.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_ART_LAMBDA_METHOD_H_
-#define ART_RUNTIME_LAMBDA_ART_LAMBDA_METHOD_H_
-
-#include "base/macros.h"
-#include "art_method.h"
-
-#include <stdint.h>
-
-namespace art {
-namespace lambda {
-
-class ArtLambdaMethod {
- public:
-  // Construct an art lambda method.
-  // The target method is the one invoked by invoke-lambda.
-  // The type descriptor describes the types of variables captured, e.g. "ZFLObject;\FI;[Z"
-  // The shorty drops the object name and treats arrays as objects, e.g. "ZFL\L"
-  // Innate lambda means that the lambda was originally created via invoke-lambda.
-  // -- Non-innate lambdas (learned lambdas) come from a regular class that was boxed to lambda.
-  // (Ownership of strings is retained by the caller and the lifetime should exceed this class).
-  ArtLambdaMethod(ArtMethod* target_method,
-                  const char* captured_variables_type_descriptor,
-                  const char* captured_variables_shorty,
-                  bool innate_lambda = true);
-
-  // Get the target method for this lambda that would be used by the invoke-lambda dex instruction.
-  ArtMethod* GetArtMethod() const {
-    return method_;
-  }
-
-  // Get the compile-time size of lambda closures for this method in bytes.
-  // This is circular (that is, it includes the size of the ArtLambdaMethod pointer).
-  // One should also check if the size is dynamic since nested lambdas have a runtime size.
-  size_t GetStaticClosureSize() const {
-    return closure_size_;
-  }
-
-  // Get the type descriptor for the list of captured variables.
-  // e.g. "ZFLObject;\FI;[Z" means a captured int, float, class Object, lambda FI, array of ints
-  const char* GetCapturedVariablesTypeDescriptor() const {
-    return captured_variables_type_descriptor_;
-  }
-
-  // Get the shorty 'field' type descriptor list of captured variables.
-  // This follows the same rules as a string of ShortyFieldType in the dex specification.
-  // Every captured variable is represented by exactly one character.
-  // - Objects become 'L'.
-  // - Arrays become 'L'.
-  // - Lambdas become '\'.
-  const char* GetCapturedVariablesShortyTypeDescriptor() const {
-    return captured_variables_shorty_;
-  }
-
-  // Will the size of this lambda change at runtime?
-  // Only returns true if there is a nested lambda that we can't determine statically the size of.
-  bool IsDynamicSize() const {
-    return dynamic_size_;
-  }
-
-  // Will the size of this lambda always be constant at runtime?
-  // This generally means there's no nested lambdas, or we were able to successfully determine
-  // their size statically at compile time.
-  bool IsStaticSize() const {
-    return !IsDynamicSize();
-  }
-  // Is this a lambda that was originally created via invoke-lambda?
-  // -- Non-innate lambdas (learned lambdas) come from a regular class that was boxed to lambda.
-  bool IsInnateLambda() const {
-    return innate_lambda_;
-  }
-
-  // How many variables were captured?
-  // (Each nested lambda counts as 1 captured var regardless of how many captures it itself has).
-  size_t GetNumberOfCapturedVariables() const {
-    return strlen(captured_variables_shorty_);
-  }
-
- private:
-  // TODO: ArtMethod, or at least the entry points should be inlined into this struct
-  // to avoid an extra indirect load when doing invokes.
-  // Target method that invoke-lambda will jump to.
-  ArtMethod* method_;
-  // How big the closure is (in bytes). Only includes the constant size.
-  size_t closure_size_;
-  // The type descriptor for the captured variables, e.g. "IS" for [int, short]
-  const char* captured_variables_type_descriptor_;
-  // The shorty type descriptor for captured vars, (e.g. using 'L' instead of 'LObject;')
-  const char* captured_variables_shorty_;
-  // Whether or not the size is dynamic. If it is, copiers need to read the Closure size at runtime.
-  bool dynamic_size_;
-  // True if this lambda was originally made with create-lambda,
-  // false if it came from a class instance (through new-instance and then unbox-lambda).
-  bool innate_lambda_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArtLambdaMethod);
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_ART_LAMBDA_METHOD_H_
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
deleted file mode 100644
index 9918bb7..0000000
--- a/runtime/lambda/box_table.cc
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "lambda/box_table.h"
-
-#include "base/mutex.h"
-#include "common_throws.h"
-#include "gc_root-inl.h"
-#include "lambda/closure.h"
-#include "lambda/leaking_allocator.h"
-#include "mirror/method.h"
-#include "mirror/object-inl.h"
-#include "thread.h"
-
-#include <vector>
-
-namespace art {
-namespace lambda {
-// Temporarily represent the lambda Closure as its raw bytes in an array.
-// TODO: Generate a proxy class for the closure when boxing the first time.
-using BoxedClosurePointerType = mirror::ByteArray*;
-
-static mirror::Class* GetBoxedClosureClass() SHARED_REQUIRES(Locks::mutator_lock_) {
-  return mirror::ByteArray::GetArrayClass();
-}
-
-namespace {
-  // Convenience functions to allocating/deleting box table copies of the closures.
-  struct ClosureAllocator {
-    // Deletes a Closure that was allocated through ::Allocate.
-    static void Delete(Closure* ptr) {
-      delete[] reinterpret_cast<char*>(ptr);
-    }
-
-    // Returns a well-aligned pointer to a newly allocated Closure on the 'new' heap.
-    static Closure* Allocate(size_t size) {
-      DCHECK_GE(size, sizeof(Closure));
-
-      // TODO: Maybe point to the interior of the boxed closure object after we add proxy support?
-      Closure* closure = reinterpret_cast<Closure*>(new char[size]);
-      DCHECK_ALIGNED(closure, alignof(Closure));
-      return closure;
-    }
-  };
-}  // namespace
-
-BoxTable::BoxTable()
-  : allow_new_weaks_(true),
-    new_weaks_condition_("lambda box table allowed weaks", *Locks::lambda_table_lock_) {}
-
-BoxTable::~BoxTable() {
-  // Free all the copies of our closures.
-  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
-    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
-
-    Closure* closure = key_value_pair.first;
-
-    // Remove from the map first, so that it doesn't try to access dangling pointer.
-    map_iterator = map_.Erase(map_iterator);
-
-    // Safe to delete, no dangling pointers.
-    ClosureAllocator::Delete(closure);
-  }
-}
-
-mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) {
-  Thread* self = Thread::Current();
-
-  {
-    // TODO: Switch to ReaderMutexLock if ConditionVariable ever supports RW Mutexes
-    /*Reader*/MutexLock mu(self, *Locks::lambda_table_lock_);
-    BlockUntilWeaksAllowed();
-
-    // Attempt to look up this object, it's possible it was already boxed previously.
-    // If this is the case we *must* return the same object as before to maintain
-    // referential equality.
-    //
-    // In managed code:
-    //   Functional f = () -> 5;  // vF = create-lambda
-    //   Object a = f;            // vA = box-lambda vA
-    //   Object b = f;            // vB = box-lambda vB
-    //   assert(a == f)
-    ValueType value = FindBoxedLambda(closure);
-    if (!value.IsNull()) {
-      return value.Read();
-    }
-
-    // Otherwise we need to box ourselves and insert it into the hash map
-  }
-
-  // Release the lambda table lock here, so that thread suspension is allowed.
-
-  // Convert the Closure into a managed byte[] which will serve
-  // as the temporary 'boxed' version of the lambda. This is good enough
-  // to check all the basic object identities that a boxed lambda must retain.
-  // It's also good enough to contain all the captured primitive variables.
-
-  // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class
-  // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object
-  BoxedClosurePointerType closure_as_array_object =
-      mirror::ByteArray::Alloc(self, closure->GetSize());
-
-  // There are no thread suspension points after this, so we don't need to put it into a handle.
-
-  if (UNLIKELY(closure_as_array_object == nullptr)) {
-    // Most likely an OOM has occurred.
-    CHECK(self->IsExceptionPending());
-    return nullptr;
-  }
-
-  // Write the raw closure data into the byte[].
-  closure->CopyTo(closure_as_array_object->GetRawData(sizeof(uint8_t),  // component size
-                                                      0 /*index*/),     // index
-                  closure_as_array_object->GetLength());
-
-  // The method has been successfully boxed into an object, now insert it into the hash map.
-  {
-    MutexLock mu(self, *Locks::lambda_table_lock_);
-    BlockUntilWeaksAllowed();
-
-    // Lookup the object again, it's possible another thread already boxed it while
-    // we were allocating the object before.
-    ValueType value = FindBoxedLambda(closure);
-    if (UNLIKELY(!value.IsNull())) {
-      // Let the GC clean up method_as_object at a later time.
-      return value.Read();
-    }
-
-    // Otherwise we need to insert it into the hash map in this thread.
-
-    // Make a copy for the box table to keep, in case the closure gets collected from the stack.
-    // TODO: GC may need to sweep for roots in the box table's copy of the closure.
-    Closure* closure_table_copy = ClosureAllocator::Allocate(closure->GetSize());
-    closure->CopyTo(closure_table_copy, closure->GetSize());
-
-    // The closure_table_copy needs to be deleted by us manually when we erase it from the map.
-
-    // Actually insert into the table.
-    map_.Insert({closure_table_copy, ValueType(closure_as_array_object)});
-  }
-
-  return closure_as_array_object;
-}
-
-bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) {
-  DCHECK(object != nullptr);
-  *out_closure = nullptr;
-
-  Thread* self = Thread::Current();
-
-  // Note that we do not need to access lambda_table_lock_ here
-  // since we don't need to look at the map.
-
-  mirror::Object* boxed_closure_object = object;
-
-  // Raise ClassCastException if object is not instanceof byte[]
-  if (UNLIKELY(!boxed_closure_object->InstanceOf(GetBoxedClosureClass()))) {
-    ThrowClassCastException(GetBoxedClosureClass(), boxed_closure_object->GetClass());
-    return false;
-  }
-
-  // TODO(iam): We must check that the closure object extends/implements the type
-  // specified in [type id]. This is not currently implemented since it's always a byte[].
-
-  // If we got this far, the inputs are valid.
-  // Shuffle the byte[] back into a raw closure, then allocate it, copy, and return it.
-  BoxedClosurePointerType boxed_closure_as_array =
-      down_cast<BoxedClosurePointerType>(boxed_closure_object);
-
-  const int8_t* unaligned_interior_closure = boxed_closure_as_array->GetData();
-
-  // Allocate a copy that can "escape" and copy the closure data into that.
-  Closure* unboxed_closure =
-      LeakingAllocator::MakeFlexibleInstance<Closure>(self, boxed_closure_as_array->GetLength());
-  // TODO: don't just memcpy the closure, it's unsafe when we add references to the mix.
-  memcpy(unboxed_closure, unaligned_interior_closure, boxed_closure_as_array->GetLength());
-
-  DCHECK_EQ(unboxed_closure->GetSize(), static_cast<size_t>(boxed_closure_as_array->GetLength()));
-
-  *out_closure = unboxed_closure;
-  return true;
-}
-
-BoxTable::ValueType BoxTable::FindBoxedLambda(const ClosureType& closure) const {
-  auto map_iterator = map_.Find(closure);
-  if (map_iterator != map_.end()) {
-    const std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
-    const ValueType& value = key_value_pair.second;
-
-    DCHECK(!value.IsNull());  // Never store null boxes.
-    return value;
-  }
-
-  return ValueType(nullptr);
-}
-
-void BoxTable::BlockUntilWeaksAllowed() {
-  Thread* self = Thread::Current();
-  while (UNLIKELY((!kUseReadBarrier && !allow_new_weaks_) ||
-                  (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
-    new_weaks_condition_.WaitHoldingLocks(self);  // wait while holding mutator lock
-  }
-}
-
-void BoxTable::SweepWeakBoxedLambdas(IsMarkedVisitor* visitor) {
-  DCHECK(visitor != nullptr);
-
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-
-  /*
-   * Visit every weak root in our lambda box table.
-   * Remove unmarked objects, update marked objects to new address.
-   */
-  std::vector<ClosureType> remove_list;
-  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
-    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
-
-    const ValueType& old_value = key_value_pair.second;
-
-    // This does not need a read barrier because this is called by GC.
-    mirror::Object* old_value_raw = old_value.Read<kWithoutReadBarrier>();
-    mirror::Object* new_value = visitor->IsMarked(old_value_raw);
-
-    if (new_value == nullptr) {
-      // The object has been swept away.
-      const ClosureType& closure = key_value_pair.first;
-
-      // Delete the entry from the map.
-      map_iterator = map_.Erase(map_iterator);
-
-      // Clean up the memory by deleting the closure.
-      ClosureAllocator::Delete(closure);
-
-    } else {
-      // The object has been moved.
-      // Update the map.
-      key_value_pair.second = ValueType(new_value);
-      ++map_iterator;
-    }
-  }
-
-  // Occasionally shrink the map to avoid growing very large.
-  if (map_.CalculateLoadFactor() < kMinimumLoadFactor) {
-    map_.ShrinkToMaximumLoad();
-  }
-}
-
-void BoxTable::DisallowNewWeakBoxedLambdas() {
-  CHECK(!kUseReadBarrier);
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-
-  allow_new_weaks_ = false;
-}
-
-void BoxTable::AllowNewWeakBoxedLambdas() {
-  CHECK(!kUseReadBarrier);
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-
-  allow_new_weaks_ = true;
-  new_weaks_condition_.Broadcast(self);
-}
-
-void BoxTable::BroadcastForNewWeakBoxedLambdas() {
-  CHECK(kUseReadBarrier);
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-  new_weaks_condition_.Broadcast(self);
-}
-
-void BoxTable::EmptyFn::MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const {
-  item.first = nullptr;
-
-  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-  item.second = ValueType();  // Also clear the GC root.
-}
-
-bool BoxTable::EmptyFn::IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const {
-  return item.first == nullptr;
-}
-
-bool BoxTable::EqualsFn::operator()(const UnorderedMapKeyType& lhs,
-                                    const UnorderedMapKeyType& rhs) const {
-  // Nothing needs this right now, but leave this assertion for later when
-  // we need to look at the references inside of the closure.
-  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-
-  return lhs->ReferenceEquals(rhs);
-}
-
-size_t BoxTable::HashFn::operator()(const UnorderedMapKeyType& key) const {
-  const lambda::Closure* closure = key;
-  DCHECK_ALIGNED(closure, alignof(lambda::Closure));
-
-  // Need to hold mutator_lock_ before calling into Closure::GetHashCode.
-  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-  return closure->GetHashCode();
-}
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h
deleted file mode 100644
index adb7332..0000000
--- a/runtime/lambda/box_table.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_BOX_TABLE_H_
-#define ART_RUNTIME_LAMBDA_BOX_TABLE_H_
-
-#include "base/allocator.h"
-#include "base/hash_map.h"
-#include "gc_root.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "object_callbacks.h"
-
-#include <stdint.h>
-
-namespace art {
-
-class ArtMethod;  // forward declaration
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-struct Closure;  // forward declaration
-
-/*
- * Store a table of boxed lambdas. This is required to maintain object referential equality
- * when a lambda is re-boxed.
- *
- * Conceptually, we store a mapping of Closures -> Weak Reference<Boxed Lambda Object>.
- * When too many objects get GCd, we shrink the underlying table to use less space.
- */
-class BoxTable FINAL {
- public:
-  using ClosureType = art::lambda::Closure*;
-
-  // Boxes a closure into an object. Returns null and throws an exception on failure.
-  mirror::Object* BoxLambda(const ClosureType& closure)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::lambda_table_lock_);
-
-  // Unboxes an object back into the lambda. Returns false and throws an exception on failure.
-  bool UnboxLambda(mirror::Object* object, ClosureType* out_closure)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Sweep weak references to lambda boxes. Update the addresses if the objects have been
-  // moved, and delete them from the table if the objects have been cleaned up.
-  void SweepWeakBoxedLambdas(IsMarkedVisitor* visitor)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::lambda_table_lock_);
-
-  // GC callback: Temporarily block anyone from touching the map.
-  void DisallowNewWeakBoxedLambdas()
-      REQUIRES(!Locks::lambda_table_lock_);
-
-  // GC callback: Unblock any readers who have been queued waiting to touch the map.
-  void AllowNewWeakBoxedLambdas()
-      REQUIRES(!Locks::lambda_table_lock_);
-
-  // GC callback: Unblock any readers who have been queued waiting to touch the map.
-  void BroadcastForNewWeakBoxedLambdas()
-      REQUIRES(!Locks::lambda_table_lock_);
-
-  BoxTable();
-  ~BoxTable();
-
- private:
-  // Explanation:
-  // - After all threads are suspended (exclusive mutator lock),
-  //   the concurrent-copying GC can move objects from the "from" space to the "to" space.
-  // If an object is moved at that time and *before* SweepSystemWeaks is called then
-  // we don't know if the move has happened yet.
-  // Successive reads will then (incorrectly) look at the objects in the "from" space,
-  // which is a problem since the objects have already been forwarded and mutations
-  // would not be visible in the right space.
-  // Instead, use a GcRoot here which will be automatically updated by the GC.
-  //
-  // Also, any reads should be protected by a read barrier to always give us the "to" space address.
-  using ValueType = GcRoot<mirror::Object>;
-
-  // Attempt to look up the lambda in the map, or return null if it's not there yet.
-  ValueType FindBoxedLambda(const ClosureType& closure) const
-      SHARED_REQUIRES(Locks::lambda_table_lock_);
-
-  // If the GC has come in and temporarily disallowed touching weaks, block until it is allowed.
-  void BlockUntilWeaksAllowed()
-      SHARED_REQUIRES(Locks::lambda_table_lock_);
-
-  // Wrap the Closure into a unique_ptr so that the HashMap can delete its memory automatically.
-  using UnorderedMapKeyType = ClosureType;
-
-  // EmptyFn implementation for art::HashMap
-  struct EmptyFn {
-    void MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const
-        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
-
-    bool IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const;
-  };
-
-  // HashFn implementation for art::HashMap
-  struct HashFn {
-    size_t operator()(const UnorderedMapKeyType& key) const
-        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
-  };
-
-  // EqualsFn implementation for art::HashMap
-  struct EqualsFn {
-    bool operator()(const UnorderedMapKeyType& lhs, const UnorderedMapKeyType& rhs) const
-        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
-  };
-
-  using UnorderedMap = art::HashMap<UnorderedMapKeyType,
-                                    ValueType,
-                                    EmptyFn,
-                                    HashFn,
-                                    EqualsFn,
-                                    TrackingAllocator<std::pair<ClosureType, ValueType>,
-                                                      kAllocatorTagLambdaBoxTable>>;
-
-  UnorderedMap map_                                          GUARDED_BY(Locks::lambda_table_lock_);
-  bool allow_new_weaks_                                      GUARDED_BY(Locks::lambda_table_lock_);
-  ConditionVariable new_weaks_condition_                     GUARDED_BY(Locks::lambda_table_lock_);
-
-  // Shrink the map when we get below this load factor.
-  // (This is an arbitrary value that should be large enough to prevent aggressive map erases
-  // from shrinking the table too often.)
-  static constexpr double kMinimumLoadFactor = UnorderedMap::kDefaultMinLoadFactor / 2;
-
-  DISALLOW_COPY_AND_ASSIGN(BoxTable);
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_BOX_TABLE_H_
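// For orientation, a minimal, self-contained sketch (not ART code) of the box-table
// idea the deleted header describes: a content-keyed map from closures to weakly held
// box objects, so that re-boxing an equal closure yields the *same* box and referential
// equality is preserved. SimpleClosure, Box, and BoxTableSketch are hypothetical
// stand-ins; the real table uses art::HashMap, GcRoot values, and lambda_table_lock_.
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>

struct SimpleClosure {
  std::uintptr_t target_method;  // stand-in for the ArtMethod* inside the closure
  std::string captured_bytes;    // stand-in for the packed captured variables

  bool operator==(const SimpleClosure& other) const {
    return target_method == other.target_method && captured_bytes == other.captured_bytes;
  }
};

struct SimpleClosureHash {
  std::size_t operator()(const SimpleClosure& c) const {
    // Mirrors the 17/31 accumulation used by Closure::GetHashCode.
    std::size_t h = 17;
    h = 31 * h + std::hash<std::uintptr_t>()(c.target_method);
    h = 31 * h + std::hash<std::string>()(c.captured_bytes);
    return h;
  }
};

struct Box {};  // stand-in for the boxed mirror::Object

class BoxTableSketch {
 public:
  // Returns the existing box for an equal closure, or creates a fresh one.
  std::shared_ptr<Box> BoxLambda(const SimpleClosure& closure) {
    auto it = map_.find(closure);
    if (it != map_.end()) {
      if (std::shared_ptr<Box> existing = it->second.lock()) {
        return existing;  // same closure contents => same box object
      }
    }
    auto fresh = std::make_shared<Box>();
    map_[closure] = fresh;  // held weakly, like the GC-swept weak references
    return fresh;
  }

  // Analogue of SweepWeakBoxedLambdas: drop entries whose box has died.
  void Sweep() {
    for (auto it = map_.begin(); it != map_.end();) {
      if (it->second.expired()) {
        it = map_.erase(it);
      } else {
        ++it;
      }
    }
  }

 private:
  std::unordered_map<SimpleClosure, std::weak_ptr<Box>, SimpleClosureHash> map_;
};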
diff --git a/runtime/lambda/closure.cc b/runtime/lambda/closure.cc
deleted file mode 100644
index 179e4ee..0000000
--- a/runtime/lambda/closure.cc
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/closure.h"
-
-#include "base/logging.h"
-#include "lambda/art_lambda_method.h"
-#include "runtime/mirror/object_reference.h"
-
-static constexpr const bool kClosureSupportsReferences = false;
-static constexpr const bool kClosureSupportsGarbageCollection = false;
-
-namespace art {
-namespace lambda {
-
-template <typename T>
-// TODO: can I return T __attribute__((__aligned__(1)))* here instead?
-const uint8_t* Closure::GetUnsafeAtOffset(size_t offset) const {
-  // Do not DCHECK here with existing helpers since most of them will call into this function.
-  return reinterpret_cast<const uint8_t*>(captured_) + offset;
-}
-
-size_t Closure::GetCapturedVariableSize(ShortyFieldType variable_type, size_t offset) const {
-  switch (variable_type) {
-    case ShortyFieldType::kLambda:
-    {
-      return GetClosureSize(GetUnsafeAtOffset<Closure>(offset));
-    }
-    default:
-      DCHECK(variable_type.IsStaticSize());
-      return variable_type.GetStaticSize();
-  }
-}
-
-// Templatize the flags to give the compiler a fighting chance to eliminate
-// any unnecessary code through different uses of this function.
-template <Closure::VariableInfo::Flags flags>
-inline Closure::VariableInfo Closure::ParseTypeDescriptor(const char* type_descriptor,
-                                                          size_t upto_index) const {
-  DCHECK(type_descriptor != nullptr);
-
-  VariableInfo result;
-
-  ShortyFieldType last_type;
-  size_t offset = (flags & VariableInfo::kOffset) ? GetStartingOffset() : 0;
-  size_t prev_offset = 0;
-  size_t count = 0;
-
-  while ((type_descriptor =
-      ShortyFieldType::ParseFromFieldTypeDescriptor(type_descriptor, &last_type)) != nullptr) {
-    count++;
-
-    if (flags & VariableInfo::kOffset) {
-      // The current offset is the accumulated size of all preceding captured variables.
-      offset += prev_offset;
-      prev_offset = GetCapturedVariableSize(last_type, offset);
-    }
-
-    if ((count > upto_index)) {
-      break;
-    }
-  }
-
-  if (flags & VariableInfo::kVariableType) {
-    result.variable_type_ = last_type;
-  }
-
-  if (flags & VariableInfo::kIndex) {
-    result.index_ = count;
-  }
-
-  if (flags & VariableInfo::kCount) {
-    result.count_ = count;
-  }
-
-  if (flags & VariableInfo::kOffset) {
-    result.offset_ = offset;
-  }
-
-  // TODO: We should probably store the result of this in the ArtLambdaMethod,
-  // to avoid re-computing the data every single time for static closures.
-  return result;
-}
-
-size_t Closure::GetCapturedVariablesSize() const {
-  const size_t captured_variable_offset = offsetof(Closure, captured_);
-  DCHECK_GE(GetSize(), captured_variable_offset);  // Prevent underflows.
-  return GetSize() - captured_variable_offset;
-}
-
-size_t Closure::GetSize() const {
-  const size_t static_closure_size = lambda_info_->GetStaticClosureSize();
-  if (LIKELY(lambda_info_->IsStaticSize())) {
-    return static_closure_size;
-  }
-
-  DCHECK_GE(static_closure_size, sizeof(captured_[0].dynamic_.size_));
-  const size_t dynamic_closure_size = captured_[0].dynamic_.size_;
-  // The dynamic size better be at least as big as the static size.
-  DCHECK_GE(dynamic_closure_size, static_closure_size);
-
-  return dynamic_closure_size;
-}
-
-void Closure::CopyTo(void* target, size_t target_size) const {
-  DCHECK_GE(target_size, GetSize());
-
-  // TODO: using memcpy is unsafe with read barriers, fix this once we add reference support
-  static_assert(kClosureSupportsReferences == false,
-                "Do not use memcpy with readbarrier references");
-  memcpy(target, this, GetSize());
-}
-
-ArtMethod* Closure::GetTargetMethod() const {
-  return const_cast<ArtMethod*>(lambda_info_->GetArtMethod());
-}
-
-uint32_t Closure::GetHashCode() const {
-  // Start with a non-zero constant, a prime number.
-  uint32_t result = 17;
-
-  // Include the hash with the ArtMethod.
-  {
-    uintptr_t method = reinterpret_cast<uintptr_t>(GetTargetMethod());
-    result = 31 * result + Low32Bits(method);
-    if (sizeof(method) == sizeof(uint64_t)) {
-      result = 31 * result + High32Bits(method);
-    }
-  }
-
-  // Include a hash for each captured variable.
-  for (size_t i = 0; i < GetCapturedVariablesSize(); ++i) {
-    // TODO: not safe for GC-able values since the address can move and the hash code would change.
-    uint8_t captured_variable_raw_value;
-    CopyUnsafeAtOffset<uint8_t>(i, /*out*/&captured_variable_raw_value);  // NOLINT: [whitespace/comma] [3]
-
-    result = 31 * result + captured_variable_raw_value;
-  }
-
-  // TODO: Fix above loop to work for objects and lambdas.
-  static_assert(kClosureSupportsGarbageCollection == false,
-               "Need to update above loop to read the hash code from the "
-                "objects and lambdas recursively");
-
-  return result;
-}
-
-bool Closure::ReferenceEquals(const Closure* other) const {
-  DCHECK(other != nullptr);
-
-  // TODO: Need rework to use read barriers once closures have references inside of them that can
-  // move. Until then, it's safe to just compare the data inside of it directly.
-  static_assert(kClosureSupportsReferences == false,
-                "Unsafe to use memcmp in read barrier collector");
-
-  if (GetSize() != other->GetSize()) {
-    return false;
-  }
-
-  return memcmp(this, other, GetSize()) == 0;
-}
-
-size_t Closure::GetNumberOfCapturedVariables() const {
-  // TODO: refactor into art_lambda_method.h. Parsing should only be required here as a DCHECK.
-  VariableInfo variable_info =
-      ParseTypeDescriptor<VariableInfo::kCount>(GetCapturedVariablesTypeDescriptor(),
-                                                VariableInfo::kUpToIndexMax);
-  size_t count = variable_info.count_;
-  // Assuming each variable was 1 byte, the size should always be greater than or equal to the count.
-  DCHECK_LE(count, GetCapturedVariablesSize());
-  return count;
-}
-
-const char* Closure::GetCapturedVariablesTypeDescriptor() const {
-  return lambda_info_->GetCapturedVariablesTypeDescriptor();
-}
-
-ShortyFieldType Closure::GetCapturedShortyType(size_t index) const {
-  DCHECK_LT(index, GetNumberOfCapturedVariables());
-
-  VariableInfo variable_info =
-      ParseTypeDescriptor<VariableInfo::kVariableType>(GetCapturedVariablesTypeDescriptor(),
-                                                       index);
-
-  return variable_info.variable_type_;
-}
-
-uint32_t Closure::GetCapturedPrimitiveNarrow(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsPrimitiveNarrow());
-
-  ShortyFieldType variable_type;
-  size_t offset;
-  GetCapturedVariableTypeAndOffset(index, &variable_type, &offset);
-
-  // TODO: Restructure to use template specialization, e.g. GetCapturedPrimitive<T>
-  // so that we can avoid this nonsense regarding memcpy always overflowing.
-  // Plus, this additional switching seems redundant since the interpreter
-  // would've done it already, and knows the exact type.
-  uint32_t result = 0;
-  static_assert(ShortyFieldTypeTraits::IsPrimitiveNarrowType<decltype(result)>(),
-                "result must be a primitive narrow type");
-  switch (variable_type) {
-    case ShortyFieldType::kBoolean:
-      CopyUnsafeAtOffset<bool>(offset, &result);
-      break;
-    case ShortyFieldType::kByte:
-      CopyUnsafeAtOffset<uint8_t>(offset, &result);
-      break;
-    case ShortyFieldType::kChar:
-      CopyUnsafeAtOffset<uint16_t>(offset, &result);
-      break;
-    case ShortyFieldType::kShort:
-      CopyUnsafeAtOffset<int16_t>(offset, &result);
-      break;
-    case ShortyFieldType::kInt:
-      CopyUnsafeAtOffset<int32_t>(offset, &result);
-      break;
-    case ShortyFieldType::kFloat:
-      // XX: Maybe there should just be a GetCapturedPrimitive<T> to avoid this shuffle?
-      // The interpreter's invoke seems to only special case references and wides,
-      // everything else is treated as a generic 32-bit pattern.
-      CopyUnsafeAtOffset<float>(offset, &result);
-      break;
-    default:
-      LOG(FATAL)
-          << "expected a valid narrow primitive shorty type but got "
-          << static_cast<char>(variable_type);
-      UNREACHABLE();
-  }
-
-  return result;
-}
-
-uint64_t Closure::GetCapturedPrimitiveWide(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsPrimitiveWide());
-
-  ShortyFieldType variable_type;
-  size_t offset;
-  GetCapturedVariableTypeAndOffset(index, &variable_type, &offset);
-
-  // TODO: Restructure to use template specialization, e.g. GetCapturedPrimitive<T>
-  // so that we can avoid this nonsense regarding memcpy always overflowing.
-  // Plus, this additional switching seems redundant since the interpreter
-  // would've done it already, and knows the exact type.
-  uint64_t result = 0;
-  static_assert(ShortyFieldTypeTraits::IsPrimitiveWideType<decltype(result)>(),
-                "result must be a primitive wide type");
-  switch (variable_type) {
-    case ShortyFieldType::kLong:
-      CopyUnsafeAtOffset<int64_t>(offset, &result);
-      break;
-    case ShortyFieldType::kDouble:
-      CopyUnsafeAtOffset<double>(offset, &result);
-      break;
-    default:
-      LOG(FATAL)
-          << "expected a valid primitive wide shorty type but got "
-          << static_cast<char>(variable_type);
-      UNREACHABLE();
-  }
-
-  return result;
-}
-
-mirror::Object* Closure::GetCapturedObject(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsObject());
-
-  ShortyFieldType variable_type;
-  size_t offset;
-  GetCapturedVariableTypeAndOffset(index, &variable_type, &offset);
-
-  // TODO: Restructure to use template specialization, e.g. GetCapturedPrimitive<T>
-  // so that we can avoid this nonsense regarding memcpy always overflowing.
-  // Plus, this additional switching seems redundant since the interpreter
-  // would've done it already, and knows the exact type.
-  mirror::Object* result = nullptr;
-  static_assert(ShortyFieldTypeTraits::IsObjectType<decltype(result)>(),
-                "result must be an object type");
-  switch (variable_type) {
-    case ShortyFieldType::kObject:
-      // TODO: This seems unsafe. This may need to use gcroots.
-      static_assert(kClosureSupportsGarbageCollection == false,
-                    "May need GcRoots and definitely need mutator locks");
-      {
-        mirror::CompressedReference<mirror::Object> compressed_result;
-        CopyUnsafeAtOffset<uint32_t>(offset, &compressed_result);
-        result = compressed_result.AsMirrorPtr();
-      }
-      break;
-    default:
-      CHECK(false)
-          << "expected a valid shorty type but got " << static_cast<char>(variable_type);
-      UNREACHABLE();
-  }
-
-  return result;
-}
-
-size_t Closure::GetCapturedClosureSize(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsLambda());
-  size_t offset = GetCapturedVariableOffset(index);
-
-  auto* captured_ptr = reinterpret_cast<const uint8_t*>(&captured_);
-  size_t closure_size = GetClosureSize(captured_ptr + offset);
-
-  return closure_size;
-}
-
-void Closure::CopyCapturedClosure(size_t index, void* destination, size_t destination_room) const {
-  DCHECK(GetCapturedShortyType(index).IsLambda());
-  size_t offset = GetCapturedVariableOffset(index);
-
-  auto* captured_ptr = reinterpret_cast<const uint8_t*>(&captured_);
-  size_t closure_size = GetClosureSize(captured_ptr + offset);
-
-  static_assert(ShortyFieldTypeTraits::IsLambdaType<Closure*>(),
-                "result must be a lambda type");
-
-  CopyUnsafeAtOffset<Closure>(offset, destination, closure_size, destination_room);
-}
-
-size_t Closure::GetCapturedVariableOffset(size_t index) const {
-  VariableInfo variable_info =
-      ParseTypeDescriptor<VariableInfo::kOffset>(GetCapturedVariablesTypeDescriptor(),
-                                                 index);
-
-  size_t offset = variable_info.offset_;
-
-  return offset;
-}
-
-void Closure::GetCapturedVariableTypeAndOffset(size_t index,
-                                               ShortyFieldType* out_type,
-                                               size_t* out_offset) const {
-  DCHECK(out_type != nullptr);
-  DCHECK(out_offset != nullptr);
-
-  static constexpr const VariableInfo::Flags kVariableTypeAndOffset =
-      static_cast<VariableInfo::Flags>(VariableInfo::kVariableType | VariableInfo::kOffset);
-  VariableInfo variable_info =
-      ParseTypeDescriptor<kVariableTypeAndOffset>(GetCapturedVariablesTypeDescriptor(),
-                                                  index);
-
-  ShortyFieldType variable_type = variable_info.variable_type_;
-  size_t offset = variable_info.offset_;
-
-  *out_type = variable_type;
-  *out_offset = offset;
-}
-
-template <typename T>
-void Closure::CopyUnsafeAtOffset(size_t offset,
-                                 void* destination,
-                                 size_t src_size,
-                                 size_t destination_room) const {
-  DCHECK_GE(destination_room, src_size);
-  const uint8_t* data_ptr = GetUnsafeAtOffset<T>(offset);
-  memcpy(destination, data_ptr, src_size);
-}
-
-// TODO: This is kind of ugly. I would prefer an unaligned_ptr<Closure> here.
-// Unfortunately C++ doesn't let you lower the alignment (i.e. alignas(1) Closure*) is not legal.
-size_t Closure::GetClosureSize(const uint8_t* closure) {
-  DCHECK(closure != nullptr);
-
-  static_assert(!std::is_base_of<mirror::Object, Closure>::value,
-                "It might be unsafe to call memcpy on a managed object");
-
-  // Safe as long as it's not a mirror Object.
-  // TODO: Should probably wrap this in like MemCpyNative or some such which statically asserts
-  // we aren't trying to copy mirror::Object data around.
-  ArtLambdaMethod* closure_info;
-  memcpy(&closure_info, closure + offsetof(Closure, lambda_info_), sizeof(closure_info));
-
-  if (LIKELY(closure_info->IsStaticSize())) {
-    return closure_info->GetStaticClosureSize();
-  }
-
-  // The size is dynamic, so we need to read it from captured_variables_ portion.
-  size_t dynamic_size;
-  memcpy(&dynamic_size,
-         closure + offsetof(Closure, captured_[0].dynamic_.size_),
-         sizeof(dynamic_size));
-  static_assert(sizeof(dynamic_size) == sizeof(captured_[0].dynamic_.size_),
-                "Dynamic size type must match the structural type of the size");
-
-  DCHECK_GE(dynamic_size, closure_info->GetStaticClosureSize());
-  return dynamic_size;
-}
-
-size_t Closure::GetStartingOffset() const {
-  static constexpr const size_t captured_offset = offsetof(Closure, captured_);
-  if (LIKELY(lambda_info_->IsStaticSize())) {
-    return offsetof(Closure, captured_[0].static_variables_) - captured_offset;
-  } else {
-    return offsetof(Closure, captured_[0].dynamic_.variables_) - captured_offset;
-  }
-}
-
-}  // namespace lambda
-}  // namespace art
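// For orientation, a simplified sketch (not the ART helpers) of the offset walk that
// Closure::ParseTypeDescriptor performs: each captured variable's offset is the sum of
// the sizes of the variables preceding it in the type descriptor. Reference captures are
// assumed to be 4 bytes, matching the 32-bit compressed references the deleted code
// stores; nested lambdas are dynamically sized and are omitted here. ShortySize and
// CapturedVariableOffset are illustrative names.
#include <cstddef>
#include <stdexcept>
#include <string>

inline std::size_t ShortySize(char shorty) {
  switch (shorty) {
    case 'Z': case 'B':           return 1;  // boolean, byte
    case 'C': case 'S':           return 2;  // char, short
    case 'I': case 'F': case 'L': return 4;  // int, float, compressed reference
    case 'J': case 'D':           return 8;  // long, double
    default: throw std::invalid_argument("unsupported shorty type");
  }
}

// Offset, in bytes from the first captured variable, of variable `index` in a closure
// whose captures are described by `shorty_descriptor`, e.g. "IJD".
inline std::size_t CapturedVariableOffset(const std::string& shorty_descriptor,
                                          std::size_t index) {
  std::size_t offset = 0;
  for (std::size_t i = 0; i < index; ++i) {
    offset += ShortySize(shorty_descriptor.at(i));
  }
  return offset;
}

// Example: CapturedVariableOffset("IJD", 2) == 4 + 8 == 12.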
diff --git a/runtime/lambda/closure.h b/runtime/lambda/closure.h
deleted file mode 100644
index 31ff194..0000000
--- a/runtime/lambda/closure.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_CLOSURE_H_
-#define ART_RUNTIME_LAMBDA_CLOSURE_H_
-
-#include "base/macros.h"
-#include "base/mutex.h"  // For Locks::mutator_lock_.
-#include "lambda/shorty_field_type.h"
-
-#include <stdint.h>
-
-namespace art {
-class ArtMethod;  // forward declaration
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-class ArtLambdaMethod;  // forward declaration
-class ClosureBuilder;   // forward declaration
-
-// Inline representation of a lambda closure.
-// Contains the target method and the set of packed captured variables as a copy.
-//
-// The closure itself is logically immutable, although in practice any object references
-// it (recursively) contains can be moved and updated by the GC.
-struct PACKED(sizeof(ArtLambdaMethod*)) Closure {
-  // Get the size of the Closure in bytes.
-  // This is necessary in order to allocate a large enough area to copy the Closure into.
-  // Do *not* copy the closure with memcpy, since references also need to get moved.
-  size_t GetSize() const;
-
-  // Copy this closure into the target, whose memory size is specified by target_size.
-  // Any object references are fixed up during the copy (if there was a read barrier).
-  // The target_size must be at least as large as GetSize().
-  void CopyTo(void* target, size_t target_size) const;
-
-  // Get the target method, i.e. the method that will be dispatched into with invoke-lambda.
-  ArtMethod* GetTargetMethod() const;
-
-  // Calculates the hash code. Value is recomputed each time.
-  uint32_t GetHashCode() const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Is this the same closure as other? e.g. same target method, same variables captured.
-  //
-  // Determines whether the two Closures are interchangeable instances.
-  // Does *not* call Object#equals recursively. If two Closures compare ReferenceEquals true that
-  // means that they are interchangeable values (usually for the purpose of boxing/unboxing).
-  bool ReferenceEquals(const Closure* other) const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // How many variables were captured?
-  size_t GetNumberOfCapturedVariables() const;
-
-  // Returns a type descriptor string that represents each captured variable.
-  // e.g. "Ljava/lang/Object;ZB" would mean a capture tuple of (Object, boolean, byte)
-  const char* GetCapturedVariablesTypeDescriptor() const;
-
-  // Returns the short type for the captured variable at index.
-  // Index must be less than the number of captured variables.
-  ShortyFieldType GetCapturedShortyType(size_t index) const;
-
-  // Returns the 32-bit representation of a non-wide primitive at the captured variable index.
-  // Smaller types are zero extended.
-  // Index must be less than the number of captured variables.
-  uint32_t GetCapturedPrimitiveNarrow(size_t index) const;
-  // Returns the 64-bit representation of a wide primitive at the captured variable index.
-  // Smaller types are zero extended.
-  // Index must be less than the number of captured variables.
-  uint64_t GetCapturedPrimitiveWide(size_t index) const;
-  // Returns the object reference at the captured variable index.
-  // The type at the index *must* be an object reference or a CHECK failure will occur.
-  // Index must be less than the number of captured variables.
-  mirror::Object* GetCapturedObject(size_t index) const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Gets the size of a nested capture closure in bytes, at the captured variable index.
-  // The type at the index *must* be a lambda closure or a CHECK failure will occur.
-  size_t GetCapturedClosureSize(size_t index) const;
-
-  // Copies a nested lambda closure at the captured variable index.
-  // The destination must have enough room for the closure (see GetCapturedClosureSize).
-  void CopyCapturedClosure(size_t index, void* destination, size_t destination_room) const;
-
- private:
-  // Read out any non-lambda value as a copy.
-  template <typename T>
-  T GetCapturedVariable(size_t index) const;
-
-  // Reconstruct the closure's captured variable info at runtime.
-  struct VariableInfo {
-    size_t index_;
-    ShortyFieldType variable_type_;
-    size_t offset_;
-    size_t count_;
-
-    enum Flags {
-      kIndex = 0x1,
-      kVariableType = 0x2,
-      kOffset = 0x4,
-      kCount = 0x8,
-    };
-
-    // Traverse to the end of the type descriptor list instead of stopping at some particular index.
-    static constexpr size_t kUpToIndexMax = static_cast<size_t>(-1);
-  };
-
-  // Parse a type descriptor, stopping at index "upto_index".
-  // Returns only the information requested in flags. All other fields are indeterminate.
-  template <VariableInfo::Flags flags>
-  inline VariableInfo ALWAYS_INLINE ParseTypeDescriptor(const char* type_descriptor,
-                                                        size_t upto_index) const;
-
-  // Convenience function to call ParseTypeDescriptor with just the type and offset.
-  void GetCapturedVariableTypeAndOffset(size_t index,
-                                        ShortyFieldType* out_type,
-                                        size_t* out_offset) const;
-
-  // How many bytes do the captured variables take up? Runtime sizeof(captured_variables).
-  size_t GetCapturedVariablesSize() const;
-  // Get the size in bytes of the variable_type which is potentially stored at offset.
-  size_t GetCapturedVariableSize(ShortyFieldType variable_type, size_t offset) const;
-  // Get the starting offset (in bytes) for the 0th captured variable.
-  // All offsets are relative to 'captured_'.
-  size_t GetStartingOffset() const;
-  // Get the offset for this index.
-  // All offsets are relative to 'captured_'.
-  size_t GetCapturedVariableOffset(size_t index) const;
-
-  // Cast the data at '(char*)captured_[offset]' into T, returning its address.
-  // This value should not be dereferenced directly since it is unaligned.
-  template <typename T>
-  inline const uint8_t* GetUnsafeAtOffset(size_t offset) const;
-
-  // Copy the data at the offset into the destination. DCHECKs that
-  // the destination_room is large enough (in bytes) to fit the data.
-  template <typename T>
-  inline void CopyUnsafeAtOffset(size_t offset,
-                                 void* destination,
-                                 size_t src_size = sizeof(T),
-                                 size_t destination_room = sizeof(T)) const;
-
-  // Get the closure size from an unaligned (i.e. interior) closure pointer.
-  static size_t GetClosureSize(const uint8_t* closure);
-
-  ///////////////////////////////////////////////////////////////////////////////////
-
-  // Compile-time known lambda information such as the type descriptor and size.
-  ArtLambdaMethod* lambda_info_;
-
-  // A contiguous list of captured variables, and possibly the closure size.
-  // The runtime size can always be determined through GetSize().
-  union {
-    // Read from here if the closure size is static (ArtLambdaMethod::IsStatic)
-    uint8_t static_variables_[0];
-    struct {
-      // Read from here if the closure size is dynamic (ArtLambdaMethod::IsDynamic)
-      size_t size_;  // The lambda_info_ and the size_ itself are also included as part of the size.
-      uint8_t variables_[0];
-    } dynamic_;
-  } captured_[0];
-  // captured_ will always consist of one array element at runtime.
-  // Set to [0] so that 'size_' is not counted in sizeof(Closure).
-
-  friend class ClosureBuilder;
-  friend class ClosureTest;
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_CLOSURE_H_
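// For orientation, a small sketch (not ART code) of the copy-based, unaligned access
// pattern the deleted header relies on (GetUnsafeAtOffset / CopyUnsafeAtOffset):
// captured values are packed back to back with no padding, so reads and writes go
// through memcpy instead of dereferencing possibly misaligned pointers. ReadPacked and
// WritePacked are illustrative names and assume trivially copyable T.
#include <cstddef>
#include <cstring>
#include <vector>

template <typename T>
T ReadPacked(const std::vector<unsigned char>& bytes, std::size_t offset) {
  T value;
  std::memcpy(&value, bytes.data() + offset, sizeof(T));  // alignment-safe read
  return value;
}

template <typename T>
void WritePacked(std::vector<unsigned char>& bytes, const T& value) {
  const unsigned char* raw = reinterpret_cast<const unsigned char*>(&value);
  bytes.insert(bytes.end(), raw, raw + sizeof(T));  // append the raw bit pattern
}

// Example: pack an int and a double contiguously, then read them back.
//   std::vector<unsigned char> packed;
//   WritePacked(packed, 42);
//   WritePacked(packed, 3.5);
//   int a = ReadPacked<int>(packed, 0);                   // 42
//   double b = ReadPacked<double>(packed, sizeof(int));   // 3.5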
diff --git a/runtime/lambda/closure_builder-inl.h b/runtime/lambda/closure_builder-inl.h
deleted file mode 100644
index 3cec21f..0000000
--- a/runtime/lambda/closure_builder-inl.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_INL_H_
-#define ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_INL_H_
-
-#include "lambda/closure_builder.h"
-#include <string.h>
-
-namespace art {
-namespace lambda {
-
-template <typename T, ClosureBuilder::ShortyTypeEnum kShortyType>
-void ClosureBuilder::CaptureVariablePrimitive(T value) {
-  static_assert(ShortyFieldTypeTraits::IsPrimitiveType<T>(), "T must be a primitive type");
-  const size_t type_size = ShortyFieldType(kShortyType).GetStaticSize();
-  DCHECK_EQ(type_size, sizeof(T));
-
-  // Copy the data while retaining the bit pattern. Strict-aliasing safe.
-  ShortyFieldTypeTraits::MaxType value_storage = 0;
-  memcpy(&value_storage, &value, sizeof(T));
-
-  values_.push_back(value_storage);
-  size_ += sizeof(T);
-
-  shorty_types_ += kShortyType;
-}
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_INL_H_
diff --git a/runtime/lambda/closure_builder.cc b/runtime/lambda/closure_builder.cc
deleted file mode 100644
index 739e965..0000000
--- a/runtime/lambda/closure_builder.cc
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "lambda/closure_builder.h"
-
-#include "base/macros.h"
-#include "base/value_object.h"
-#include "lambda/art_lambda_method.h"
-#include "lambda/closure.h"
-#include "lambda/shorty_field_type.h"
-#include "runtime/mirror/object_reference.h"
-
-#include <stdint.h>
-#include <vector>
-
-namespace art {
-namespace lambda {
-
-/*
- * GC support TODOs:
- * (Although there's some code for storing objects, it is UNIMPLEMENTED(FATAL) because it is
- * incomplete).
- *
- * 1) GC needs to be able to traverse the Closure and visit any references.
- *    It might be possible to get away with global roots in the short term.
- *
- * 2) Add brooks read barrier support. We can store the black/gray/white bits
- *    in the lower 2 bits of the lambda art method pointer. Whenever a closure is copied
- *    [to the stack] we'd need to add a cold path to turn it black.
- *    (since there's only 3 colors, I can use the 4th value to indicate no-refs).
- *    e.g. 0x0 = gray, 0x1 = white, 0x2 = black, 0x3 = no-nested-references
- *    - Alternatively the GC can mark reference-less closures as always-black,
- *      although it would need extra work to check for references.
- */
-
-void ClosureBuilder::CaptureVariableObject(mirror::Object* object) {
-  auto compressed_reference = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(object);
-  ShortyFieldTypeTraits::MaxType storage = 0;
-
-  static_assert(sizeof(storage) >= sizeof(compressed_reference),
-                "not enough room to store a compressed reference");
-  memcpy(&storage, &compressed_reference, sizeof(compressed_reference));
-
-  values_.push_back(storage);
-  size_ += kObjectReferenceSize;
-
-  static_assert(kObjectReferenceSize == sizeof(compressed_reference), "reference size mismatch");
-
-  // TODO: needs more work to support concurrent GC
-  if (kIsDebugBuild) {
-    if (kUseReadBarrier) {
-      UNIMPLEMENTED(FATAL) << "can't yet safely capture objects with read barrier";
-    }
-  }
-
-  shorty_types_ += ShortyFieldType::kObject;
-}
-
-void ClosureBuilder::CaptureVariableLambda(Closure* closure) {
-  DCHECK(closure != nullptr);  // null closures not allowed, target method must be null instead.
-  values_.push_back(reinterpret_cast<ShortyFieldTypeTraits::MaxType>(closure));
-
-  if (LIKELY(is_dynamic_size_ == false)) {
-    // Write in the extra bytes to store the dynamic size the first time.
-    is_dynamic_size_ = true;
-    size_ += sizeof(Closure::captured_[0].dynamic_.size_);
-  }
-
-  // A closure may be sized dynamically, so always query it for the true size.
-  size_ += closure->GetSize();
-
-  shorty_types_ += ShortyFieldType::kLambda;
-}
-
-size_t ClosureBuilder::GetSize() const {
-  return size_;
-}
-
-size_t ClosureBuilder::GetCaptureCount() const {
-  DCHECK_EQ(values_.size(), shorty_types_.size());
-  return values_.size();
-}
-
-const std::string& ClosureBuilder::GetCapturedVariableShortyTypes() const {
-  DCHECK_EQ(values_.size(), shorty_types_.size());
-  return shorty_types_;
-}
-
-Closure* ClosureBuilder::CreateInPlace(void* memory, ArtLambdaMethod* target_method) const {
-  DCHECK(memory != nullptr);
-  DCHECK(target_method != nullptr);
-  DCHECK_EQ(is_dynamic_size_, target_method->IsDynamicSize());
-
-  CHECK_EQ(target_method->GetNumberOfCapturedVariables(), values_.size())
-    << "number of variables captured at runtime does not match "
-    << "number of variables captured at compile time";
-
-  Closure* closure = new (memory) Closure;
-  closure->lambda_info_ = target_method;
-
-  static_assert(offsetof(Closure, captured_) == kInitialSize, "wrong initial size");
-
-  size_t written_size;
-  if (UNLIKELY(is_dynamic_size_)) {
-    // The closure size must be set dynamically (i.e. nested lambdas).
-    closure->captured_[0].dynamic_.size_ = GetSize();
-    size_t header_size = offsetof(Closure, captured_[0].dynamic_.variables_);
-    DCHECK_LE(header_size, GetSize());
-    size_t variables_size = GetSize() - header_size;
-    written_size =
-        WriteValues(target_method,
-                    closure->captured_[0].dynamic_.variables_,
-                    header_size,
-                    variables_size);
-  } else {
-    // The closure size is known statically (i.e. no nested lambdas).
-    DCHECK(GetSize() == target_method->GetStaticClosureSize());
-    size_t header_size = offsetof(Closure, captured_[0].static_variables_);
-    DCHECK_LE(header_size, GetSize());
-    size_t variables_size = GetSize() - header_size;
-    written_size =
-        WriteValues(target_method,
-                    closure->captured_[0].static_variables_,
-                    header_size,
-                    variables_size);
-  }
-
-  DCHECK_EQ(written_size, closure->GetSize());
-
-  return closure;
-}
-
-size_t ClosureBuilder::WriteValues(ArtLambdaMethod* target_method,
-                                   uint8_t variables[],
-                                   size_t header_size,
-                                   size_t variables_size) const {
-  size_t total_size = header_size;
-  const char* shorty_types = target_method->GetCapturedVariablesShortyTypeDescriptor();
-  DCHECK_STREQ(shorty_types, shorty_types_.c_str());
-
-  size_t variables_offset = 0;
-  size_t remaining_size = variables_size;
-
-  const size_t shorty_count = target_method->GetNumberOfCapturedVariables();
-  DCHECK_EQ(shorty_count, GetCaptureCount());
-
-  for (size_t i = 0; i < shorty_count; ++i) {
-    ShortyFieldType shorty{shorty_types[i]};  // NOLINT [readability/braces] [4]
-
-    size_t var_size;
-    if (LIKELY(shorty.IsStaticSize())) {
-      // TODO: needs more work to support concurrent GC, e.g. read barriers
-      if (kUseReadBarrier == false) {
-        if (UNLIKELY(shorty.IsObject())) {
-          UNIMPLEMENTED(FATAL) << "can't yet safely write objects with read barrier";
-        }
-      } else {
-        if (UNLIKELY(shorty.IsObject())) {
-          UNIMPLEMENTED(FATAL) << "writing objects not yet supported, no GC support";
-        }
-      }
-
-      var_size = shorty.GetStaticSize();
-      DCHECK_LE(var_size, sizeof(values_[i]));
-
-      // Safe even for objects (non-read barrier case) if we never suspend
-      // while the ClosureBuilder is live.
-      // FIXME: Need to add GC support for references in a closure.
-      memcpy(&variables[variables_offset], &values_[i], var_size);
-    } else {
-      DCHECK(shorty.IsLambda())
-          << " don't support writing dynamically sized types other than lambda";
-
-      ShortyFieldTypeTraits::MaxType closure_raw = values_[i];
-      Closure* nested_closure = reinterpret_cast<Closure*>(closure_raw);
-
-      DCHECK(nested_closure != nullptr);
-      nested_closure->CopyTo(&variables[variables_offset], remaining_size);
-
-      var_size = nested_closure->GetSize();
-    }
-
-    total_size += var_size;
-    DCHECK_GE(remaining_size, var_size);
-    remaining_size -= var_size;
-
-    variables_offset += var_size;
-  }
-
-  DCHECK_EQ('\0', shorty_types[shorty_count]);
-  DCHECK_EQ(variables_offset, variables_size);
-
-  return total_size;
-}
-
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/closure_builder.h b/runtime/lambda/closure_builder.h
deleted file mode 100644
index 23eb484..0000000
--- a/runtime/lambda/closure_builder.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_H_
-#define ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_H_
-
-#include "base/macros.h"
-#include "base/mutex.h"  // For Locks::mutator_lock_.
-#include "base/value_object.h"
-#include "lambda/shorty_field_type.h"
-
-#include <stdint.h>
-#include <vector>
-
-namespace art {
-class ArtMethod;  // forward declaration
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-class ArtLambdaMethod;  // forward declaration
-
-// Build a closure by capturing variables one at a time.
-// When all variables have been marked captured, the closure can be created in-place into
-// a target memory address.
-//
-// The mutator lock must be held for the duration of the lifetime of this object,
-// since it needs to temporarily store heap references into an internal list.
-class ClosureBuilder {
- public:
-  using ShortyTypeEnum = decltype(ShortyFieldType::kByte);
-
-  // Mark this primitive value to be captured as the specified type.
-  template <typename T, ShortyTypeEnum kShortyType = ShortyFieldTypeSelectEnum<T>::value>
-  void CaptureVariablePrimitive(T value);
-
-  // Mark this object reference to be captured.
-  void CaptureVariableObject(mirror::Object* object) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Mark this lambda closure to be captured.
-  void CaptureVariableLambda(Closure* closure);
-
-  // Get the size (in bytes) of the closure.
-  // This size is used to allocate memory large enough to write the closure into.
-  // Call 'CreateInPlace' to actually write the closure out.
-  size_t GetSize() const;
-
-  // Returns how many variables have been captured so far.
-  size_t GetCaptureCount() const;
-
-  // Get the list of captured variables' shorty field types.
-  const std::string& GetCapturedVariableShortyTypes() const;
-
-  // Creates a closure in-place and writes out the data into 'memory'.
-  // Memory must be at least 'GetSize' bytes large.
-  // All previously marked data to be captured is now written out.
-  Closure* CreateInPlace(void* memory, ArtLambdaMethod* target_method) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Locks need to be held for entire lifetime of ClosureBuilder.
-  ClosureBuilder() SHARED_REQUIRES(Locks::mutator_lock_)
-  {}
-
-  // Locks need to be held for entire lifetime of ClosureBuilder.
-  ~ClosureBuilder() SHARED_REQUIRES(Locks::mutator_lock_)
-  {}
-
- private:
-  // Initial size a closure starts out before any variables are written.
-  // Header size only.
-  static constexpr size_t kInitialSize = sizeof(ArtLambdaMethod*);
-
-  // Write a Closure's variables field from the captured variables.
-  // variables_size is specified in bytes and only covers the room needed for the variables.
-  // Returns the calculated actual size of the closure.
-  size_t WriteValues(ArtLambdaMethod* target_method,
-                     uint8_t variables[],
-                     size_t header_size,
-                     size_t variables_size) const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  size_t size_ = kInitialSize;
-  bool is_dynamic_size_ = false;
-  std::vector<ShortyFieldTypeTraits::MaxType> values_;
-  std::string shorty_types_;
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_H_
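// For orientation, a minimal sketch (not the ART API) of the two-phase builder pattern
// ClosureBuilder implements: capture values one at a time while accumulating the
// required size, then have the caller allocate GetSize() bytes and write everything out
// in place. PackedBuilderSketch is an illustrative name and only handles trivially
// copyable values, i.e. no object references or nested closures.
#include <cstddef>
#include <cstring>
#include <vector>

class PackedBuilderSketch {
 public:
  template <typename T>
  void Capture(const T& value) {
    const unsigned char* raw = reinterpret_cast<const unsigned char*>(&value);
    pending_.insert(pending_.end(), raw, raw + sizeof(T));  // pack back to back
  }

  std::size_t GetSize() const { return pending_.size(); }

  // Writes the captured bytes into caller-provided memory of at least GetSize() bytes.
  void CreateInPlace(void* memory) const {
    if (!pending_.empty()) {
      std::memcpy(memory, pending_.data(), pending_.size());
    }
  }

 private:
  std::vector<unsigned char> pending_;
};

// Usage:
//   PackedBuilderSketch builder;
//   builder.Capture(1);
//   builder.Capture(2.0);
//   std::vector<unsigned char> storage(builder.GetSize());
//   builder.CreateInPlace(storage.data());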
diff --git a/runtime/lambda/closure_test.cc b/runtime/lambda/closure_test.cc
deleted file mode 100644
index 7c1bd0d..0000000
--- a/runtime/lambda/closure_test.cc
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method.h"
-#include "lambda/art_lambda_method.h"
-#include "lambda/closure.h"
-#include "lambda/closure_builder.h"
-#include "lambda/closure_builder-inl.h"
-#include "utils.h"
-
-#include <numeric>
-#include <stdint.h>
-#include <type_traits>
-#include "gtest/gtest.h"
-
-// Turn this on for some extra printfs to help with debugging, since some code is optimized out.
-static constexpr const bool kDebuggingClosureTest = true;
-
-namespace std {
-  using Closure = art::lambda::Closure;
-
-  // Specialize std::default_delete so it knows how to properly delete closures
-  // given how we allocate them in this test.
-  //
-  // This is test-only because we don't want the rest of Art to do this.
-  template <>
-  struct default_delete<Closure> {
-    void operator()(Closure* closure) const {
-      delete[] reinterpret_cast<char*>(closure);
-    }
-  };
-}  // namespace std
-
-namespace art {
-
-// Fake lock acquisition to please clang lock checker.
-// This doesn't actually acquire any locks because we don't need multiple threads in this gtest.
-struct SCOPED_CAPABILITY ScopedFakeLock {
-  explicit ScopedFakeLock(MutatorMutex& mu) ACQUIRE(mu)
-      : mu_(mu) {
-  }
-
-  ~ScopedFakeLock() RELEASE()
-  {}
-
-  MutatorMutex& mu_;
-};
-
-namespace lambda {
-
-class ClosureTest : public ::testing::Test {
- public:
-  ClosureTest() = default;
-  ~ClosureTest() = default;
-
- protected:
-  static void SetUpTestCase() {
-  }
-
-  virtual void SetUp() {
-    // Create a completely dummy method here.
-    // It's "OK" because the Closure never needs to look inside of the ArtMethod
-    // (it just needs to be non-null).
-    uintptr_t ignore = 0xbadbad;
-    fake_method_ = reinterpret_cast<ArtMethod*>(ignore);
-  }
-
-  static ::testing::AssertionResult IsResultSuccessful(bool result) {
-    if (result) {
-      return ::testing::AssertionSuccess();
-    } else {
-      return ::testing::AssertionFailure();
-    }
-  }
-
-  // Create a closure that captures the static variables from 'args' by-value.
-  // The lambda method's captured variables types must match the ones in 'args'.
-  // -- This creates the closure directly in-memory by using memcpy.
-  template <typename ... Args>
-  static std::unique_ptr<Closure> CreateClosureStaticVariables(ArtLambdaMethod* lambda_method,
-                                                               Args&& ... args) {
-    constexpr size_t header_size = sizeof(ArtLambdaMethod*);
-    const size_t static_size = GetArgsSize(args ...) + header_size;
-    EXPECT_GE(static_size, sizeof(Closure));
-
-    // Can't just 'new' the Closure since we don't know the size up front.
-    char* closure_as_char_array = new char[static_size];
-    Closure* closure_ptr = new (closure_as_char_array) Closure;
-
-    // Set up the data
-    closure_ptr->lambda_info_ = lambda_method;
-    CopyArgs(closure_ptr->captured_[0].static_variables_, args ...);
-
-    // Make sure the entire thing is deleted once the unique_ptr goes out of scope.
-    return std::unique_ptr<Closure>(closure_ptr);  // NOLINT [whitespace/braces] [5]
-  }
-
-  // Copy variadic arguments into the destination array with memcpy.
-  template <typename T, typename ... Args>
-  static void CopyArgs(uint8_t destination[], T&& arg, Args&& ... args) {
-    memcpy(destination, &arg, sizeof(arg));
-    CopyArgs(destination + sizeof(arg), args ...);
-  }
-
-  // Base case: Done.
-  static void CopyArgs(uint8_t destination[]) {
-    UNUSED(destination);
-  }
-
-  // Create a closure that captures the static variables from 'args' by-value.
-  // The lambda method's captured variables types must match the ones in 'args'.
-  // -- This uses ClosureBuilder interface to set up the closure indirectly.
-  template <typename ... Args>
-  static std::unique_ptr<Closure> CreateClosureStaticVariablesFromBuilder(
-      ArtLambdaMethod* lambda_method,
-      Args&& ... args) {
-    // Acquire a fake lock since closure_builder needs it.
-    ScopedFakeLock fake_lock(*Locks::mutator_lock_);
-
-    ClosureBuilder closure_builder;
-    CaptureVariableFromArgsList(/*out*/closure_builder, args ...);
-
-    EXPECT_EQ(sizeof...(args), closure_builder.GetCaptureCount());
-
-    constexpr size_t header_size = sizeof(ArtLambdaMethod*);
-    const size_t static_size = GetArgsSize(args ...) + header_size;
-    EXPECT_GE(static_size, sizeof(Closure));
-
-    // For static variables, no nested closure, so size must match exactly.
-    EXPECT_EQ(static_size, closure_builder.GetSize());
-
-    // Can't just 'new' the Closure since we don't know the size up front.
-    char* closure_as_char_array = new char[static_size];
-    Closure* closure_ptr = new (closure_as_char_array) Closure;
-
-    // The closure builder packs the captured variables into a Closure.
-    closure_builder.CreateInPlace(closure_ptr, lambda_method);
-
-    // Make sure the entire thing is deleted once the unique_ptr goes out of scope.
-    return std::unique_ptr<Closure>(closure_ptr);  // NOLINT [whitespace/braces] [5]
-  }
-
-  // Call the correct ClosureBuilder::CaptureVariableXYZ function based on the type of args.
-  // Invokes for each arg in args.
-  template <typename ... Args>
-  static void CaptureVariableFromArgsList(/*out*/ClosureBuilder& closure_builder, Args ... args) {
-    int ignore[] = {
-        (CaptureVariableFromArgs(/*out*/closure_builder, args),0)...  // NOLINT [whitespace/comma] [3]
-    };
-    UNUSED(ignore);
-  }
-
-  // ClosureBuilder::CaptureVariablePrimitive for types that are primitive only.
-  template <typename T>
-  typename std::enable_if<ShortyFieldTypeTraits::IsPrimitiveType<T>()>::type
-  static CaptureVariableFromArgs(/*out*/ClosureBuilder& closure_builder, T value) {
-    static_assert(ShortyFieldTypeTraits::IsPrimitiveType<T>(), "T must be a shorty primitive");
-    closure_builder.CaptureVariablePrimitive<T, ShortyFieldTypeSelectEnum<T>::value>(value);
-  }
-
-  // ClosureBuilder::CaptureVariableObject for types that are objects only.
-  template <typename T>
-  typename std::enable_if<ShortyFieldTypeTraits::IsObjectType<T>()>::type
-  static CaptureVariableFromArgs(/*out*/ClosureBuilder& closure_builder, const T* object) {
-    ScopedFakeLock fake_lock(*Locks::mutator_lock_);
-    closure_builder.CaptureVariableObject(object);
-  }
-
-  // Sum of sizeof(Args...).
-  template <typename T, typename ... Args>
-  static constexpr size_t GetArgsSize(T&& arg, Args&& ... args) {
-    return sizeof(arg) + GetArgsSize(args ...);
-  }
-
-  // Base case: Done.
-  static constexpr size_t GetArgsSize() {
-    return 0;
-  }
-
-  // Take "U" and memcpy it into a "T". T starts out as (T)0.
-  template <typename T, typename U>
-  static T ExpandingBitCast(const U& val) {
-    static_assert(sizeof(T) >= sizeof(U), "U too large");
-    T new_val = static_cast<T>(0);
-    memcpy(&new_val, &val, sizeof(U));
-    return new_val;
-  }
-
-  // Templatized extraction from closures by checking their type with enable_if.
-  template <typename T>
-  static typename std::enable_if<ShortyFieldTypeTraits::IsPrimitiveNarrowType<T>()>::type
-  ExpectCapturedVariable(const Closure* closure, size_t index, T value) {
-    EXPECT_EQ(ExpandingBitCast<uint32_t>(value), closure->GetCapturedPrimitiveNarrow(index))
-        << " with index " << index;
-  }
-
-  template <typename T>
-  static typename std::enable_if<ShortyFieldTypeTraits::IsPrimitiveWideType<T>()>::type
-  ExpectCapturedVariable(const Closure* closure, size_t index, T value) {
-    EXPECT_EQ(ExpandingBitCast<uint64_t>(value), closure->GetCapturedPrimitiveWide(index))
-        << " with index " << index;
-  }
-
-  // Templatized SFINAE for Objects so we can get better error messages.
-  template <typename T>
-  static typename std::enable_if<ShortyFieldTypeTraits::IsObjectType<T>()>::type
-  ExpectCapturedVariable(const Closure* closure, size_t index, const T* object) {
-    EXPECT_EQ(object, closure->GetCapturedObject(index))
-        << " with index " << index;
-  }
-
-  template <typename ... Args>
-  void TestPrimitive(const char *descriptor, Args ... args) {
-    const char* shorty = descriptor;
-
-    SCOPED_TRACE(descriptor);
-
-    ASSERT_EQ(strlen(shorty), sizeof...(args))
-        << "test error: descriptor must have same # of types as the # of captured variables";
-
-    // Important: This fake lambda method needs to out-live any Closures we create with it.
-    ArtLambdaMethod lambda_method{fake_method_,                    // NOLINT [whitespace/braces] [5]
-                                  descriptor,                      // NOLINT [whitespace/blank_line] [2]
-                                  shorty,
-                                 };
-
-    std::unique_ptr<Closure> closure_a;
-    std::unique_ptr<Closure> closure_b;
-
-    // Test the closure twice when it's constructed in different ways.
-    {
-      // Create the closure in a "raw" manner, that is directly with memcpy
-      // since we know the underlying data format.
-      // This simulates how the compiler would lay out the data directly.
-      SCOPED_TRACE("raw closure");
-      std::unique_ptr<Closure> closure_raw = CreateClosureStaticVariables(&lambda_method, args ...);
-
-      if (kDebuggingClosureTest) {
-        std::cerr << "closure raw address: " << closure_raw.get() << std::endl;
-      }
-      TestPrimitiveWithClosure(closure_raw.get(), descriptor, shorty, args ...);
-      closure_a = std::move(closure_raw);
-    }
-
-    {
-      // Create the closure with the ClosureBuilder, which is done indirectly.
-      // This simulates how the interpreter would create the closure dynamically at runtime.
-      SCOPED_TRACE("closure from builder");
-      std::unique_ptr<Closure> closure_built =
-          CreateClosureStaticVariablesFromBuilder(&lambda_method, args ...);
-      if (kDebuggingClosureTest) {
-        std::cerr << "closure built address: " << closure_built.get() << std::endl;
-      }
-      TestPrimitiveWithClosure(closure_built.get(), descriptor, shorty, args ...);
-      closure_b = std::move(closure_built);
-    }
-
-    // The closures should be identical memory-wise as well.
-    EXPECT_EQ(closure_a->GetSize(), closure_b->GetSize());
-    EXPECT_TRUE(memcmp(closure_a.get(),
-                       closure_b.get(),
-                       std::min(closure_a->GetSize(), closure_b->GetSize())) == 0);
-  }
-
-  template <typename ... Args>
-  static void TestPrimitiveWithClosure(Closure* closure,
-                                       const char* descriptor,
-                                       const char* shorty,
-                                       Args ... args) {
-    EXPECT_EQ(sizeof(ArtLambdaMethod*) + GetArgsSize(args...), closure->GetSize());
-    EXPECT_EQ(sizeof...(args), closure->GetNumberOfCapturedVariables());
-    EXPECT_STREQ(descriptor, closure->GetCapturedVariablesTypeDescriptor());
-    TestPrimitiveExpects(closure, shorty, /*index*/0, args ...);
-  }
-
-  // Call EXPECT_EQ for each argument in the closure's #GetCapturedX.
-  template <typename T, typename ... Args>
-  static void TestPrimitiveExpects(
-      const Closure* closure, const char* shorty, size_t index, T arg, Args ... args) {
-    ASSERT_EQ(ShortyFieldType(shorty[index]).GetStaticSize(), sizeof(T))
-        << "Test error: Type mismatch at index " << index;
-    ExpectCapturedVariable(closure, index, arg);
-    EXPECT_EQ(ShortyFieldType(shorty[index]), closure->GetCapturedShortyType(index));
-    TestPrimitiveExpects(closure, shorty, index + 1, args ...);
-  }
-
-  // Base case for EXPECT_EQ.
-  static void TestPrimitiveExpects(const Closure* closure, const char* shorty, size_t index) {
-    UNUSED(closure, shorty, index);
-  }
-
-  ArtMethod* fake_method_;
-};
-
-TEST_F(ClosureTest, TestTrivial) {
-  ArtLambdaMethod lambda_method{fake_method_,                    // NOLINT [whitespace/braces] [5]
-                                "",  // No captured variables    // NOLINT [whitespace/blank_line] [2]
-                                "",  // No captured variables
-                               };
-
-  std::unique_ptr<Closure> closure = CreateClosureStaticVariables(&lambda_method);
-
-  EXPECT_EQ(sizeof(ArtLambdaMethod*), closure->GetSize());
-  EXPECT_EQ(0u, closure->GetNumberOfCapturedVariables());
-}  // TEST_F
-
-TEST_F(ClosureTest, TestPrimitiveSingle) {
-  TestPrimitive("Z", true);
-  TestPrimitive("B", int8_t(0xde));
-  TestPrimitive("C", uint16_t(0xbeef));
-  TestPrimitive("S", int16_t(0xdead));
-  TestPrimitive("I", int32_t(0xdeadbeef));
-  TestPrimitive("F", 0.123f);
-  TestPrimitive("J", int64_t(0xdeadbeef00c0ffee));
-  TestPrimitive("D", 123.456);
-}  // TEST_F
-
-TEST_F(ClosureTest, TestPrimitiveMany) {
-  TestPrimitive("ZZ", true, false);
-  TestPrimitive("ZZZ", true, false, true);
-  TestPrimitive("BBBB", int8_t(0xde), int8_t(0xa0), int8_t(0xff), int8_t(0xcc));
-  TestPrimitive("CC", uint16_t(0xbeef), uint16_t(0xdead));
-  TestPrimitive("SSSS", int16_t(0xdead), int16_t(0xc0ff), int16_t(0xf000), int16_t(0xbaba));
-  TestPrimitive("III", int32_t(0xdeadbeef), int32_t(0xc0ffee), int32_t(0xbeefdead));
-  TestPrimitive("FF", 0.123f, 555.666f);
-  TestPrimitive("JJJ", int64_t(0xdeadbeef00c0ffee), int64_t(0x123), int64_t(0xc0ffee));
-  TestPrimitive("DD", 123.456, 777.888);
-}  // TEST_F
-
-TEST_F(ClosureTest, TestPrimitiveMixed) {
-  TestPrimitive("ZZBBCCSSIIFFJJDD",
-                true, false,
-                int8_t(0xde), int8_t(0xa0),
-                uint16_t(0xbeef), uint16_t(0xdead),
-                int16_t(0xdead), int16_t(0xc0ff),
-                int32_t(0xdeadbeef), int32_t(0xc0ffee),
-                0.123f, 555.666f,
-                int64_t(0xdeadbeef00c0ffee), int64_t(0x123),
-                123.456, 777.888);
-}  // TEST_F
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/leaking_allocator.cc b/runtime/lambda/leaking_allocator.cc
deleted file mode 100644
index 22bb294..0000000
--- a/runtime/lambda/leaking_allocator.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/bit_utils.h"
-#include "lambda/leaking_allocator.h"
-#include "linear_alloc.h"
-#include "runtime.h"
-
-namespace art {
-namespace lambda {
-
-void* LeakingAllocator::AllocateMemoryImpl(Thread* self, size_t byte_size, size_t align_size) {
-  // TODO: use GetAllocatorForClassLoader to allocate lambda ArtMethod data.
-  void* mem = Runtime::Current()->GetLinearAlloc()->Alloc(self, byte_size);
-  DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(mem), align_size);
-  return mem;
-}
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/leaking_allocator.h b/runtime/lambda/leaking_allocator.h
deleted file mode 100644
index cb5a1bf..0000000
--- a/runtime/lambda/leaking_allocator.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
-#define ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
-
-#include <utility>  // std::forward
-#include <type_traits>  // std::aligned_storage
-
-namespace art {
-class Thread;  // forward declaration
-
-namespace lambda {
-
-// Temporary class to centralize all the leaking allocations.
-// Allocations made through this class are never freed, but it is a placeholder
-// that means that the calling code needs to be rewritten to properly:
-//
-// (a) Have a lifetime scoped to some other entity.
-// (b) Not be allocated over and over again if it was already allocated once (immutable data).
-//
-// TODO: do all of the above a/b for each callsite, and delete this class.
-class LeakingAllocator {
- public:
-  // An opaque type which is guaranteed for:
-  // * a) be large enough to hold T (e.g. for in-place new)
-  // * b) be well-aligned (so that reads/writes are well-defined) to T
-  // * c) strict-aliasing compatible with T*
-  //
-  // Nominally used to allocate memory for yet unconstructed instances of T.
-  template <typename T>
-  using AlignedMemoryStorage = typename std::aligned_storage<sizeof(T), alignof(T)>::type;
-
-  // Allocate byte_size bytes worth of memory. Never freed.
-  template <typename T>
-  static AlignedMemoryStorage<T>* AllocateMemory(Thread* self, size_t byte_size = sizeof(T)) {
-    return reinterpret_cast<AlignedMemoryStorage<T>*>(
-        AllocateMemoryImpl(self, byte_size, alignof(T)));
-  }
-
-  // Make a new instance of T, flexibly sized, in-place at newly allocated memory. Never freed.
-  template <typename T, typename... Args>
-  static T* MakeFlexibleInstance(Thread* self, size_t byte_size, Args&&... args) {
-    return new (AllocateMemory<T>(self, byte_size)) T(std::forward<Args>(args)...);
-  }
-
-  // Make a new instance of T in-place at newly allocated memory. Never freed.
-  template <typename T, typename... Args>
-  static T* MakeInstance(Thread* self, Args&&... args) {
-    return new (AllocateMemory<T>(self, sizeof(T))) T(std::forward<Args>(args)...);
-  }
-
- private:
-  static void* AllocateMemoryImpl(Thread* self, size_t byte_size, size_t align_size);
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
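
The deleted LeakingAllocator boils down to a standard C++ idiom: reserve raw storage that is large enough and correctly aligned for T, then construct T in place with placement new. A minimal stand-alone sketch of that idiom, using std::malloc as an illustrative never-freed backing store rather than ART's LinearAlloc:

#include <cstdlib>
#include <new>
#include <type_traits>
#include <utility>

// Raw storage large enough and aligned for T (mirrors the removed AlignedMemoryStorage<T>).
template <typename T>
using AlignedStorage = typename std::aligned_storage<sizeof(T), alignof(T)>::type;

template <typename T, typename... Args>
T* MakeLeakedInstance(Args&&... args) {
  // Never freed, just like the allocator this commit deletes. std::malloc is sufficiently
  // aligned for ordinary (non over-aligned) types.
  void* raw = std::malloc(sizeof(AlignedStorage<T>));
  return new (raw) T(std::forward<Args>(args)...);  // Construct T in place.
}
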
diff --git a/runtime/lambda/shorty_field_type.h b/runtime/lambda/shorty_field_type.h
deleted file mode 100644
index c314fd2..0000000
--- a/runtime/lambda/shorty_field_type.h
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_SHORTY_FIELD_TYPE_H_
-#define ART_RUNTIME_LAMBDA_SHORTY_FIELD_TYPE_H_
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "base/value_object.h"
-#include "globals.h"
-#include "runtime/primitive.h"
-
-#include <ostream>
-
-namespace art {
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-
-struct Closure;  // forward declaration
-
-// TODO: Refactor together with primitive.h
-
-// The short form of a field type descriptor. Corresponds to ShortyFieldType in dex specification.
-// Only types usable by a field (and locals) are allowed (i.e. no void type).
-// Note that arrays and objects are treated both as 'L'.
-//
-// This is effectively a 'char' enum-like zero-cost type-safe wrapper with extra helper functions.
-struct ShortyFieldType : ValueObject {
-  // Use as if this was an enum class, e.g. 'ShortyFieldType::kBoolean'.
-  enum : char {
-    // Primitives (Narrow):
-    kBoolean = 'Z',
-    kByte = 'B',
-    kChar = 'C',
-    kShort = 'S',
-    kInt = 'I',
-    kFloat = 'F',
-    // Primitives (Wide):
-    kLong = 'J',
-    kDouble = 'D',
-    // Managed types:
-    kObject = 'L',  // This can also be an array (which is otherwise '[' in a non-shorty).
-    kLambda = '\\',
-  };  // NOTE: This is an anonymous enum so we can get exhaustive switch checking from the compiler.
-
-  // Implicitly construct from the enum above. Value must be one of the enum list members above.
-  // Always safe to use, does not do any DCHECKs.
-  inline constexpr ShortyFieldType(decltype(kByte) c) : value_(c) {
-  }
-
-  // Default constructor. The initial value is undefined. Initialize before calling methods.
-  // This is very unsafe but exists as a convenience to having undefined values.
-  explicit ShortyFieldType() : value_(StaticCastValue(0)) {
-  }
-
-  // Explicitly construct from a char. Value must be one of the enum list members above.
-  // Conversion is potentially unsafe, so DCHECKing is performed.
-  explicit inline ShortyFieldType(char c) : value_(StaticCastValue(c)) {
-    if (kIsDebugBuild) {
-      // Verify at debug-time that our conversion is safe.
-      ShortyFieldType ignored;
-      DCHECK(MaybeCreate(c, &ignored)) << "unknown shorty field type '" << c << "'";
-    }
-  }
-
-  // Attempts to parse the character in 'shorty_field_type' into its strongly typed version.
-  // Returns false if the character was out of range of the grammar.
-  static bool MaybeCreate(char shorty_field_type, ShortyFieldType* out) {
-    DCHECK(out != nullptr);
-    switch (shorty_field_type) {
-      case kBoolean:
-      case kByte:
-      case kChar:
-      case kShort:
-      case kInt:
-      case kFloat:
-      case kLong:
-      case kDouble:
-      case kObject:
-      case kLambda:
-        *out = ShortyFieldType(static_cast<decltype(kByte)>(shorty_field_type));
-        return true;
-      default:
-        break;
-    }
-
-    return false;
-  }
-
-  // Convert the first type in a field type descriptor string into a shorty.
-  // Arrays are converted into objects.
-  // Does not work for 'void' types (as they are illegal in a field type descriptor).
-  static ShortyFieldType CreateFromFieldTypeDescriptor(const char* field_type_descriptor) {
-    DCHECK(field_type_descriptor != nullptr);
-    char c = *field_type_descriptor;
-    if (UNLIKELY(c == kArray)) {  // Arrays are treated as object references.
-      c = kObject;
-    }
-    return ShortyFieldType{c};  // NOLINT [readability/braces] [4]
-  }
-
-  // Parse the first type in the field type descriptor string into a shorty.
-  // See CreateFromFieldTypeDescriptor for more details.
-  //
-  // Returns the pointer offset into the middle of the field_type_descriptor
-  // that would either point to the next shorty type, or to null if there are
-  // no more types.
-  //
-  // DCHECKs that each of the nested types is a valid shorty field type. This
-  // means the type descriptor must be already valid.
-  static const char* ParseFromFieldTypeDescriptor(const char* field_type_descriptor,
-                                                  ShortyFieldType* out_type) {
-    DCHECK(field_type_descriptor != nullptr);
-
-    if (UNLIKELY(field_type_descriptor[0] == '\0')) {
-      // Handle empty strings by immediately returning null.
-      return nullptr;
-    }
-
-    // All non-empty strings must be a valid list of field type descriptors, otherwise
-    // the DCHECKs will kick in and the program will crash.
-    const char shorter_type = *field_type_descriptor;
-
-    ShortyFieldType safe_type;
-    bool type_set = MaybeCreate(shorter_type, &safe_type);
-
-    // Lambda that keeps skipping characters until it sees ';'.
-    // Stops one character -after- the ';'.
-    auto skip_until_semicolon = [&field_type_descriptor]() {
-      while (*field_type_descriptor != ';' && *field_type_descriptor != '\0') {
-        ++field_type_descriptor;
-      }
-      DCHECK_NE(*field_type_descriptor, '\0')
-          << " type descriptor terminated too early: " << field_type_descriptor;
-      ++field_type_descriptor;  // Skip the ';'
-    };
-
-    ++field_type_descriptor;
-    switch (shorter_type) {
-      case kObject:
-        skip_until_semicolon();
-
-        DCHECK(type_set);
-        DCHECK(safe_type == kObject);
-        break;
-      case kArray:
-        // Strip out all of the leading [[[[[s, we don't care if it's a multi-dimensional array.
-        while (*field_type_descriptor == '[' && *field_type_descriptor != '\0') {
-          ++field_type_descriptor;
-        }
-        DCHECK_NE(*field_type_descriptor, '\0')
-            << " type descriptor terminated too early: " << field_type_descriptor;
-        // Either a primitive, object, or closure left. No more arrays.
-        {
-          // Now skip all the characters that form the array's interior-most element type
-          // (which itself is guaranteed not to be an array).
-          ShortyFieldType array_interior_type;
-          type_set = MaybeCreate(*field_type_descriptor, &array_interior_type);
-          DCHECK(type_set) << " invalid remaining type descriptor " << field_type_descriptor;
-
-          // Handle array-of-objects case like [[[[[LObject; and array-of-closures like [[[[[\Foo;
-          if (*field_type_descriptor == kObject || *field_type_descriptor == kLambda) {
-            skip_until_semicolon();
-          } else {
-            // Handle primitives which are exactly one character we can skip.
-            DCHECK(array_interior_type.IsPrimitive());
-            ++field_type_descriptor;
-          }
-        }
-
-        safe_type = kObject;
-        type_set = true;
-        break;
-      case kLambda:
-        skip_until_semicolon();
-
-        DCHECK(safe_type == kLambda);
-        DCHECK(type_set);
-        break;
-      default:
-        DCHECK_NE(kVoid, shorter_type) << "cannot make a ShortyFieldType from a void type";
-        break;
-    }
-
-    DCHECK(type_set) << "invalid shorty type descriptor " << shorter_type;
-
-    *out_type = safe_type;
-    return type_set ? field_type_descriptor : nullptr;
-  }
-
-  // Explicitly convert to a char.
-  inline explicit operator char() const {
-    return value_;
-  }
-
-  // Is this a primitive?
-  inline bool IsPrimitive() const {
-    return IsPrimitiveNarrow() || IsPrimitiveWide();
-  }
-
-  // Is this a narrow primitive (i.e. can fit into 1 virtual register)?
-  inline bool IsPrimitiveNarrow() const {
-    switch (value_) {
-      case kBoolean:
-      case kByte:
-      case kChar:
-      case kShort:
-      case kInt:
-      case kFloat:
-        return true;
-      default:
-        return false;
-    }
-  }
-
-  // Is this a wide primitive (i.e. needs exactly 2 virtual registers)?
-  inline bool IsPrimitiveWide() const {
-    switch (value_) {
-      case kLong:
-      case kDouble:
-        return true;
-      default:
-        return false;
-    }
-  }
-
-  // Is this an object reference (which can also be an array)?
-  inline bool IsObject() const {
-    return value_ == kObject;
-  }
-
-  // Is this a lambda?
-  inline bool IsLambda() const {
-    return value_ == kLambda;
-  }
-
-  // Is the size of this (to store inline as a field) always known at compile-time?
-  inline bool IsStaticSize() const {
-    return !IsLambda();
-  }
-
-  // Get the compile-time size (to be able to store it inline as a field or on stack).
-  // Dynamically-sized values such as lambdas return the guaranteed lower bound.
-  inline size_t GetStaticSize() const {
-    switch (value_) {
-      case kBoolean:
-        return sizeof(bool);
-      case kByte:
-        return sizeof(uint8_t);
-      case kChar:
-        return sizeof(int16_t);
-      case kShort:
-        return sizeof(uint16_t);
-      case kInt:
-        return sizeof(int32_t);
-      case kLong:
-        return sizeof(int64_t);
-      case kFloat:
-        return sizeof(float);
-      case kDouble:
-        return sizeof(double);
-      case kObject:
-        return kObjectReferenceSize;
-      case kLambda:
-        return sizeof(void*);  // Large enough to store the ArtLambdaMethod
-      default:
-        DCHECK(false) << "unknown shorty field type '" << static_cast<char>(value_) << "'";
-        UNREACHABLE();
-    }
-  }
-
-  // Implicitly convert to the anonymous nested inner type. Used for exhaustive switch detection.
-  inline operator decltype(kByte)() const {
-    return value_;
-  }
-
-  // Returns a read-only static string representing the enum name, useful for printing/debug only.
-  inline const char* ToString() const {
-    switch (value_) {
-      case kBoolean:
-        return "kBoolean";
-      case kByte:
-        return "kByte";
-      case kChar:
-        return "kChar";
-      case kShort:
-        return "kShort";
-      case kInt:
-        return "kInt";
-      case kLong:
-        return "kLong";
-      case kFloat:
-        return "kFloat";
-      case kDouble:
-        return "kDouble";
-      case kObject:
-        return "kObject";
-      case kLambda:
-        return "kLambda";
-      default:
-        // Undefined behavior if we get this far. Pray the compiler gods are merciful.
-        return "<undefined>";
-    }
-  }
-
- private:
-  static constexpr const char kArray = '[';
-  static constexpr const char kVoid  = 'V';
-
-  // Helper to statically cast anything into our nested anonymous enum type.
-  template <typename T>
-  inline static decltype(kByte) StaticCastValue(const T& anything) {
-    return static_cast<decltype(value_)>(anything);
-  }
-
-  // The only field in this struct.
-  decltype(kByte) value_;
-};
-
-
-  // Print to an output stream.
-inline std::ostream& operator<<(std::ostream& ostream, ShortyFieldType shorty) {
-  return ostream << shorty.ToString();
-}
-
-static_assert(sizeof(ShortyFieldType) == sizeof(char),
-              "ShortyFieldType must be lightweight just like a char");
-
-// Compile-time trait information regarding the ShortyFieldType.
-// Used by static_asserts to verify that the templates are correctly used at compile-time.
-//
-// For example,
-//     ShortyFieldTypeTraits::IsPrimitiveNarrowType<int64_t>() == true
-//     ShortyFieldTypeTraits::IsObjectType<mirror::Object*>() == true
-struct ShortyFieldTypeTraits {
-  // A type guaranteed to be large enough to holds any of the shorty field types.
-  using MaxType = uint64_t;
-
-  // Type traits: Returns true if 'T' is a valid type that can be represented by a shorty field type.
-  template <typename T>
-  static inline constexpr bool IsType() {
-    return IsPrimitiveType<T>() || IsObjectType<T>() || IsLambdaType<T>();
-  }
-
-  // Returns true if 'T' is a primitive type (i.e. a built-in without nested references).
-  template <typename T>
-  static inline constexpr bool IsPrimitiveType() {
-    return IsPrimitiveNarrowType<T>() || IsPrimitiveWideType<T>();
-  }
-
-  // Returns true if 'T' is a primitive type that is narrow (i.e. can be stored into 1 vreg).
-  template <typename T>
-  static inline constexpr bool IsPrimitiveNarrowType() {
-    return IsPrimitiveNarrowTypeImpl(static_cast<T* const>(nullptr));
-  }
-
-  // Returns true if 'T' is a primitive type that is wide (i.e. needs 2 vregs for storage).
-  template <typename T>
-  static inline constexpr bool IsPrimitiveWideType() {
-    return IsPrimitiveWideTypeImpl(static_cast<T* const>(nullptr));
-  }
-
-  // Returns true if 'T' is an object (i.e. it is a managed GC reference).
-  // Note: This is equivalent to std::base_of<mirror::Object*, T>::value
-  template <typename T>
-  static inline constexpr bool IsObjectType() {
-    return IsObjectTypeImpl(static_cast<T* const>(nullptr));
-  }
-
-  // Returns true if 'T' is a lambda (i.e. it is a closure with unknown static data);
-  template <typename T>
-  static inline constexpr bool IsLambdaType() {
-    return IsLambdaTypeImpl(static_cast<T* const>(nullptr));
-  }
-
- private:
-#define IS_VALID_TYPE_SPECIALIZATION(type, name) \
-  static inline constexpr bool Is ## name ## TypeImpl(type* const  = 0) { /*NOLINT*/ \
-    return true; \
-  } \
-  \
-  static_assert(sizeof(MaxType) >= sizeof(type), "MaxType too small")
-
-  IS_VALID_TYPE_SPECIALIZATION(bool, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(int8_t, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(uint8_t, PrimitiveNarrow);  // Not strictly true, but close enough.
-  IS_VALID_TYPE_SPECIALIZATION(int16_t, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(uint16_t, PrimitiveNarrow);  // Chars are unsigned.
-  IS_VALID_TYPE_SPECIALIZATION(int32_t, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(uint32_t, PrimitiveNarrow);  // Not strictly true, but close enough.
-  IS_VALID_TYPE_SPECIALIZATION(float, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(int64_t, PrimitiveWide);
-  IS_VALID_TYPE_SPECIALIZATION(uint64_t, PrimitiveWide);  // Not strictly true, but close enough.
-  IS_VALID_TYPE_SPECIALIZATION(double, PrimitiveWide);
-  IS_VALID_TYPE_SPECIALIZATION(mirror::Object*, Object);
-  IS_VALID_TYPE_SPECIALIZATION(Closure*, Lambda);
-#undef IS_VALID_TYPE_SPECIALIZATION
-
-#define IS_VALID_TYPE_SPECIALIZATION_IMPL(name) \
-  template <typename T> \
-  static inline constexpr bool Is ## name ## TypeImpl(T* const = 0) { \
-    return false; \
-  }
-
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(PrimitiveWide);
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(Object);
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(Lambda);
-
-#undef IS_VALID_TYPE_SPECIALIZATION_IMPL
-};
-
-// Maps the ShortyFieldType enum into it's C++ type equivalent, into the "type" typedef.
-// For example:
-//     ShortyFieldTypeSelectType<ShortyFieldType::kBoolean>::type => bool
-//     ShortyFieldTypeSelectType<ShortyFieldType::kLong>::type => int64_t
-//
-// Invalid enums will not have the type defined.
-template <decltype(ShortyFieldType::kByte) Shorty>
-struct ShortyFieldTypeSelectType {
-};
-
-// Maps the C++ type into it's ShortyFieldType enum equivalent, into the "value" constexpr.
-// For example:
-//     ShortyFieldTypeSelectEnum<bool>::value => ShortyFieldType::kBoolean
-//     ShortyFieldTypeSelectEnum<int64_t>::value => ShortyFieldType::kLong
-//
-// Signed-ness must match for a valid select, e.g. uint64_t will not map to kLong, but int64_t will.
-// Invalid types will not have the value defined (see e.g. ShortyFieldTypeTraits::IsType<T>())
-template <typename T>
-struct ShortyFieldTypeSelectEnum {
-};
-
-#define SHORTY_FIELD_TYPE_SELECT_IMPL(cpp_type, enum_element)      \
-template <> \
-struct ShortyFieldTypeSelectType<ShortyFieldType::enum_element> { \
-  using type = cpp_type; \
-}; \
-\
-template <> \
-struct ShortyFieldTypeSelectEnum<cpp_type> { \
-  static constexpr const auto value = ShortyFieldType::enum_element; \
-}; \
-
-SHORTY_FIELD_TYPE_SELECT_IMPL(bool, kBoolean);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int8_t, kByte);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int16_t, kShort);
-SHORTY_FIELD_TYPE_SELECT_IMPL(uint16_t, kChar);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int32_t, kInt);
-SHORTY_FIELD_TYPE_SELECT_IMPL(float, kFloat);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int64_t, kLong);
-SHORTY_FIELD_TYPE_SELECT_IMPL(double, kDouble);
-SHORTY_FIELD_TYPE_SELECT_IMPL(mirror::Object*, kObject);
-SHORTY_FIELD_TYPE_SELECT_IMPL(Closure*, kLambda);
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_SHORTY_FIELD_TYPE_H_
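
The parsing logic removed above collapses field type descriptors into one-character shorties: arrays of any depth become 'L', reference types ("Lfoo/Bar;" or "\Closure;") are skipped up to the ';', and primitives consume exactly one character. A compact re-statement of that collapsing rule in plain C++ (error handling omitted, the descriptor is assumed well formed, just as the removed code DCHECKed):

#include <vector>

std::vector<char> ParseShorties(const char* desc) {
  std::vector<char> out;
  while (*desc != '\0') {
    bool is_array = false;
    while (*desc == '[') {            // Strip all array dimensions.
      is_array = true;
      ++desc;
    }
    char c = *desc++;
    if (c == 'L' || c == '\\') {      // Object or lambda reference: skip past the ';'.
      while (*desc != ';') ++desc;
      ++desc;
    }
    out.push_back(is_array ? 'L' : c);  // Arrays are always reported as objects.
  }
  return out;
}
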
diff --git a/runtime/lambda/shorty_field_type_test.cc b/runtime/lambda/shorty_field_type_test.cc
deleted file mode 100644
index 32bade9..0000000
--- a/runtime/lambda/shorty_field_type_test.cc
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/shorty_field_type.h"
-#include "mirror/object_reference.h"
-
-#include "utils.h"
-#include <numeric>
-#include <stdint.h>
-#include "gtest/gtest.h"
-
-#define EXPECT_NULL(expected) EXPECT_EQ(reinterpret_cast<const void*>(expected), \
-                                        reinterpret_cast<void*>(nullptr));
-
-namespace art {
-namespace lambda {
-
-class ShortyFieldTypeTest : public ::testing::Test {
- public:
-  ShortyFieldTypeTest() = default;
-  ~ShortyFieldTypeTest() = default;
-
- protected:
-  static void SetUpTestCase() {
-  }
-
-  virtual void SetUp() {
-  }
-
-  static ::testing::AssertionResult IsResultSuccessful(bool result) {
-    if (result) {
-      return ::testing::AssertionSuccess();
-    } else {
-      return ::testing::AssertionFailure();
-    }
-  }
-
-  template <typename T>
-  static std::string ListToString(const T& list) {
-    std::stringstream stream;
-
-    stream << "[";
-    for (auto&& val : list) {
-      stream << val << ", ";
-    }
-    stream << "]";
-
-    return stream.str();
-  }
-
-  // Compare two vector-like types for equality.
-  template <typename T>
-  static ::testing::AssertionResult AreListsEqual(const T& expected, const T& actual) {
-    bool success = true;
-    std::stringstream stream;
-
-    if (expected.size() != actual.size()) {
-      success = false;
-      stream << "Expected list size: " << expected.size()
-             << ", but got list size: " << actual.size();
-      stream << std::endl;
-    }
-
-    for (size_t j = 0; j < std::min(expected.size(), actual.size()); ++j) {
-      if (expected[j] != actual[j]) {
-        success = false;
-        stream << "Expected element '" << j << "' to be '" << expected[j] << "', but got actual: '"
-               << actual[j] << "'.";
-        stream << std::endl;
-      }
-    }
-
-    if (success) {
-      return ::testing::AssertionSuccess();
-    }
-
-    stream << "Expected list was: " << ListToString(expected)
-           << ", actual list was: " << ListToString(actual);
-
-    return ::testing::AssertionFailure() << stream.str();
-  }
-
-  static std::vector<ShortyFieldType> ParseLongTypeDescriptorsToList(const char* type_descriptor) {
-    std::vector<ShortyFieldType> lst;
-
-    ShortyFieldType shorty;
-
-    const char* parsed = type_descriptor;
-    while ((parsed = ShortyFieldType::ParseFromFieldTypeDescriptor(parsed, &shorty)) != nullptr) {
-      lst.push_back(shorty);
-    }
-
-    return lst;
-  }
-
- protected:
-  // Shorthands for the ShortyFieldType constants.
-  // The letters are the same as JNI letters, with kS_ being a lambda since \ is not available.
-  static constexpr ShortyFieldType kSZ = ShortyFieldType::kBoolean;
-  static constexpr ShortyFieldType kSB = ShortyFieldType::kByte;
-  static constexpr ShortyFieldType kSC = ShortyFieldType::kChar;
-  static constexpr ShortyFieldType kSS = ShortyFieldType::kShort;
-  static constexpr ShortyFieldType kSI = ShortyFieldType::kInt;
-  static constexpr ShortyFieldType kSF = ShortyFieldType::kFloat;
-  static constexpr ShortyFieldType kSJ = ShortyFieldType::kLong;
-  static constexpr ShortyFieldType kSD = ShortyFieldType::kDouble;
-  static constexpr ShortyFieldType kSL = ShortyFieldType::kObject;
-  static constexpr ShortyFieldType kS_ = ShortyFieldType::kLambda;
-};
-
-TEST_F(ShortyFieldTypeTest, TestMaybeCreate) {
-  ShortyFieldType shorty;
-
-  std::vector<char> shorties = {'Z', 'B', 'C', 'S', 'I', 'F', 'J', 'D', 'L', '\\'};
-
-  // All valid 'shorty' characters are created successfully.
-  for (const char c : shorties) {
-    EXPECT_TRUE(ShortyFieldType::MaybeCreate(c, &shorty)) << c;
-    EXPECT_EQ(c, static_cast<char>(c));
-  }
-
-  // All other characters can never be created.
-  for (unsigned char c = 0; c < std::numeric_limits<unsigned char>::max(); ++c) {
-    // Skip the valid characters.
-    if (std::find(shorties.begin(), shorties.end(), c) != shorties.end()) { continue; }
-    // All invalid characters should fail.
-    EXPECT_FALSE(ShortyFieldType::MaybeCreate(static_cast<char>(c), &shorty)) << c;
-  }
-}  // TEST_F
-
-TEST_F(ShortyFieldTypeTest, TestCreateFromFieldTypeDescriptor) {
-  // Sample input.
-  std::vector<const char*> lengthies = {
-      "Z", "B", "C", "S", "I", "F", "J", "D", "LObject;", "\\Closure;",
-      "[Z", "[[B", "[[LObject;"
-  };
-
-  // Expected output.
-  std::vector<ShortyFieldType> expected = {
-      ShortyFieldType::kBoolean,
-      ShortyFieldType::kByte,
-      ShortyFieldType::kChar,
-      ShortyFieldType::kShort,
-      ShortyFieldType::kInt,
-      ShortyFieldType::kFloat,
-      ShortyFieldType::kLong,
-      ShortyFieldType::kDouble,
-      ShortyFieldType::kObject,
-      ShortyFieldType::kLambda,
-      // Arrays are always treated as objects.
-      ShortyFieldType::kObject,
-      ShortyFieldType::kObject,
-      ShortyFieldType::kObject,
-  };
-
-  // All valid lengthy types are correctly turned into the expected shorty type.
-  for (size_t i = 0; i < lengthies.size(); ++i) {
-    EXPECT_EQ(expected[i], ShortyFieldType::CreateFromFieldTypeDescriptor(lengthies[i]));
-  }
-}  // TEST_F
-
-TEST_F(ShortyFieldTypeTest, TestParseFromFieldTypeDescriptor) {
-  // Sample input.
-  std::vector<const char*> lengthies = {
-      // Empty list
-      "",
-      // Primitives
-      "Z", "B", "C", "S", "I", "F", "J", "D",
-      // Non-primitives
-      "LObject;", "\\Closure;",
-      // Arrays. The biggest PITA.
-      "[Z", "[[B", "[[LObject;", "[[[[\\Closure;",
-      // Multiple things at once:
-      "ZBCSIFJD",
-      "LObject;LObject;SSI",
-      "[[ZDDZ",
-      "[[LObject;[[Z[F\\Closure;LObject;",
-  };
-
-  // Expected output.
-  std::vector<std::vector<ShortyFieldType>> expected = {
-      // Empty list
-      {},
-      // Primitives
-      {kSZ}, {kSB}, {kSC}, {kSS}, {kSI}, {kSF}, {kSJ}, {kSD},
-      // Non-primitives.
-      { ShortyFieldType::kObject }, { ShortyFieldType::kLambda },
-      // Arrays are always treated as objects.
-      { kSL }, { kSL }, { kSL }, { kSL },
-      // Multiple things at once:
-      { kSZ, kSB, kSC, kSS, kSI, kSF, kSJ, kSD },
-      { kSL, kSL, kSS, kSS, kSI },
-      { kSL, kSD, kSD, kSZ },
-      { kSL, kSL, kSL, kS_, kSL },
-  };
-
-  // Sanity check that the expected/actual lists are the same size.. when adding new entries.
-  ASSERT_EQ(expected.size(), lengthies.size());
-
-  // All valid lengthy types are correctly turned into the expected shorty type.
-  for (size_t i = 0; i < expected.size(); ++i) {
-    const std::vector<ShortyFieldType>& expected_list = expected[i];
-    std::vector<ShortyFieldType> actual_list = ParseLongTypeDescriptorsToList(lengthies[i]);
-    EXPECT_TRUE(AreListsEqual(expected_list, actual_list));
-  }
-}  // TEST_F
-
-// Helper class to probe a shorty's characteristics by minimizing copy-and-paste tests.
-template <typename T, decltype(ShortyFieldType::kByte) kShortyEnum>
-struct ShortyTypeCharacteristics {
-  bool is_primitive_ = false;
-  bool is_primitive_narrow_ = false;
-  bool is_primitive_wide_ = false;
-  bool is_object_ = false;
-  bool is_lambda_ = false;
-  size_t size_ = sizeof(T);
-  bool is_dynamic_sized_ = false;
-
-  void CheckExpects() {
-    ShortyFieldType shorty = kShortyEnum;
-
-    // Test the main non-parsing-related ShortyFieldType characteristics.
-    EXPECT_EQ(is_primitive_, shorty.IsPrimitive());
-    EXPECT_EQ(is_primitive_narrow_, shorty.IsPrimitiveNarrow());
-    EXPECT_EQ(is_primitive_wide_, shorty.IsPrimitiveWide());
-    EXPECT_EQ(is_object_, shorty.IsObject());
-    EXPECT_EQ(is_lambda_, shorty.IsLambda());
-    EXPECT_EQ(size_, shorty.GetStaticSize());
-    EXPECT_EQ(is_dynamic_sized_, !shorty.IsStaticSize());
-
-    // Test compile-time ShortyFieldTypeTraits.
-    EXPECT_TRUE(ShortyFieldTypeTraits::IsType<T>());
-    EXPECT_EQ(is_primitive_, ShortyFieldTypeTraits::IsPrimitiveType<T>());
-    EXPECT_EQ(is_primitive_narrow_, ShortyFieldTypeTraits::IsPrimitiveNarrowType<T>());
-    EXPECT_EQ(is_primitive_wide_, ShortyFieldTypeTraits::IsPrimitiveWideType<T>());
-    EXPECT_EQ(is_object_, ShortyFieldTypeTraits::IsObjectType<T>());
-    EXPECT_EQ(is_lambda_, ShortyFieldTypeTraits::IsLambdaType<T>());
-
-    // Test compile-time ShortyFieldType selectors
-    static_assert(std::is_same<T, typename ShortyFieldTypeSelectType<kShortyEnum>::type>::value,
-                  "ShortyFieldType Enum->Type incorrect mapping");
-    auto kActualEnum = ShortyFieldTypeSelectEnum<T>::value;  // Do not ODR-use, avoid linker error.
-    EXPECT_EQ(kShortyEnum, kActualEnum);
-  }
-};
-
-TEST_F(ShortyFieldTypeTest, TestCharacteristicsAndTraits) {
-  // Boolean test
-  {
-    SCOPED_TRACE("boolean");
-    ShortyTypeCharacteristics<bool, ShortyFieldType::kBoolean> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Byte test
-  {
-    SCOPED_TRACE("byte");
-    ShortyTypeCharacteristics<int8_t, ShortyFieldType::kByte> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Char test
-  {
-    SCOPED_TRACE("char");
-    ShortyTypeCharacteristics<uint16_t, ShortyFieldType::kChar> chars;  // Char is unsigned.
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Short test
-  {
-    SCOPED_TRACE("short");
-    ShortyTypeCharacteristics<int16_t, ShortyFieldType::kShort> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Int test
-  {
-    SCOPED_TRACE("int");
-    ShortyTypeCharacteristics<int32_t, ShortyFieldType::kInt> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Long test
-  {
-    SCOPED_TRACE("long");
-    ShortyTypeCharacteristics<int64_t, ShortyFieldType::kLong> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_wide_ = true;
-    chars.CheckExpects();
-  }
-
-  // Float test
-  {
-    SCOPED_TRACE("float");
-    ShortyTypeCharacteristics<float, ShortyFieldType::kFloat> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Double test
-  {
-    SCOPED_TRACE("double");
-    ShortyTypeCharacteristics<double, ShortyFieldType::kDouble> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_wide_ = true;
-    chars.CheckExpects();
-  }
-
-  // Object test
-  {
-    SCOPED_TRACE("object");
-    ShortyTypeCharacteristics<mirror::Object*, ShortyFieldType::kObject> chars;
-    chars.is_object_ = true;
-    chars.size_ = kObjectReferenceSize;
-    chars.CheckExpects();
-    EXPECT_EQ(kObjectReferenceSize, sizeof(mirror::CompressedReference<mirror::Object>));
-  }
-
-  // Lambda test
-  {
-    SCOPED_TRACE("lambda");
-    ShortyTypeCharacteristics<Closure*, ShortyFieldType::kLambda> chars;
-    chars.is_lambda_ = true;
-    chars.is_dynamic_sized_ = true;
-    chars.CheckExpects();
-  }
-}
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index 341501b..4a2a293 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -43,17 +43,15 @@
 
 inline size_t LockWord::ForwardingAddress() const {
   DCHECK_EQ(GetState(), kForwardingAddress);
-  return value_ << kStateSize;
+  return value_ << kForwardingAddressShift;
 }
 
 inline LockWord::LockWord() : value_(0) {
   DCHECK_EQ(GetState(), kUnlocked);
 }
 
-inline LockWord::LockWord(Monitor* mon, uint32_t rb_state)
-    : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) |
-             (kStateFat << kStateShift)) {
-  DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+inline LockWord::LockWord(Monitor* mon, uint32_t gc_state)
+    : value_(mon->GetMonitorId() | (gc_state << kGCStateShift) | (kStateFat << kStateShift)) {
 #ifndef __LP64__
   DCHECK_ALIGNED(mon, kMonitorIdAlignment);
 #endif
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 5d0d204..538b6eb 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -35,27 +35,27 @@
  * the state. The four possible states are fat locked, thin/unlocked, hash code, and forwarding
  * address. When the lock word is in the "thin" state and its bits are formatted as follows:
  *
- *  |33|22|222222221111|1111110000000000|
- *  |10|98|765432109876|5432109876543210|
- *  |00|rb| lock count |thread id owner |
+ *  |33|2|2|222222221111|1111110000000000|
+ *  |10|9|8|765432109876|5432109876543210|
+ *  |00|m|r| lock count |thread id owner |
  *
  * When the lock word is in the "fat" state and its bits are formatted as follows:
  *
- *  |33|22|2222222211111111110000000000|
- *  |10|98|7654321098765432109876543210|
- *  |01|rb| MonitorId                  |
+ *  |33|2|2|2222222211111111110000000000|
+ *  |10|9|8|7654321098765432109876543210|
+ *  |01|m|r| MonitorId                  |
  *
  * When the lock word is in hash state and its bits are formatted as follows:
  *
- *  |33|22|2222222211111111110000000000|
- *  |10|98|7654321098765432109876543210|
- *  |10|rb| HashCode                   |
+ *  |33|2|2|2222222211111111110000000000|
+ *  |10|9|8|7654321098765432109876543210|
+ *  |10|m|r| HashCode                   |
  *
- * When the lock word is in fowarding address state and its bits are formatted as follows:
+ * When the lock word is in forwarding address state and its bits are formatted as follows:
  *
- *  |33|22|2222222211111111110000000000|
- *  |10|98|7654321098765432109876543210|
- *  |11| ForwardingAddress             |
+ *  |33|2|22222222211111111110000000000|
+ *  |10|9|87654321098765432109876543210|
+ *  |11|0| ForwardingAddress           |
  *
  * The rb bits store the read barrier state.
  */
@@ -64,11 +64,13 @@
   enum SizeShiftsAndMasks {  // private marker to avoid generate-operator-out.py from processing.
     // Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
     kStateSize = 2,
-    kReadBarrierStateSize = 2,
+    kReadBarrierStateSize = 1,
+    kMarkBitStateSize = 1,
     // Number of bits to encode the thin lock owner.
     kThinLockOwnerSize = 16,
     // Remaining bits are the recursive lock count.
-    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize,
+    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize -
+        kMarkBitStateSize,
     // Thin lock bits. Owner in lowest bits.
 
     kThinLockOwnerShift = 0,
@@ -81,25 +83,43 @@
     kThinLockCountOne = 1 << kThinLockCountShift,  // == 65536 (0x10000)
 
     // State in the highest bits.
-    kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift,
+    kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift +
+        kMarkBitStateSize,
     kStateMask = (1 << kStateSize) - 1,
     kStateMaskShifted = kStateMask << kStateShift,
     kStateThinOrUnlocked = 0,
     kStateFat = 1,
     kStateHash = 2,
     kStateForwardingAddress = 3,
+
+    // Read barrier bit.
     kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
     kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1,
     kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift,
     kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
 
+    // Mark bit.
+    kMarkBitStateShift = kReadBarrierStateSize + kReadBarrierStateShift,
+    kMarkBitStateMask = (1 << kMarkBitStateSize) - 1,
+    kMarkBitStateMaskShifted = kMarkBitStateMask << kMarkBitStateShift,
+    kMarkBitStateMaskShiftedToggled = ~kMarkBitStateMaskShifted,
+
+    // GC state is mark bit and read barrier state.
+    kGCStateSize = kReadBarrierStateSize + kMarkBitStateSize,
+    kGCStateShift = kReadBarrierStateShift,
+    kGCStateMaskShifted = kReadBarrierStateMaskShifted | kMarkBitStateMaskShifted,
+    kGCStateMaskShiftedToggled = ~kGCStateMaskShifted,
+
     // When the state is kHashCode, the non-state bits hold the hashcode.
     // Note Object.hashCode() has the hash code layout hardcoded.
     kHashShift = 0,
-    kHashSize = 32 - kStateSize - kReadBarrierStateSize,
+    kHashSize = 32 - kStateSize - kReadBarrierStateSize - kMarkBitStateSize,
     kHashMask = (1 << kHashSize) - 1,
     kMaxHash = kHashMask,
 
+    // Forwarding address shift.
+    kForwardingAddressShift = kObjectAlignmentShift,
+
     kMonitorIdShift = kHashShift,
     kMonitorIdSize = kHashSize,
     kMonitorIdMask = kHashMask,
@@ -108,31 +128,31 @@
     kMaxMonitorId = kMaxHash
   };
 
-  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) {
+  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t gc_state) {
     CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner));
     CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount));
-    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
-    return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) |
-                    (rb_state << kReadBarrierStateShift) |
+    // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
+    return LockWord((thread_id << kThinLockOwnerShift) |
+                    (count << kThinLockCountShift) |
+                    (gc_state << kGCStateShift) |
                     (kStateThinOrUnlocked << kStateShift));
   }
 
   static LockWord FromForwardingAddress(size_t target) {
     DCHECK_ALIGNED(target, (1 << kStateSize));
-    return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift));
+    return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
   }
 
-  static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) {
+  static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
     CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash));
-    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+    // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
     return LockWord((hash_code << kHashShift) |
-                    (rb_state << kReadBarrierStateShift) |
+                    (gc_state << kGCStateShift) |
                     (kStateHash << kStateShift));
   }
 
-  static LockWord FromDefault(uint32_t rb_state) {
-    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
-    return LockWord(rb_state << kReadBarrierStateShift);
+  static LockWord FromDefault(uint32_t gc_state) {
+    return LockWord(gc_state << kGCStateShift);
   }
 
   static bool IsDefault(LockWord lw) {
@@ -154,7 +174,7 @@
   LockState GetState() const {
     CheckReadBarrierState();
     if ((!kUseReadBarrier && UNLIKELY(value_ == 0)) ||
-        (kUseReadBarrier && UNLIKELY((value_ & kReadBarrierStateMaskShiftedToggled) == 0))) {
+        (kUseReadBarrier && UNLIKELY((value_ & kGCStateMaskShiftedToggled) == 0))) {
       return kUnlocked;
     } else {
       uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
@@ -176,6 +196,10 @@
     return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask;
   }
 
+  uint32_t GCState() const {
+    return (value_ & kGCStateMaskShifted) >> kGCStateShift;
+  }
+
   void SetReadBarrierState(uint32_t rb_state) {
     DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
@@ -184,6 +208,19 @@
     value_ |= (rb_state & kReadBarrierStateMask) << kReadBarrierStateShift;
   }
 
+
+  uint32_t MarkBitState() const {
+    return (value_ >> kMarkBitStateShift) & kMarkBitStateMask;
+  }
+
+  void SetMarkBitState(uint32_t mark_bit) {
+    DCHECK_EQ(mark_bit & ~kMarkBitStateMask, 0U);
+    DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
+    // Clear and or the bits.
+    value_ &= kMarkBitStateMaskShiftedToggled;
+    value_ |= mark_bit << kMarkBitStateShift;
+  }
+
   // Return the owner thin lock thread id.
   uint32_t ThinLockOwner() const;
 
@@ -197,7 +234,7 @@
   size_t ForwardingAddress() const;
 
   // Constructor a lock word for inflation to use a Monitor.
-  LockWord(Monitor* mon, uint32_t rb_state);
+  LockWord(Monitor* mon, uint32_t gc_state);
 
   // Return the hash code stored in the lock word, must be kHashCode state.
   int32_t GetHashCode() const;
@@ -207,7 +244,7 @@
     if (kIncludeReadBarrierState) {
       return lw1.GetValue() == lw2.GetValue();
     }
-    return lw1.GetValueWithoutReadBarrierState() == lw2.GetValueWithoutReadBarrierState();
+    return lw1.GetValueWithoutGCState() == lw2.GetValueWithoutGCState();
   }
 
   void Dump(std::ostream& os) {
@@ -248,9 +285,9 @@
     return value_;
   }
 
-  uint32_t GetValueWithoutReadBarrierState() const {
+  uint32_t GetValueWithoutGCState() const {
     CheckReadBarrierState();
-    return value_ & ~(kReadBarrierStateMask << kReadBarrierStateShift);
+    return value_ & kGCStateMaskShiftedToggled;
   }
 
   // Only Object should be converting LockWords to/from uints.
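
The enum rework above packs the 32-bit thin lock word as |state:2|mark:1|rb:1|lock count:12|thread id:16|, with the mark bit and read barrier bit together forming the "GC state". A stand-alone re-derivation of those shifts and masks, with names local to this sketch rather than the ART enum, shows how the constants compose:

#include <cstdint>

constexpr uint32_t kThreadIdBits  = 16;
constexpr uint32_t kLockCountBits = 32 - 16 - 2 /*state*/ - 1 /*rb*/ - 1 /*mark*/;  // == 12
constexpr uint32_t kGCStateShift  = kThreadIdBits + kLockCountBits;                 // == 28
constexpr uint32_t kGCStateMask   = 0x3u << kGCStateShift;                          // mark + rb
constexpr uint32_t kStateShift    = kGCStateShift + 2;                              // == 30

constexpr uint32_t MakeThinLock(uint32_t tid, uint32_t count, uint32_t gc_state) {
  // State bits 0 == thin/unlocked, kept explicit for clarity.
  return tid | (count << kThreadIdBits) | (gc_state << kGCStateShift) | (0u << kStateShift);
}

constexpr uint32_t GCStateOf(uint32_t word) {
  return (word & kGCStateMask) >> kGCStateShift;
}

static_assert(GCStateOf(MakeThinLock(42, 3, 0x2)) == 0x2, "GC bits must round-trip");
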
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 8f5419c..8ad47eb 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -636,8 +636,9 @@
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  Primitive::Type type = static_cast<Primitive::Type>(v32 & 0xFFFF);
-  DCHECK_EQ(static_cast<size_t>(v32 >> 16), Primitive::ComponentSizeShift(type));
+  Primitive::Type type = static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask);
+  DCHECK_EQ(static_cast<size_t>(v32 >> kPrimitiveTypeSizeShiftShift),
+            Primitive::ComponentSizeShift(type));
   return type;
 }
 
@@ -646,8 +647,9 @@
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  size_t size_shift = static_cast<Primitive::Type>(v32 >> 16);
-  DCHECK_EQ(size_shift, Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & 0xFFFF)));
+  size_t size_shift = static_cast<Primitive::Type>(v32 >> kPrimitiveTypeSizeShiftShift);
+  DCHECK_EQ(size_shift,
+            Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask)));
   return size_shift;
 }
 
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 5c490de..8f6ce44 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -64,6 +64,12 @@
   // 2 ref instance fields.]
   static constexpr uint32_t kClassWalkSuper = 0xC0000000;
 
+  // Shift primitive type by kPrimitiveTypeSizeShiftShift to get the component type size shift
+  // Used for computing array size as follows:
+  // array_bytes = header_size + (elements << (primitive_type >> kPrimitiveTypeSizeShiftShift))
+  static constexpr uint32_t kPrimitiveTypeSizeShiftShift = 16;
+  static constexpr uint32_t kPrimitiveTypeMask = (1u << kPrimitiveTypeSizeShiftShift) - 1;
+
   // Class Status
   //
   // kStatusRetired: Class that's temporarily used till class linking time
@@ -371,10 +377,10 @@
 
   void SetPrimitiveType(Primitive::Type new_type) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
-    int32_t v32 = static_cast<int32_t>(new_type);
-    DCHECK_EQ(v32 & 0xFFFF, v32) << "upper 16 bits aren't zero";
+    uint32_t v32 = static_cast<uint32_t>(new_type);
+    DCHECK_EQ(v32 & kPrimitiveTypeMask, v32) << "upper 16 bits aren't zero";
     // Store the component size shift in the upper 16 bits.
-    v32 |= Primitive::ComponentSizeShift(new_type) << 16;
+    v32 |= Primitive::ComponentSizeShift(new_type) << kPrimitiveTypeSizeShiftShift;
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32);
   }
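
The new kPrimitiveTypeSizeShiftShift / kPrimitiveTypeMask constants name the existing packing of primitive_type_: the low 16 bits hold the Primitive::Type value and the high 16 bits cache its component size shift, so array sizing needs no table lookup. A small sketch of the packing and the array-size computation described in the comment above (the type value, header size, and element count are illustrative, not ART constants):

#include <cstddef>
#include <cstdint>

constexpr uint32_t kShiftShift = 16;                       // kPrimitiveTypeSizeShiftShift
constexpr uint32_t kTypeMask   = (1u << kShiftShift) - 1;  // kPrimitiveTypeMask

constexpr uint32_t Pack(uint32_t type, uint32_t size_shift) {
  return (type & kTypeMask) | (size_shift << kShiftShift);
}

constexpr size_t ArrayBytes(uint32_t packed, size_t header, size_t elements) {
  return header + (elements << (packed >> kShiftShift));
}

// e.g. a type with size shift 2 (4-byte elements): 100 elements after a 12-byte header.
static_assert(ArrayBytes(Pack(/*illustrative type value*/ 4, /*shift*/ 2), 12, 100) == 412, "");
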
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 0592c6c..0495c95 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -147,10 +147,20 @@
 #endif
 }
 
+inline uint32_t Object::GetMarkBit() {
+#ifdef USE_READ_BARRIER
+  return GetLockWord(false).MarkBitState();
+#else
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+#endif
+}
+
 inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
   DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
   LockWord lw = GetLockWord(false);
   lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
   SetLockWord(lw, false);
@@ -173,6 +183,8 @@
   DCHECK(kUseBakerReadBarrier);
   DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
   DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  DCHECK_NE(expected_rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
   LockWord expected_lw;
   LockWord new_lw;
   do {
@@ -216,6 +228,24 @@
 #endif
 }
 
+inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) {
+  LockWord expected_lw;
+  LockWord new_lw;
+  do {
+    LockWord lw = GetLockWord(false);
+    if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    new_lw = lw;
+    new_lw.SetMarkBitState(mark_bit);
+    // Since this is only set from the mutator, we can use the non release Cas.
+  } while (!CasLockWordWeakRelaxed(expected_lw, new_lw));
+  return true;
+}
+
+
 inline void Object::AssertReadBarrierPointer() const {
   if (kUseBakerReadBarrier) {
     Object* obj = const_cast<Object*>(this);
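
AtomicSetMarkBit above follows the usual weak-CAS retry shape: re-read the word, bail out if another thread already changed the bit of interest, otherwise try to install the new value. A generic, self-contained sketch of that shape, with std::atomic and relaxed ordering standing in for ART's CasLockWordWeakRelaxed:

#include <atomic>
#include <cstdint>

bool TrySetBit(std::atomic<uint32_t>& word, uint32_t bit_mask, bool expected_set) {
  for (;;) {
    uint32_t old_word = word.load(std::memory_order_relaxed);
    if (((old_word & bit_mask) != 0) != expected_set) {
      return false;  // Lost the race: the bit is not in the state we expected.
    }
    uint32_t new_word = old_word | bit_mask;
    if (word.compare_exchange_weak(old_word, new_word,
                                   std::memory_order_relaxed,
                                   std::memory_order_relaxed)) {
      return true;
    }
    // Weak CAS may fail spuriously or because an unrelated field changed; retry.
  }
}
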
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 701c600..13c536e 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -163,8 +163,7 @@
       case LockWord::kUnlocked: {
         // Try to compare and swap in a new hash, if we succeed we will return the hash on the next
         // loop iteration.
-        LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(),
-                                                    lw.ReadBarrierState());
+        LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(), lw.GCState());
         DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode);
         if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) {
           return hash_word.GetHashCode();
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index a4bdbad..5b129bf 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -93,6 +93,7 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // TODO: Clean this up and change to return int32_t
   Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_);
 
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
@@ -103,6 +104,12 @@
   template<bool kCasRelease = false>
   ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE uint32_t GetMarkBit() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE bool AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   // The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index bf9f931..e863ea9 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -155,7 +155,7 @@
       return false;
     }
   }
-  LockWord fat(this, lw.ReadBarrierState());
+  LockWord fat(this, lw.GCState());
   // Publish the updated lock word, which may race with other threads.
   bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat);
   // Lock profiling.
@@ -774,20 +774,21 @@
         return false;
       }
       // Deflate to a thin lock.
-      LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_,
-                                                 lw.ReadBarrierState());
+      LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(),
+                                                 monitor->lock_count_,
+                                                 lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
       obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / "
           << monitor->lock_count_;
     } else if (monitor->HasHashCode()) {
-      LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.ReadBarrierState());
+      LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
       obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode();
     } else {
       // No lock and no hash, just put an empty lock word inside the object.
-      LockWord new_lw = LockWord::FromDefault(lw.ReadBarrierState());
+      LockWord new_lw = LockWord::FromDefault(lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
       obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated" << obj << " to empty lock word";
@@ -876,7 +877,7 @@
     LockWord lock_word = h_obj->GetLockWord(true);
     switch (lock_word.GetState()) {
       case LockWord::kUnlocked: {
-        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.ReadBarrierState()));
+        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.GCState()));
         if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
           AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
           // CasLockWord enforces more than the acquire ordering we need here.
@@ -890,8 +891,9 @@
           // We own the lock, increase the recursion count.
           uint32_t new_count = lock_word.ThinLockCount() + 1;
           if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
-            LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count,
-                                                          lock_word.ReadBarrierState()));
+            LockWord thin_locked(LockWord::FromThinLockId(thread_id,
+                                                          new_count,
+                                                          lock_word.GCState()));
             if (!kUseReadBarrier) {
               h_obj->SetLockWord(thin_locked, true);
               AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
@@ -975,9 +977,9 @@
           LockWord new_lw = LockWord::Default();
           if (lock_word.ThinLockCount() != 0) {
             uint32_t new_count = lock_word.ThinLockCount() - 1;
-            new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.ReadBarrierState());
+            new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.GCState());
           } else {
-            new_lw = LockWord::FromDefault(lock_word.ReadBarrierState());
+            new_lw = LockWord::FromDefault(lock_word.GCState());
           }
           if (!kUseReadBarrier) {
             DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
new file mode 100644
index 0000000..c20c8b8
--- /dev/null
+++ b/runtime/native_stack_dump.cc
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "native_stack_dump.h"
+
+#include <ostream>
+
+#include <stdio.h>
+
+#include "art_method.h"
+
+// For DumpNativeStack.
+#include <backtrace/Backtrace.h>
+#include <backtrace/BacktraceMap.h>
+
+#if defined(__linux__)
+
+#include <memory>
+#include <vector>
+
+#include <linux/unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include "arch/instruction_set.h"
+#include "base/memory_tool.h"
+#include "base/mutex.h"
+#include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
+#include "oat_quick_method_header.h"
+#include "os.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+#endif
+
+namespace art {
+
+#if defined(__linux__)
+
+static constexpr bool kUseAddr2line = !kIsTargetBuild;
+
+ALWAYS_INLINE
+static inline void WritePrefix(std::ostream& os, const char* prefix, bool odd) {
+  if (prefix != nullptr) {
+    os << prefix;
+  }
+  os << "  ";
+  if (!odd) {
+    os << " ";
+  }
+}
+
+// The state of an open pipe to addr2line. In "server" mode, addr2line takes input on stdin
+// and prints the result to stdout. This struct keeps the state of the open connection.
+struct Addr2linePipe {
+  Addr2linePipe(int in_fd, int out_fd, const std::string& file_name, pid_t pid)
+      : in(in_fd, false), out(out_fd, false), file(file_name), child_pid(pid), odd(true) {}
+
+  ~Addr2linePipe() {
+    kill(child_pid, SIGKILL);
+  }
+
+  File in;      // The file descriptor that is connected to the output of addr2line.
+  File out;     // The file descriptor that is connected to the input of addr2line.
+
+  const std::string file;     // The file addr2line is working on, so that we know when to close
+                              // and restart.
+  const pid_t child_pid;      // The pid of the child, which we should kill when we're done.
+  bool odd;                   // Print state for indentation of lines.
+};
+
+static std::unique_ptr<Addr2linePipe> Connect(const std::string& name, const char* args[]) {
+  int caller_to_addr2line[2];
+  int addr2line_to_caller[2];
+
+  if (pipe(caller_to_addr2line) == -1) {
+    return nullptr;
+  }
+  if (pipe(addr2line_to_caller) == -1) {
+    close(caller_to_addr2line[0]);
+    close(caller_to_addr2line[1]);
+    return nullptr;
+  }
+
+  pid_t pid = fork();
+  if (pid == -1) {
+    close(caller_to_addr2line[0]);
+    close(caller_to_addr2line[1]);
+    close(addr2line_to_caller[0]);
+    close(addr2line_to_caller[1]);
+    return nullptr;
+  }
+
+  if (pid == 0) {
+    dup2(caller_to_addr2line[0], STDIN_FILENO);
+    dup2(addr2line_to_caller[1], STDOUT_FILENO);
+
+    close(caller_to_addr2line[0]);
+    close(caller_to_addr2line[1]);
+    close(addr2line_to_caller[0]);
+    close(addr2line_to_caller[1]);
+
+    execv(args[0], const_cast<char* const*>(args));
+    exit(1);
+  } else {
+    close(caller_to_addr2line[0]);
+    close(addr2line_to_caller[1]);
+    return std::unique_ptr<Addr2linePipe>(new Addr2linePipe(addr2line_to_caller[0],
+                                                            caller_to_addr2line[1],
+                                                            name,
+                                                            pid));
+  }
+}
+
+static void Drain(size_t expected,
+                  const char* prefix,
+                  std::unique_ptr<Addr2linePipe>* pipe /* inout */,
+                  std::ostream& os) {
+  DCHECK(pipe != nullptr);
+  DCHECK(pipe->get() != nullptr);
+  int in = pipe->get()->in.Fd();
+  DCHECK_GE(in, 0);
+
+  bool prefix_written = false;
+
+  for (;;) {
+    constexpr uint32_t kWaitTimeExpectedMicros = 500 * 1000;
+    constexpr uint32_t kWaitTimeUnexpectedMicros = 50 * 1000;
+
+    struct timeval tv;
+    tv.tv_sec = 0;
+    tv.tv_usec = expected > 0 ? kWaitTimeExpectedMicros : kWaitTimeUnexpectedMicros;
+
+    fd_set rfds;
+    FD_ZERO(&rfds);
+    FD_SET(in, &rfds);
+
+    int retval = TEMP_FAILURE_RETRY(select(in + 1, &rfds, nullptr, nullptr, &tv));
+
+    if (retval < 0) {
+      // Other side may have crashed or other errors.
+      pipe->reset();
+      return;
+    }
+
+    if (retval == 0) {
+      // Timeout.
+      return;
+    }
+
+    DCHECK_EQ(retval, 1);
+
+    constexpr size_t kMaxBuffer = 128;  // Relatively small buffer. Should be OK as we're on an
+                                        // alt stack, but just to be sure...
+    char buffer[kMaxBuffer];
+    memset(buffer, 0, kMaxBuffer);
+    int bytes_read = TEMP_FAILURE_RETRY(read(in, buffer, kMaxBuffer - 1));
+
+    if (bytes_read < 0) {
+      // This should not really happen...
+      pipe->reset();
+      return;
+    }
+
+    char* tmp = buffer;
+    while (*tmp != 0) {
+      if (!prefix_written) {
+        WritePrefix(os, prefix, (*pipe)->odd);
+        prefix_written = true;
+      }
+      char* new_line = strchr(tmp, '\n');
+      if (new_line == nullptr) {
+        os << tmp;
+
+        break;
+      } else {
+        char saved = *(new_line + 1);
+        *(new_line + 1) = 0;
+        os << tmp;
+        *(new_line + 1) = saved;
+
+        tmp = new_line + 1;
+        prefix_written = false;
+        (*pipe)->odd = !(*pipe)->odd;
+
+        if (expected > 0) {
+          expected--;
+        }
+      }
+    }
+  }
+}
+
+static void Addr2line(const std::string& map_src,
+                      uintptr_t offset,
+                      std::ostream& os,
+                      const char* prefix,
+                      std::unique_ptr<Addr2linePipe>* pipe /* inout */) {
+  DCHECK(pipe != nullptr);
+
+  if (map_src == "[vdso]") {
+    // Special-case this; our setup has problems with the vdso.
+    return;
+  }
+
+  if (*pipe == nullptr || (*pipe)->file != map_src) {
+    if (*pipe != nullptr) {
+      Drain(0, prefix, pipe, os);
+    }
+    pipe->reset();  // Close early.
+
+    const char* args[7] = {
+        "/usr/bin/addr2line",
+        "--functions",
+        "--inlines",
+        "--demangle",
+        "-e",
+        map_src.c_str(),
+        nullptr
+    };
+    *pipe = Connect(map_src, args);
+  }
+
+  Addr2linePipe* pipe_ptr = pipe->get();
+  if (pipe_ptr == nullptr) {
+    // Failed...
+    return;
+  }
+
+  // Send the offset.
+  const std::string hex_offset = StringPrintf("%zx\n", offset);
+
+  if (!pipe_ptr->out.WriteFully(hex_offset.data(), hex_offset.length())) {
+    // Error. :-(
+    pipe->reset();
+    return;
+  }
+
+  // Now drain (expecting two lines).
+  Drain(2U, prefix, pipe, os);
+}
+
+static bool RunCommand(std::string cmd) {
+  FILE* stream = popen(cmd.c_str(), "r");
+  if (stream) {
+    pclose(stream);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
+  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
+      method->GetEntryPointFromQuickCompiledCode()));
+  if (code == 0) {
+    return pc == 0;
+  }
+  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  return code <= pc && pc <= (code + code_size);
+}
+
+void DumpNativeStack(std::ostream& os,
+                     pid_t tid,
+                     BacktraceMap* existing_map,
+                     const char* prefix,
+                     ArtMethod* current_method,
+                     void* ucontext_ptr) {
+  // b/18119146
+  if (RUNNING_ON_MEMORY_TOOL != 0) {
+    return;
+  }
+
+  BacktraceMap* map = existing_map;
+  std::unique_ptr<BacktraceMap> tmp_map;
+  if (map == nullptr) {
+    tmp_map.reset(BacktraceMap::Create(getpid()));
+    map = tmp_map.get();
+  }
+  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
+  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
+    os << prefix << "(backtrace::Unwind failed for thread " << tid
+       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
+    return;
+  } else if (backtrace->NumFrames() == 0) {
+    os << prefix << "(no native stack frames for thread " << tid << ")\n";
+    return;
+  }
+
+  // Check whether we have and should use addr2line.
+  bool use_addr2line;
+  if (kUseAddr2line) {
+    // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
+    // and print to stderr.
+    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h");
+  } else {
+    use_addr2line = false;
+  }
+
+  std::unique_ptr<Addr2linePipe> addr2line_state;
+
+  for (Backtrace::const_iterator it = backtrace->begin();
+       it != backtrace->end(); ++it) {
+    // We produce output like this:
+    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
+    // In order for parsing tools to continue to function, the stack dump
+    // format must at least adhere to this format:
+    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
+    // The parsers require a single space before and after pc, and two spaces
+    // after the <RELATIVE_ADDR>. There can be any prefix data before the
+    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
+    os << prefix << StringPrintf("#%02zu pc ", it->num);
+    bool try_addr2line = false;
+    if (!BacktraceMap::IsValid(it->map)) {
+      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
+                                                            : "%08" PRIxPTR "  ???",
+                         it->pc);
+    } else {
+      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
+                                                            : "%08" PRIxPTR "  ",
+                         BacktraceMap::GetRelativePc(it->map, it->pc));
+      os << it->map.name;
+      os << " (";
+      if (!it->func_name.empty()) {
+        os << it->func_name;
+        if (it->func_offset != 0) {
+          os << "+" << it->func_offset;
+        }
+        try_addr2line = true;
+      } else if (current_method != nullptr &&
+          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
+          PcIsWithinQuickCode(current_method, it->pc)) {
+        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
+        os << JniLongName(current_method) << "+"
+           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
+      } else {
+        os << "???";
+      }
+      os << ")";
+    }
+    os << "\n";
+    if (try_addr2line && use_addr2line) {
+      Addr2line(it->map.name, it->pc - it->map.start, os, prefix, &addr2line_state);
+    }
+  }
+
+  if (addr2line_state != nullptr) {
+    Drain(0, prefix, &addr2line_state, os);
+  }
+}
+
+void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
+  if (tid == GetTid()) {
+    // There's no point showing that we're reading our stack out of /proc!
+    return;
+  }
+
+  std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
+  std::string kernel_stack;
+  if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
+    os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
+    return;
+  }
+
+  std::vector<std::string> kernel_stack_frames;
+  Split(kernel_stack, '\n', &kernel_stack_frames);
+  // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
+  // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
+  kernel_stack_frames.pop_back();
+  for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
+    // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
+    // into "futex_wait_queue_me+0xcd/0x110".
+    const char* text = kernel_stack_frames[i].c_str();
+    const char* close_bracket = strchr(text, ']');
+    if (close_bracket != nullptr) {
+      text = close_bracket + 2;
+    }
+    os << prefix;
+    if (include_count) {
+      os << StringPrintf("#%02zd ", i);
+    }
+    os << text << "\n";
+  }
+}
+
+#elif defined(__APPLE__)
+
+void DumpNativeStack(std::ostream& os ATTRIBUTE_UNUSED,
+                     pid_t tid ATTRIBUTE_UNUSED,
+                     BacktraceMap* existing_map ATTRIBUTE_UNUSED,
+                     const char* prefix ATTRIBUTE_UNUSED,
+                     ArtMethod* current_method ATTRIBUTE_UNUSED,
+                     void* ucontext_ptr ATTRIBUTE_UNUSED) {
+}
+
+void DumpKernelStack(std::ostream& os ATTRIBUTE_UNUSED,
+                     pid_t tid ATTRIBUTE_UNUSED,
+                     const char* prefix ATTRIBUTE_UNUSED,
+                     bool include_count ATTRIBUTE_UNUSED) {
+}
+
+#else
+#error "Unsupported architecture for native stack dumps."
+#endif
+
+}  // namespace art
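For context on how the Connect()/Drain()/Addr2line() helpers above drive addr2line in "server" mode (one long-lived process per ELF file, hex offsets written to its stdin, function-name and file:line answers read back from its stdout), here is a one-shot sketch of the same query. It mirrors the popen-based helper removed from utils.cc further down and is illustrative only; the file name and offset are assumed inputs, not part of the change.

    #include <stdint.h>
    #include <stdio.h>
    #include <string>

    // One-shot variant: pass the offset on the command line and read back the
    // answer lines ("<function>\n<file>:<line>\n") that Drain() expects.
    static std::string SymbolizeOnce(const std::string& elf_file, uintptr_t offset) {
      char cmd[512];
      snprintf(cmd, sizeof(cmd),
               "addr2line --functions --inlines --demangle -e %s %zx",
               elf_file.c_str(), static_cast<size_t>(offset));
      std::string result;
      FILE* stream = popen(cmd, "r");
      if (stream == nullptr) {
        return result;
      }
      char line[256];
      while (fgets(line, sizeof(line), stream) != nullptr) {
        result += line;
      }
      pclose(stream);
      return result;
    }
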
diff --git a/runtime/native_stack_dump.h b/runtime/native_stack_dump.h
new file mode 100644
index 0000000..d64bc82
--- /dev/null
+++ b/runtime/native_stack_dump.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_STACK_DUMP_H_
+#define ART_RUNTIME_NATIVE_STACK_DUMP_H_
+
+#include <unistd.h>
+
+#include <iosfwd>
+
+#include "base/macros.h"
+
+class BacktraceMap;
+
+namespace art {
+
+class ArtMethod;
+
+// Dumps the native stack for thread 'tid' to 'os'.
+void DumpNativeStack(std::ostream& os,
+                     pid_t tid,
+                     BacktraceMap* map = nullptr,
+                     const char* prefix = "",
+                     ArtMethod* current_method = nullptr,
+                     void* ucontext = nullptr)
+    NO_THREAD_SAFETY_ANALYSIS;
+
+// Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
+void DumpKernelStack(std::ostream& os,
+                     pid_t tid,
+                     const char* prefix = "",
+                     bool include_count = true);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_STACK_DUMP_H_
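A minimal usage sketch of the new header, assuming only the declarations above; the helper name, output stream, and prefix are hypothetical.

    #include <iostream>
    #include <sys/types.h>

    #include "native_stack_dump.h"

    // Hypothetical helper: dump both stacks of the given thread to stderr.
    void LogStacks(pid_t tid) {
      // Trailing parameters keep their defaults: no pre-built BacktraceMap,
      // empty prefix, no current ArtMethod, no ucontext.
      art::DumpNativeStack(std::cerr, tid);
      // Reads /proc/self/task/<tid>/stack; returns early for the calling thread.
      art::DumpKernelStack(std::cerr, tid, "  kernel: ");
    }
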
diff --git a/runtime/oat.h b/runtime/oat.h
index 2c5c3e6..7c84fe9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '8', '5', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '8', '6', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 68610a7..5752fd9 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -1073,7 +1073,7 @@
 
 const OatFile::OatDexFile* OatFile::GetOatDexFile(const char* dex_location,
                                                   const uint32_t* dex_location_checksum,
-                                                  bool warn_if_not_found) const {
+                                                  std::string* error_msg) const {
   // NOTE: We assume here that the canonical location for a given dex_location never
   // changes. If it does (i.e. some symlink used by the filename changes) we may return
   // an incorrect OatDexFile. As long as we have a checksum to check, we shall return
@@ -1115,32 +1115,29 @@
       secondary_oat_dex_files_.PutBefore(secondary_lb, key_copy, oat_dex_file);
     }
   }
-  if (oat_dex_file != nullptr &&
-      (dex_location_checksum == nullptr ||
-       oat_dex_file->GetDexFileLocationChecksum() == *dex_location_checksum)) {
-    return oat_dex_file;
+
+  if (oat_dex_file == nullptr) {
+    if (error_msg != nullptr) {
+      std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location);
+      *error_msg = "Failed to find OatDexFile for DexFile " + std::string(dex_location)
+          + " (canonical path " + dex_canonical_location + ") in OatFile " + GetLocation();
+    }
+    return nullptr;
   }
 
-  if (warn_if_not_found) {
-    std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location);
-    std::string checksum("<unspecified>");
-    if (dex_location_checksum != nullptr) {
-      checksum = StringPrintf("0x%08x", *dex_location_checksum);
+  if (dex_location_checksum != nullptr &&
+      oat_dex_file->GetDexFileLocationChecksum() != *dex_location_checksum) {
+    if (error_msg != nullptr) {
+      std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location);
+      std::string checksum = StringPrintf("0x%08x", oat_dex_file->GetDexFileLocationChecksum());
+      std::string required_checksum = StringPrintf("0x%08x", *dex_location_checksum);
+      *error_msg = "OatDexFile for DexFile " + std::string(dex_location)
+          + " (canonical path " + dex_canonical_location + ") in OatFile " + GetLocation()
+          + " has checksum " + checksum + " but " + required_checksum + " was required";
     }
-    LOG(WARNING) << "Failed to find OatDexFile for DexFile " << dex_location
-                 << " ( canonical path " << dex_canonical_location << ")"
-                 << " with checksum " << checksum << " in OatFile " << GetLocation();
-    if (kIsDebugBuild) {
-      for (const OatDexFile* odf : oat_dex_files_storage_) {
-        LOG(WARNING) << "OatFile " << GetLocation()
-                     << " contains OatDexFile " << odf->GetDexFileLocation()
-                     << " (canonical path " << odf->GetCanonicalDexFileLocation() << ")"
-                     << " with checksum 0x" << std::hex << odf->GetDexFileLocationChecksum();
-      }
-    }
+    return nullptr;
   }
-
-  return nullptr;
+  return oat_dex_file;
 }
 
 OatFile::OatDexFile::OatDexFile(const OatFile* oat_file,
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index aa727ff..f5ab9dc 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -213,9 +213,15 @@
 
     friend class art::OatDexFile;
   };
+
+  // Get the OatDexFile for the given dex_location within this oat file.
+  // If dex_location_checksum is non-null, the OatDexFile will only be
+  // returned if it has a matching checksum.
+  // If error_msg is non-null and no OatDexFile is returned, error_msg will
+  // be updated with a description of why no OatDexFile was returned.
   const OatDexFile* GetOatDexFile(const char* dex_location,
                                   const uint32_t* const dex_location_checksum,
-                                  bool exception_if_not_found = true) const
+                                  /*out*/std::string* error_msg = nullptr) const
       REQUIRES(!secondary_lookup_lock_);
 
   const std::vector<const OatDexFile*>& GetOatDexFiles() const {
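For reference, the call pattern the new signature enables, mirroring the updated callers in oat_file_assistant.cc below (oat_file and dex_location are assumed to be in scope):

    std::string error_msg;
    const OatFile::OatDexFile* oat_dex_file =
        oat_file->GetOatDexFile(dex_location, /* dex_location_checksum */ nullptr, &error_msg);
    if (oat_dex_file == nullptr) {
      // error_msg now explains the failure (missing entry or checksum mismatch)
      // instead of the old generic "out-of-date oat file" warning.
      LOG(WARNING) << error_msg;
    }
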
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index fd58907..2c2a2b8 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -277,10 +277,9 @@
   // Load the primary dex file.
   std::string error_msg;
   const OatFile::OatDexFile* oat_dex_file = oat_file.GetOatDexFile(
-      dex_location, nullptr, false);
+      dex_location, nullptr, &error_msg);
   if (oat_dex_file == nullptr) {
-    LOG(WARNING) << "Attempt to load out-of-date oat file "
-      << oat_file.GetLocation() << " for dex location " << dex_location;
+    LOG(WARNING) << error_msg;
     return std::vector<std::unique_ptr<const DexFile>>();
   }
 
@@ -294,7 +293,7 @@
   // Load secondary multidex files
   for (size_t i = 1; ; i++) {
     std::string secondary_dex_location = DexFile::GetMultiDexLocation(i, dex_location);
-    oat_dex_file = oat_file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
+    oat_dex_file = oat_file.GetOatDexFile(secondary_dex_location.c_str(), nullptr);
     if (oat_dex_file == nullptr) {
       // There are no more secondary dex files to load.
       break;
@@ -389,25 +388,25 @@
   // Verify the dex checksum.
   // Note: GetOatDexFile will return null if the dex checksum doesn't match
   // what we provide, which verifies the primary dex checksum for us.
+  std::string error_msg;
   const uint32_t* dex_checksum_pointer = GetRequiredDexChecksum();
   const OatFile::OatDexFile* oat_dex_file = file.GetOatDexFile(
-      dex_location_.c_str(), dex_checksum_pointer, false);
+      dex_location_.c_str(), dex_checksum_pointer, &error_msg);
   if (oat_dex_file == nullptr) {
+    VLOG(oat) << error_msg;
     return kOatOutOfDate;
   }
 
   // Verify the dex checksums for any secondary multidex files
   for (size_t i = 1; ; i++) {
-    std::string secondary_dex_location
-      = DexFile::GetMultiDexLocation(i, dex_location_.c_str());
+    std::string secondary_dex_location = DexFile::GetMultiDexLocation(i, dex_location_.c_str());
     const OatFile::OatDexFile* secondary_oat_dex_file
-      = file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
+      = file.GetOatDexFile(secondary_dex_location.c_str(), nullptr);
     if (secondary_oat_dex_file == nullptr) {
       // There are no more secondary dex files to check.
       break;
     }
 
-    std::string error_msg;
     uint32_t expected_secondary_checksum = 0;
     if (DexFile::GetChecksum(secondary_dex_location.c_str(),
           &expected_secondary_checksum, &error_msg)) {
@@ -429,7 +428,6 @@
   }
 
   CompilerFilter::Filter current_compiler_filter = file.GetCompilerFilter();
-  VLOG(oat) << "Compiler filter for " << file.GetLocation() << " is " << current_compiler_filter;
 
   // Verify the image checksum
   if (CompilerFilter::DependsOnImageChecksum(current_compiler_filter)) {
@@ -760,8 +758,8 @@
       // Get the checksum from the odex if we can.
       const OatFile* odex_file = odex_.GetFile();
       if (odex_file != nullptr) {
-        const OatFile::OatDexFile* odex_dex_file = odex_file->GetOatDexFile(
-            dex_location_.c_str(), nullptr, false);
+        const OatFile::OatDexFile* odex_dex_file
+            = odex_file->GetOatDexFile(dex_location_.c_str(), nullptr);
         if (odex_dex_file != nullptr) {
           cached_required_dex_checksum_ = odex_dex_file->GetDexFileLocationChecksum();
           required_dex_checksum_found_ = true;
@@ -867,6 +865,8 @@
       status_ = kOatOutOfDate;
     } else {
       status_ = oat_file_assistant_->GivenOatFileStatus(*file);
+      VLOG(oat) << file->GetLocation() << " is " << status_
+          << " with filter " << file->GetCompilerFilter();
     }
   }
   return status_;
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 39848b4..05c5a22 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -320,6 +320,34 @@
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
+// Case: We have a DEX file and ODEX file for a different dex location.
+// Expect: The status is kDex2OatNeeded.
+TEST_F(OatFileAssistantTest, OatForDifferentDex) {
+  // Generate an odex file for OatForDifferentDex_A.jar
+  std::string dex_location_a = GetScratchDir() + "/OatForDifferentDex_A.jar";
+  std::string odex_location = GetOdexDir() + "/OatForDifferentDex.odex";
+  Copy(GetDexSrc1(), dex_location_a);
+  GenerateOdexForTest(dex_location_a, odex_location, CompilerFilter::kSpeed);
+
+  // Try to use that odex file for OatForDifferentDex.jar
+  std::string dex_location = GetScratchDir() + "/OatForDifferentDex.jar";
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
+  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileExists());
+  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
+  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+}
+
 // Case: We have a DEX file and speed-profile OAT file for it.
 // Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but
 // kDex2Oat if the profile has changed.
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index 1d1413b..1db09b4 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -53,7 +53,7 @@
 
 File* OS::OpenFileWithFlags(const char* name, int flags) {
   CHECK(name != nullptr);
-  bool read_only = (flags == O_RDONLY);
+  bool read_only = ((flags & O_ACCMODE) == O_RDONLY);
   std::unique_ptr<File> file(new File(name, flags, 0666, !read_only));
   if (!file->IsOpened()) {
     return nullptr;
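To make the one-line fix above concrete: O_RDONLY is defined as 0, so a plain equality test fails as soon as any other flag (e.g. O_CLOEXEC) is or'ed in, and such files were then treated as opened for writing. Masking with O_ACCMODE compares only the access-mode bits. A small self-contained check, illustrative only:

    #include <cassert>
    #include <fcntl.h>

    int main() {
      int flags = O_RDONLY | O_CLOEXEC;
      assert(flags != O_RDONLY);                // old test: read-only intent missed
      assert((flags & O_ACCMODE) == O_RDONLY);  // new test: access mode only
      return 0;
    }
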
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 595a47b..c7e4f8b 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -583,12 +583,6 @@
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
-  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kLambdas) {
-    LOG(WARNING) << "Experimental lambdas have been enabled. All lambda opcodes have "
-                 << "an unstable specification and are nearly guaranteed to change over time. "
-                 << "Do not attempt to write shipping code against these opcodes.";
-  }
-
   *runtime_options = std::move(args);
   return true;
 }
@@ -709,8 +703,6 @@
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
   UsageMessage(stream, "  -Xno-dex-file-fallback "
                        "(Don't fall back to dex files without oat files)\n");
-  UsageMessage(stream, "  -Xexperimental:lambdas "
-                       "(Enable new and experimental dalvik opcodes and semantics)\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n");
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 42e959c..5d32c09 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -99,8 +99,9 @@
   // Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them.
   static constexpr uintptr_t white_ptr_ = 0x0;    // Not marked.
   static constexpr uintptr_t gray_ptr_ = 0x1;     // Marked, but not marked through. On mark stack.
+  // TODO: black_ptr_ is unused, we should remove it.
   static constexpr uintptr_t black_ptr_ = 0x2;    // Marked through. Used for non-moving objects.
-  static constexpr uintptr_t rb_ptr_mask_ = 0x3;  // The low 2 bits for white|gray|black.
+  static constexpr uintptr_t rb_ptr_mask_ = 0x1;  // The low bits for white|gray.
 };
 
 }  // namespace art
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index bfa8c54..3245ba0 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -41,13 +41,15 @@
   DCHECK_NE(method, GetImtConflictMethod());
   DCHECK_NE(method, GetResolutionMethod());
   // Don't use GetCalleeSaveMethod(), some tests don't set all callee save methods.
-  if (method == GetCalleeSaveMethodUnchecked(Runtime::kRefsAndArgs)) {
-    return GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAll)) {
-    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAll);
+  if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveRefsAndArgs)) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
+  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAllCalleeSaves)) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAllCalleeSaves);
+  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveRefsOnly)) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsOnly);
   } else {
-    DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kRefsOnly));
-    return GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly);
+    DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kSaveEverything));
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveEverything);
   }
 }
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 9f0ef7c..68fa0d3 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -78,7 +78,6 @@
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "linear_alloc.h"
-#include "lambda/box_table.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -115,6 +114,7 @@
 #include "native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.h"
 #include "native/sun_misc_Unsafe.h"
 #include "native_bridge_art_interface.h"
+#include "native_stack_dump.h"
 #include "oat_file.h"
 #include "oat_file_manager.h"
 #include "os.h"
@@ -442,7 +442,6 @@
   GetMonitorList()->SweepMonitorList(visitor);
   GetJavaVM()->SweepJniWeakGlobals(visitor);
   GetHeap()->SweepAllocationRecords(visitor);
-  GetLambdaBoxTable()->SweepWeakBoxedLambdas(visitor);
 }
 
 bool Runtime::ParseOptions(const RuntimeOptions& raw_options,
@@ -1015,9 +1014,6 @@
     jit_options_->SetSaveProfilingInfo(false);
   }
 
-  // Allocate a global table of boxed lambda objects <-> closures.
-  lambda_box_table_ = MakeUnique<lambda::BoxTable>();
-
   // Use MemMap arena pool for jit, malloc otherwise. Malloc arenas are faster to allocate but
   // can't be trimmed as easily.
   const bool use_malloc = IsAotCompiler();
@@ -1638,7 +1634,6 @@
   intern_table_->ChangeWeakRootState(gc::kWeakRootStateNoReadsOrWrites);
   java_vm_->DisallowNewWeakGlobals();
   heap_->DisallowNewAllocationRecords();
-  lambda_box_table_->DisallowNewWeakBoxedLambdas();
 }
 
 void Runtime::AllowNewSystemWeaks() {
@@ -1647,7 +1642,6 @@
   intern_table_->ChangeWeakRootState(gc::kWeakRootStateNormal);  // TODO: Do this in the sweeping.
   java_vm_->AllowNewWeakGlobals();
   heap_->AllowNewAllocationRecords();
-  lambda_box_table_->AllowNewWeakBoxedLambdas();
 }
 
 void Runtime::BroadcastForNewSystemWeaks() {
@@ -1658,7 +1652,6 @@
   intern_table_->BroadcastForNewInterns();
   java_vm_->BroadcastForNewWeakGlobals();
   heap_->BroadcastForNewAllocationRecords();
-  lambda_box_table_->BroadcastForNewWeakBoxedLambdas();
 }
 
 void Runtime::SetInstructionSet(InstructionSet instruction_set) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index afa8e48..c971646 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -54,10 +54,6 @@
   class JitOptions;
 }  // namespace jit
 
-namespace lambda {
-  class BoxTable;
-}  // namespace lambda
-
 namespace mirror {
   class ClassLoader;
   class Array;
@@ -387,10 +383,11 @@
 
   // Returns a special method that describes all callee saves being spilled to the stack.
   enum CalleeSaveType {
-    kSaveAll,
-    kRefsOnly,
-    kRefsAndArgs,
-    kLastCalleeSaveType  // Value used for iteration
+    kSaveAllCalleeSaves,  // All callee-save registers.
+    kSaveRefsOnly,        // Only those callee-save registers that can hold references.
+    kSaveRefsAndArgs,     // References (see above) and arguments (usually caller-save registers).
+    kSaveEverything,      // All registers, including both callee-save and caller-save.
+    kLastCalleeSaveType   // Value used for iteration
   };
 
   bool HasCalleeSaveMethod(CalleeSaveType type) const {
@@ -552,10 +549,6 @@
     return (experimental_flags_ & flags) != ExperimentalFlags::kNone;
   }
 
-  lambda::BoxTable* GetLambdaBoxTable() const {
-    return lambda_box_table_.get();
-  }
-
   // Create the JIT and instrumentation and code cache.
   void CreateJit();
 
@@ -739,8 +732,6 @@
   std::unique_ptr<jit::Jit> jit_;
   std::unique_ptr<jit::JitOptions> jit_options_;
 
-  std::unique_ptr<lambda::BoxTable> lambda_box_table_;
-
   // Fault message, printed when we get a SIGSEGV.
   Mutex fault_message_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::string fault_message_ GUARDED_BY(fault_message_lock_);
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index bc963c5..60ebabc 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -28,6 +28,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/stringprintf.h"
+#include "native_stack_dump.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "utils.h"
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 31206b5..b95dfad 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -117,7 +117,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 RUNTIME_OPTIONS_KEY (std::string,         Fingerprint)
-RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{none, lambdas}
+RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{none}
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
diff --git a/runtime/stack.cc b/runtime/stack.cc
index dc5cada..ababf78 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -729,7 +729,7 @@
   Runtime* runtime = Runtime::Current();
 
   if (method->IsAbstract()) {
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
   }
 
   // This goes before IsProxyMethod since runtime methods have a null declaring class.
@@ -743,7 +743,7 @@
     // compiled method without any stubs. Therefore the method must have a OatQuickMethodHeader.
     DCHECK(!method->IsDirect() && !method->IsConstructor())
         << "Constructors of proxy classes must have a OatQuickMethodHeader";
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
   }
 
   // The only remaining case is if the method is native and uses the generic JNI stub.
@@ -755,7 +755,8 @@
   // Generic JNI frame.
   uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(method) + 1;
   size_t scope_size = HandleScope::SizeOf(handle_refs);
-  QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+  QuickMethodFrameInfo callee_info =
+      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
 
   // Callee saves + handle scope + method ref + alignment
   // Note: -sizeof(void*) since callee-save frame stores a whole method pointer.
@@ -833,10 +834,12 @@
             const instrumentation::InstrumentationStackFrame& instrumentation_frame =
                 GetInstrumentationStackFrame(thread_, instrumentation_stack_depth);
             instrumentation_stack_depth++;
-            if (GetMethod() == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAll)) {
+            if (GetMethod() ==
+                Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves)) {
               // Skip runtime save all callee frames which are used to deliver exceptions.
             } else if (instrumentation_frame.interpreter_entry_) {
-              ArtMethod* callee = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+              ArtMethod* callee =
+                  Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
               CHECK_EQ(GetMethod(), callee) << "Expected: " << PrettyMethod(callee) << " Found: "
                                             << PrettyMethod(GetMethod());
             } else {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 76f3161..3326736 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -55,6 +55,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "monitor.h"
+#include "native_stack_dump.h"
 #include "nth_caller_visitor.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
@@ -1817,22 +1818,12 @@
   ScopedLocalRef<jthrowable> exception(tlsPtr_.jni_env, tlsPtr_.jni_env->ExceptionOccurred());
   tlsPtr_.jni_env->ExceptionClear();
 
-  // If the thread has its own handler, use that.
-  ScopedLocalRef<jobject> handler(tlsPtr_.jni_env,
-                                  tlsPtr_.jni_env->GetObjectField(peer.get(),
-                                      WellKnownClasses::java_lang_Thread_uncaughtHandler));
-  if (handler.get() == nullptr) {
-    // Otherwise use the thread group's default handler.
-    handler.reset(tlsPtr_.jni_env->GetObjectField(peer.get(),
-                                                  WellKnownClasses::java_lang_Thread_group));
-  }
+  // Call the Thread instance's dispatchUncaughtException(Throwable) method.
+  tlsPtr_.jni_env->CallVoidMethod(peer.get(),
+      WellKnownClasses::java_lang_Thread_dispatchUncaughtException,
+      exception.get());
 
-  // Call the handler.
-  tlsPtr_.jni_env->CallVoidMethod(handler.get(),
-      WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler_uncaughtException,
-      peer.get(), exception.get());
-
-  // If the handler threw, clear that exception too.
+  // If dispatchUncaughtException threw, clear that exception too.
   tlsPtr_.jni_env->ExceptionClear();
 }
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 16ef0ff..419ecec 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -35,6 +35,7 @@
 #include "jni_internal.h"
 #include "lock_word.h"
 #include "monitor.h"
+#include "native_stack_dump.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "trace.h"
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 3f779df..515ba9f 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -46,20 +46,12 @@
 #include <sys/syscall.h>
 #endif
 
-// For DumpNativeStack.
-#include <backtrace/Backtrace.h>
-#include <backtrace/BacktraceMap.h>
-
 #if defined(__linux__)
 #include <linux/unistd.h>
 #endif
 
 namespace art {
 
-#if defined(__linux__)
-static constexpr bool kUseAddr2line = !kIsTargetBuild;
-#endif
-
 pid_t GetTid() {
 #if defined(__APPLE__)
   uint64_t owner;
@@ -1026,210 +1018,6 @@
   return "";
 }
 
-#if defined(__linux__)
-
-ALWAYS_INLINE
-static inline void WritePrefix(std::ostream* os, const char* prefix, bool odd) {
-  if (prefix != nullptr) {
-    *os << prefix;
-  }
-  *os << "  ";
-  if (!odd) {
-    *os << " ";
-  }
-}
-
-static bool RunCommand(std::string cmd, std::ostream* os, const char* prefix) {
-  FILE* stream = popen(cmd.c_str(), "r");
-  if (stream) {
-    if (os != nullptr) {
-      bool odd_line = true;               // We indent them differently.
-      bool wrote_prefix = false;          // Have we already written a prefix?
-      constexpr size_t kMaxBuffer = 128;  // Relatively small buffer. Should be OK as we're on an
-                                          // alt stack, but just to be sure...
-      char buffer[kMaxBuffer];
-      while (!feof(stream)) {
-        if (fgets(buffer, kMaxBuffer, stream) != nullptr) {
-          // Split on newlines.
-          char* tmp = buffer;
-          for (;;) {
-            char* new_line = strchr(tmp, '\n');
-            if (new_line == nullptr) {
-              // Print the rest.
-              if (*tmp != 0) {
-                if (!wrote_prefix) {
-                  WritePrefix(os, prefix, odd_line);
-                }
-                wrote_prefix = true;
-                *os << tmp;
-              }
-              break;
-            }
-            if (!wrote_prefix) {
-              WritePrefix(os, prefix, odd_line);
-            }
-            char saved = *(new_line + 1);
-            *(new_line + 1) = 0;
-            *os << tmp;
-            *(new_line + 1) = saved;
-            tmp = new_line + 1;
-            odd_line = !odd_line;
-            wrote_prefix = false;
-          }
-        }
-      }
-    }
-    pclose(stream);
-    return true;
-  } else {
-    return false;
-  }
-}
-
-static void Addr2line(const std::string& map_src, uintptr_t offset, std::ostream& os,
-                      const char* prefix) {
-  std::string cmdline(StringPrintf("addr2line --functions --inlines --demangle -e %s %zx",
-                                   map_src.c_str(), offset));
-  RunCommand(cmdline.c_str(), &os, prefix);
-}
-
-static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
-  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
-      method->GetEntryPointFromQuickCompiledCode()));
-  if (code == 0) {
-    return pc == 0;
-  }
-  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
-  return code <= pc && pc <= (code + code_size);
-}
-#endif
-
-void DumpNativeStack(std::ostream& os, pid_t tid, BacktraceMap* existing_map, const char* prefix,
-    ArtMethod* current_method, void* ucontext_ptr) {
-#if __linux__
-  // b/18119146
-  if (RUNNING_ON_MEMORY_TOOL != 0) {
-    return;
-  }
-
-  BacktraceMap* map = existing_map;
-  std::unique_ptr<BacktraceMap> tmp_map;
-  if (map == nullptr) {
-    tmp_map.reset(BacktraceMap::Create(getpid()));
-    map = tmp_map.get();
-  }
-  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
-  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
-    os << prefix << "(backtrace::Unwind failed for thread " << tid
-       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
-    return;
-  } else if (backtrace->NumFrames() == 0) {
-    os << prefix << "(no native stack frames for thread " << tid << ")\n";
-    return;
-  }
-
-  // Check whether we have and should use addr2line.
-  bool use_addr2line;
-  if (kUseAddr2line) {
-    // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
-    // and print to stderr.
-    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h", nullptr, nullptr);
-  } else {
-    use_addr2line = false;
-  }
-
-  for (Backtrace::const_iterator it = backtrace->begin();
-       it != backtrace->end(); ++it) {
-    // We produce output like this:
-    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
-    // In order for parsing tools to continue to function, the stack dump
-    // format must at least adhere to this format:
-    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
-    // The parsers require a single space before and after pc, and two spaces
-    // after the <RELATIVE_ADDR>. There can be any prefix data before the
-    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
-    os << prefix << StringPrintf("#%02zu pc ", it->num);
-    bool try_addr2line = false;
-    if (!BacktraceMap::IsValid(it->map)) {
-      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
-                                                            : "%08" PRIxPTR "  ???",
-                         it->pc);
-    } else {
-      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
-                                                            : "%08" PRIxPTR "  ",
-                         BacktraceMap::GetRelativePc(it->map, it->pc));
-      os << it->map.name;
-      os << " (";
-      if (!it->func_name.empty()) {
-        os << it->func_name;
-        if (it->func_offset != 0) {
-          os << "+" << it->func_offset;
-        }
-        try_addr2line = true;
-      } else if (current_method != nullptr &&
-          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
-          PcIsWithinQuickCode(current_method, it->pc)) {
-        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
-        os << JniLongName(current_method) << "+"
-           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
-      } else {
-        os << "???";
-      }
-      os << ")";
-    }
-    os << "\n";
-    if (try_addr2line && use_addr2line) {
-      Addr2line(it->map.name, it->pc - it->map.start, os, prefix);
-    }
-  }
-#else
-  UNUSED(os, tid, existing_map, prefix, current_method, ucontext_ptr);
-#endif
-}
-
-#if defined(__APPLE__)
-
-// TODO: is there any way to get the kernel stack on Mac OS?
-void DumpKernelStack(std::ostream&, pid_t, const char*, bool) {}
-
-#else
-
-void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
-  if (tid == GetTid()) {
-    // There's no point showing that we're reading our stack out of /proc!
-    return;
-  }
-
-  std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
-  std::string kernel_stack;
-  if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
-    os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
-    return;
-  }
-
-  std::vector<std::string> kernel_stack_frames;
-  Split(kernel_stack, '\n', &kernel_stack_frames);
-  // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
-  // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
-  kernel_stack_frames.pop_back();
-  for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
-    // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
-    // into "futex_wait_queue_me+0xcd/0x110".
-    const char* text = kernel_stack_frames[i].c_str();
-    const char* close_bracket = strchr(text, ']');
-    if (close_bracket != nullptr) {
-      text = close_bracket + 2;
-    }
-    os << prefix;
-    if (include_count) {
-      os << StringPrintf("#%02zd ", i);
-    }
-    os << text << "\n";
-  }
-}
-
-#endif
-
 const char* GetAndroidRoot() {
   const char* android_root = getenv("ANDROID_ROOT");
   if (android_root == nullptr) {
diff --git a/runtime/utils.h b/runtime/utils.h
index b2746ee..699b732 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -242,21 +242,6 @@
 // implementation-defined limit.
 void SetThreadName(const char* thread_name);
 
-// Dumps the native stack for thread 'tid' to 'os'.
-void DumpNativeStack(std::ostream& os,
-                     pid_t tid,
-                     BacktraceMap* map = nullptr,
-                     const char* prefix = "",
-                     ArtMethod* current_method = nullptr,
-                     void* ucontext = nullptr)
-    NO_THREAD_SAFETY_ANALYSIS;
-
-// Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
-void DumpKernelStack(std::ostream& os,
-                     pid_t tid,
-                     const char* prefix = "",
-                     bool include_count = true);
-
 // Find $ANDROID_ROOT, /system, or abort.
 const char* GetAndroidRoot();
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 03de399..40f12e9 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1157,9 +1157,6 @@
     case Instruction::kVerifyRegCWide:
       result = result && CheckWideRegisterIndex(inst->VRegC());
       break;
-    case Instruction::kVerifyRegCString:
-      result = result && CheckStringIndex(inst->VRegC());
-      break;
   }
   switch (inst->GetVerifyExtraFlags()) {
     case Instruction::kVerifyArrayData:
@@ -3331,69 +3328,15 @@
       }
       break;
     }
-    case Instruction::INVOKE_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement invoke-lambda verification
-      break;
-    }
-    case Instruction::CAPTURE_VARIABLE: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement capture-variable verification
-      break;
-    }
-    case Instruction::CREATE_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement create-lambda verification
-      break;
-    }
-    case Instruction::LIBERATE_VARIABLE: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement liberate-variable verification
-      break;
-    }
-
-    case Instruction::UNUSED_F4: {
-      DCHECK(false);  // TODO(iam): Implement opcodes for lambdas
-      // Conservatively fail verification on release builds.
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
-      break;
-    }
-
-    case Instruction::BOX_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement box-lambda verification
-
-      // Partial verification. Sets the resulting type to always be an object, which
-      // is good enough for some other verification to occur without hard-failing.
-      const uint32_t vreg_target_object = inst->VRegA_22x();  // box-lambda vA, vB
-      const RegType& reg_type = reg_types_.JavaLangObject(need_precise_constants_);
-      work_line_->SetRegisterType<LockOp::kClear>(this, vreg_target_object, reg_type);
-      break;
-    }
-
-     case Instruction::UNBOX_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement unbox-lambda verification
-      break;
-    }
 
     /* These should never appear during verification. */
     case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-    case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
+    case Instruction::UNUSED_F3 ... Instruction::UNUSED_F9:
+    case Instruction::UNUSED_FC ... Instruction::UNUSED_FF:
     case Instruction::UNUSED_79:
     case Instruction::UNUSED_7A:
+    case Instruction::INVOKE_POLYMORPHIC:
+    case Instruction::INVOKE_POLYMORPHIC_RANGE:
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
       break;
 
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 2592a21..5fe95c2 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -86,13 +86,10 @@
   VERIFY_ERROR_ACCESS_METHOD = 128,       // IllegalAccessError.
   VERIFY_ERROR_CLASS_CHANGE = 256,        // IncompatibleClassChangeError.
   VERIFY_ERROR_INSTANTIATION = 512,       // InstantiationError.
-  // For opcodes that don't have complete verifier support (such as lambda opcodes),
-  // we need a way to continue execution at runtime without attempting to re-verify
-  // (since we know it will fail no matter what). Instead, run as the interpreter
-  // in a special "do access checks" mode which will perform verifier-like checking
-  // on the fly.
-  //
-  // TODO: Once all new opcodes have implemented full verifier support, this can be removed.
+  // For opcodes that don't have complete verifier support, we need a way to continue
+  // execution at runtime without attempting to re-verify (since we know it will fail no
+  // matter what). Instead, run as the interpreter in a special "do access checks" mode
+  // which will perform verifier-like checking on the fly.
   VERIFY_ERROR_FORCE_INTERPRETER = 1024,  // Skip the verification phase at runtime;
                                           // force the interpreter to do access checks.
                                           // (sets a soft fail at compile time).
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 355d552..48deb35 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -57,7 +57,6 @@
 jclass WellKnownClasses::java_lang_StringFactory;
 jclass WellKnownClasses::java_lang_System;
 jclass WellKnownClasses::java_lang_Thread;
-jclass WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler;
 jclass WellKnownClasses::java_lang_ThreadGroup;
 jclass WellKnownClasses::java_lang_Throwable;
 jclass WellKnownClasses::java_nio_DirectByteBuffer;
@@ -121,9 +120,9 @@
 jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromCodePoints;
 jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromStringBuilder;
 jmethodID WellKnownClasses::java_lang_System_runFinalization = nullptr;
+jmethodID WellKnownClasses::java_lang_Thread_dispatchUncaughtException;
 jmethodID WellKnownClasses::java_lang_Thread_init;
 jmethodID WellKnownClasses::java_lang_Thread_run;
-jmethodID WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler_uncaughtException;
 jmethodID WellKnownClasses::java_lang_ThreadGroup_removeThread;
 jmethodID WellKnownClasses::java_nio_DirectByteBuffer_init;
 jmethodID WellKnownClasses::libcore_reflect_AnnotationFactory_createAnnotation;
@@ -141,7 +140,6 @@
 jfieldID WellKnownClasses::java_lang_Thread_lock;
 jfieldID WellKnownClasses::java_lang_Thread_name;
 jfieldID WellKnownClasses::java_lang_Thread_priority;
-jfieldID WellKnownClasses::java_lang_Thread_uncaughtHandler;
 jfieldID WellKnownClasses::java_lang_Thread_nativePeer;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_groups;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_ngroups;
@@ -245,8 +243,6 @@
   java_lang_StringFactory = CacheClass(env, "java/lang/StringFactory");
   java_lang_System = CacheClass(env, "java/lang/System");
   java_lang_Thread = CacheClass(env, "java/lang/Thread");
-  java_lang_Thread__UncaughtExceptionHandler = CacheClass(env,
-      "java/lang/Thread$UncaughtExceptionHandler");
   java_lang_ThreadGroup = CacheClass(env, "java/lang/ThreadGroup");
   java_lang_Throwable = CacheClass(env, "java/lang/Throwable");
   java_nio_DirectByteBuffer = CacheClass(env, "java/nio/DirectByteBuffer");
@@ -273,9 +269,9 @@
   java_lang_ref_ReferenceQueue_add = CacheMethod(env, java_lang_ref_ReferenceQueue.get(), true, "add", "(Ljava/lang/ref/Reference;)V");
 
   java_lang_reflect_Proxy_invoke = CacheMethod(env, java_lang_reflect_Proxy, true, "invoke", "(Ljava/lang/reflect/Proxy;Ljava/lang/reflect/Method;[Ljava/lang/Object;)Ljava/lang/Object;");
+  java_lang_Thread_dispatchUncaughtException = CacheMethod(env, java_lang_Thread, false, "dispatchUncaughtException", "(Ljava/lang/Throwable;)V");
   java_lang_Thread_init = CacheMethod(env, java_lang_Thread, false, "<init>", "(Ljava/lang/ThreadGroup;Ljava/lang/String;IZ)V");
   java_lang_Thread_run = CacheMethod(env, java_lang_Thread, false, "run", "()V");
-  java_lang_Thread__UncaughtExceptionHandler_uncaughtException = CacheMethod(env, java_lang_Thread__UncaughtExceptionHandler, false, "uncaughtException", "(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
   java_lang_ThreadGroup_removeThread = CacheMethod(env, java_lang_ThreadGroup, false, "threadTerminated", "(Ljava/lang/Thread;)V");
   java_nio_DirectByteBuffer_init = CacheMethod(env, java_nio_DirectByteBuffer, false, "<init>", "(JI)V");
   libcore_reflect_AnnotationFactory_createAnnotation = CacheMethod(env, libcore_reflect_AnnotationFactory, true, "createAnnotation", "(Ljava/lang/Class;[Llibcore/reflect/AnnotationMember;)Ljava/lang/annotation/Annotation;");
@@ -349,7 +345,6 @@
   java_lang_Thread_lock = CacheField(env, java_lang_Thread, false, "lock", "Ljava/lang/Object;");
   java_lang_Thread_name = CacheField(env, java_lang_Thread, false, "name", "Ljava/lang/String;");
   java_lang_Thread_priority = CacheField(env, java_lang_Thread, false, "priority", "I");
-  java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtExceptionHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
   java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "J");
   java_lang_ThreadGroup_groups = CacheField(env, java_lang_ThreadGroup, false, "groups", "[Ljava/lang/ThreadGroup;");
   java_lang_ThreadGroup_ngroups = CacheField(env, java_lang_ThreadGroup, false, "ngroups", "I");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index cc60b4d..c9faf69 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -69,7 +69,6 @@
   static jclass java_lang_System;
   static jclass java_lang_Thread;
   static jclass java_lang_ThreadGroup;
-  static jclass java_lang_Thread__UncaughtExceptionHandler;
   static jclass java_lang_Throwable;
   static jclass java_util_ArrayList;
   static jclass java_util_Collections;
@@ -132,9 +131,9 @@
   static jmethodID java_lang_StringFactory_newStringFromCodePoints;
   static jmethodID java_lang_StringFactory_newStringFromStringBuilder;
   static jmethodID java_lang_System_runFinalization;
+  static jmethodID java_lang_Thread_dispatchUncaughtException;
   static jmethodID java_lang_Thread_init;
   static jmethodID java_lang_Thread_run;
-  static jmethodID java_lang_Thread__UncaughtExceptionHandler_uncaughtException;
   static jmethodID java_lang_ThreadGroup_removeThread;
   static jmethodID java_nio_DirectByteBuffer_init;
   static jmethodID libcore_reflect_AnnotationFactory_createAnnotation;
@@ -154,7 +153,6 @@
   static jfieldID java_lang_Thread_lock;
   static jfieldID java_lang_Thread_name;
   static jfieldID java_lang_Thread_priority;
-  static jfieldID java_lang_Thread_uncaughtHandler;
   static jfieldID java_lang_Thread_nativePeer;
   static jfieldID java_lang_ThreadGroup_groups;
   static jfieldID java_lang_ThreadGroup_ngroups;
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index e0530d8..0221900 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -220,7 +220,7 @@
         InvocationHandler handler = new DummyInvocationHandler();
         SimpleInterface proxy =
                 (SimpleInterface) Proxy.newProxyInstance(SimpleInterface.class.getClassLoader(),
-                        new Class[] {SimpleInterface.class}, handler);
+                        new Class<?>[] {SimpleInterface.class}, handler);
         if (testGetMethodID(SimpleInterface.class) == 0) {
             throw new AssertionError();
         }
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index 9d4618a..d43d374 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -249,6 +249,6 @@
     public volatile Object volatileObjectVar = null;
   }
 
-  private static native int vmArrayBaseOffset(Class clazz);
-  private static native int vmArrayIndexScale(Class clazz);
+  private static native int vmArrayBaseOffset(Class<?> clazz);
+  private static native int vmArrayIndexScale(Class<?> clazz);
 }
diff --git a/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java b/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
index 3088866..aa7808f 100644
--- a/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
+++ b/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
@@ -10,5 +10,5 @@
     boolean callMe() default false;
     boolean biteMe();
     AnnoFancyMethodEnum enumerated() default AnnoFancyMethodEnum.FOO;
-    Class someClass() default SomeClass.class;
+    Class<?> someClass() default SomeClass.class;
 }
diff --git a/test/005-annotations/src/android/test/anno/AnnoMissingClass.java b/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
index c32e9a2..7933b80 100644
--- a/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
+++ b/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
@@ -20,5 +20,5 @@
 
 @Retention(RetentionPolicy.RUNTIME)
 public @interface AnnoMissingClass {
-    Class value();
+    Class<?> value();
 }
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index 51254b4..8ea8e8e 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -42,7 +42,7 @@
         }
     }
 
-    static void printAnnotations(Class clazz) {
+    static void printAnnotations(Class<?> clazz) {
         Annotation[] annos;
         Annotation[][] parAnnos;
 
@@ -52,7 +52,7 @@
         printAnnotationArray("", annos);
         System.out.println();
 
-        for (Constructor c: clazz.getDeclaredConstructors()) {
+        for (Constructor<?> c: clazz.getDeclaredConstructors()) {
             annos = c.getDeclaredAnnotations();
             System.out.println("  annotations on CTOR " + c + ":");
             printAnnotationArray("  ", annos);
@@ -139,8 +139,7 @@
         final IntToString[] mapping;
 
         try {
-            meth = TestAnnotations.class.getMethod("getFocusType",
-                    (Class[])null);
+            meth = TestAnnotations.class.getMethod("getFocusType");
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -255,7 +254,7 @@
     }
 
     private static class VMRuntime {
-        private static Class vmRuntimeClass;
+        private static Class<?> vmRuntimeClass;
         private static Method getRuntimeMethod;
         private static Method getTargetSdkVersionMethod;
         private static Method setTargetSdkVersionMethod;
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index 0226614..d1ea0b1 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -85,7 +85,7 @@
         Assert.assertEquals("this is a path", test.replaceAll("/", " "));
         Assert.assertEquals("this is a path", test.replace("/", " "));
 
-        Class Strings = Class.forName("com.android.org.bouncycastle.util.Strings");
+        Class<?> Strings = Class.forName("com.android.org.bouncycastle.util.Strings");
         Method fromUTF8ByteArray = Strings.getDeclaredMethod("fromUTF8ByteArray", byte[].class);
         String result = (String) fromUTF8ByteArray.invoke(null, new byte[] {'O', 'K'});
         System.out.println(result);
diff --git a/test/031-class-attributes/src/ClassAttrs.java b/test/031-class-attributes/src/ClassAttrs.java
index 38bd525..346e13d 100644
--- a/test/031-class-attributes/src/ClassAttrs.java
+++ b/test/031-class-attributes/src/ClassAttrs.java
@@ -118,14 +118,13 @@
         printClassAttrs(FancyClass.class);
 
         try {
-            Constructor cons;
-            cons = MemberClass.class.getConstructor(
-                    new Class[] { MemberClass.class });
+            Constructor<?> cons;
+            cons = MemberClass.class.getConstructor(MemberClass.class);
             System.out.println("constructor signature: "
                     + getSignatureAttribute(cons));
 
             Method meth;
-            meth = MemberClass.class.getMethod("foo", (Class[]) null);
+            meth = MemberClass.class.getMethod("foo");
             System.out.println("method signature: "
                     + getSignatureAttribute(meth));
 
@@ -222,7 +221,7 @@
     public static String getSignatureAttribute(Object obj) {
         Method method;
         try {
-            Class c = obj.getClass();
+            Class<?> c = obj.getClass();
             if (c == Method.class || c == Constructor.class) {
               c = AbstractMethod.class;
             }
@@ -263,9 +262,7 @@
     /*
      * Dump a variety of class attributes.
      */
-    public static void printClassAttrs(Class clazz) {
-        Class clazz2;
-
+    public static <T> void printClassAttrs(Class<T> clazz) {
         System.out.println("***** " + clazz + ":");
 
         System.out.println("  name: "
@@ -321,7 +318,7 @@
         System.out.println("  genericInterfaces: "
             + stringifyTypeArray(clazz.getGenericInterfaces()));
 
-        TypeVariable<Class<?>>[] typeParameters = clazz.getTypeParameters();
+        TypeVariable<Class<T>>[] typeParameters = clazz.getTypeParameters();
         System.out.println("  typeParameters: "
             + stringifyTypeArray(typeParameters));
     }
diff --git a/test/032-concrete-sub/src/ConcreteSub.java b/test/032-concrete-sub/src/ConcreteSub.java
index 083f25d..95adf63 100644
--- a/test/032-concrete-sub/src/ConcreteSub.java
+++ b/test/032-concrete-sub/src/ConcreteSub.java
@@ -37,13 +37,13 @@
         /*
          * Check reflection stuff.
          */
-        Class absClass = AbstractBase.class;
+        Class<?> absClass = AbstractBase.class;
         Method meth;
 
         System.out.println("class modifiers=" + absClass.getModifiers());
 
         try {
-            meth = absClass.getMethod("redefineMe", (Class[]) null);
+            meth = absClass.getMethod("redefineMe");
         } catch (NoSuchMethodException nsme) {
             nsme.printStackTrace();
             return;
diff --git a/test/042-new-instance/src/Main.java b/test/042-new-instance/src/Main.java
index 8cd6b2e..755d62e 100644
--- a/test/042-new-instance/src/Main.java
+++ b/test/042-new-instance/src/Main.java
@@ -33,7 +33,7 @@
     static void testClassNewInstance() {
         // should succeed
         try {
-            Class c = Class.forName("LocalClass");
+            Class<?> c = Class.forName("LocalClass");
             Object obj = c.newInstance();
             System.out.println("LocalClass succeeded");
         } catch (Exception ex) {
@@ -43,7 +43,7 @@
 
         // should fail
         try {
-            Class c = Class.forName("otherpackage.PackageAccess");
+            Class<?> c = Class.forName("otherpackage.PackageAccess");
             Object obj = c.newInstance();
             System.err.println("ERROR: PackageAccess succeeded unexpectedly");
         } catch (IllegalAccessException iae) {
@@ -71,8 +71,8 @@
     static void testConstructorNewInstance() {
         // should fail -- getConstructor only returns public constructors
         try {
-            Class c = Class.forName("LocalClass");
-            Constructor cons = c.getConstructor(new Class[0] /*(Class[])null*/);
+            Class<?> c = Class.forName("LocalClass");
+            Constructor<?> cons = c.getConstructor();
             System.err.println("Cons LocalClass succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             System.out.println("Cons LocalClass failed as expected");
@@ -83,8 +83,8 @@
 
         // should succeed
         try {
-            Class c = Class.forName("LocalClass2");
-            Constructor cons = c.getConstructor((Class[]) null);
+            Class<?> c = Class.forName("LocalClass2");
+            Constructor<?> cons = c.getConstructor();
             Object obj = cons.newInstance();
             System.out.println("Cons LocalClass2 succeeded");
         } catch (Exception ex) {
@@ -94,8 +94,8 @@
 
         // should succeed
         try {
-            Class c = Class.forName("Main$InnerClass");
-            Constructor cons = c.getDeclaredConstructor(new Class<?>[]{Main.class});
+            Class<?> c = Class.forName("Main$InnerClass");
+            Constructor<?> cons = c.getDeclaredConstructor(Main.class);
             Object obj = cons.newInstance(new Main());
             System.out.println("Cons InnerClass succeeded");
         } catch (Exception ex) {
@@ -105,8 +105,8 @@
 
         // should succeed
         try {
-            Class c = Class.forName("Main$StaticInnerClass");
-            Constructor cons = c.getDeclaredConstructor((Class[]) null);
+            Class<?> c = Class.forName("Main$StaticInnerClass");
+            Constructor<?> cons = c.getDeclaredConstructor();
             Object obj = cons.newInstance();
             System.out.println("Cons StaticInnerClass succeeded");
         } catch (Exception ex) {
@@ -116,8 +116,8 @@
 
         // should fail
         try {
-            Class c = Class.forName("otherpackage.PackageAccess");
-            Constructor cons = c.getConstructor(new Class[0] /*(Class[])null*/);
+            Class<?> c = Class.forName("otherpackage.PackageAccess");
+            Constructor<?> cons = c.getConstructor();
             System.err.println("ERROR: Cons PackageAccess succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             // constructor isn't public
@@ -129,8 +129,8 @@
 
         // should fail
         try {
-            Class c = Class.forName("MaybeAbstract");
-            Constructor cons = c.getConstructor(new Class[0] /*(Class[])null*/);
+            Class<?> c = Class.forName("MaybeAbstract");
+            Constructor<?> cons = c.getConstructor();
             Object obj = cons.newInstance();
             System.err.println("ERROR: Cons MaybeAbstract succeeded unexpectedly");
         } catch (InstantiationException ie) {
@@ -143,8 +143,8 @@
 
         // should fail
         try {
-            Class c = Class.forName("otherpackage.PackageAccess2");
-            Constructor cons = c.getConstructor((Class[]) null);
+            Class<?> c = Class.forName("otherpackage.PackageAccess2");
+            Constructor<?> cons = c.getConstructor();
             if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
             Object obj = cons.newInstance();
             System.err.println("ERROR: Cons PackageAccess2 succeeded unexpectedly");
@@ -197,7 +197,7 @@
 
         static Object newInstance() {
             try {
-                Class c = CC.class;
+                Class<?> c = CC.class;
                 return c.newInstance();
             } catch (Exception ex) {
                 ex.printStackTrace();
diff --git a/test/042-new-instance/src/otherpackage/ConstructorAccess.java b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
index a74e9a0..79d572c 100644
--- a/test/042-new-instance/src/otherpackage/ConstructorAccess.java
+++ b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
@@ -29,8 +29,8 @@
     // accessibility using the frame below (in Main class), we will see an
     // IllegalAccessException from #newInstance
     static public void newConstructorInstance() throws Exception {
-      Class c = Inner.class;
-      Constructor cons = c.getDeclaredConstructor((Class[]) null);
+      Class<?> c = Inner.class;
+      Constructor<?> cons = c.getDeclaredConstructor();
       Object obj = cons.newInstance();
     }
 }
diff --git a/test/044-proxy/src/BasicTest.java b/test/044-proxy/src/BasicTest.java
index 445a6cc..5f04b93 100644
--- a/test/044-proxy/src/BasicTest.java
+++ b/test/044-proxy/src/BasicTest.java
@@ -99,18 +99,16 @@
         InvocationHandler handler = new MyInvocationHandler(proxyMe);
 
         /* create the proxy class */
-        Class proxyClass = Proxy.getProxyClass(Shapes.class.getClassLoader(),
-                            new Class[] { Quads.class, Colors.class, Trace.class });
+        Class<?> proxyClass = Proxy.getProxyClass(Shapes.class.getClassLoader(),
+                Quads.class, Colors.class, Trace.class);
         Main.registerProxyClassName(proxyClass.getCanonicalName());
 
         /* create a proxy object, passing the handler object in */
         Object proxy = null;
         try {
-            Constructor<Class> cons;
-            cons = proxyClass.getConstructor(
-                            new Class[] { InvocationHandler.class });
+            Constructor<?> cons = proxyClass.getConstructor(InvocationHandler.class);
             //System.out.println("Constructor is " + cons);
-            proxy = cons.newInstance(new Object[] { handler });
+            proxy = cons.newInstance(handler);
         } catch (NoSuchMethodException nsme) {
             System.err.println("failed: " + nsme);
         } catch (InstantiationException ie) {
diff --git a/test/044-proxy/src/Clash.java b/test/044-proxy/src/Clash.java
index adeffdc..d000112 100644
--- a/test/044-proxy/src/Clash.java
+++ b/test/044-proxy/src/Clash.java
@@ -30,7 +30,7 @@
         /* try passing in the same interface twice */
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] { Interface1A.class, Interface1A.class },
+                new Class<?>[] { Interface1A.class, Interface1A.class },
                 handler);
             System.err.println("Dupe did not throw expected exception");
         } catch (IllegalArgumentException iae) {
@@ -39,7 +39,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] { Interface1A.class, Interface1B.class },
+                new Class<?>[] { Interface1A.class, Interface1B.class },
                 handler);
             System.err.println("Clash did not throw expected exception");
         } catch (IllegalArgumentException iae) {
diff --git a/test/044-proxy/src/Clash2.java b/test/044-proxy/src/Clash2.java
index 2a384f4..e405cfe 100644
--- a/test/044-proxy/src/Clash2.java
+++ b/test/044-proxy/src/Clash2.java
@@ -29,7 +29,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] { Interface2A.class, Interface2B.class },
+                new Class<?>[] { Interface2A.class, Interface2B.class },
                 handler);
             System.err.println("Clash2 did not throw expected exception");
         } catch (IllegalArgumentException iae) {
diff --git a/test/044-proxy/src/Clash3.java b/test/044-proxy/src/Clash3.java
index 6d6f2f2..44806ce 100644
--- a/test/044-proxy/src/Clash3.java
+++ b/test/044-proxy/src/Clash3.java
@@ -29,7 +29,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] {
+                new Class<?>[] {
                     Interface3a.class,
                     Interface3base.class,
                     Interface3aa.class,
diff --git a/test/044-proxy/src/Clash4.java b/test/044-proxy/src/Clash4.java
index 1bfb37f..ca5c3ab 100644
--- a/test/044-proxy/src/Clash4.java
+++ b/test/044-proxy/src/Clash4.java
@@ -29,7 +29,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] {
+                new Class<?>[] {
                     Interface4a.class,
                     Interface4aa.class,
                     Interface4base.class,
diff --git a/test/044-proxy/src/FloatSelect.java b/test/044-proxy/src/FloatSelect.java
index febe697..217ccaf 100644
--- a/test/044-proxy/src/FloatSelect.java
+++ b/test/044-proxy/src/FloatSelect.java
@@ -34,7 +34,7 @@
     public static void main(String[] args) {
         FloatSelectI proxyObject = (FloatSelectI) Proxy.newProxyInstance(
             FloatSelectI.class.getClassLoader(),
-            new Class[] { FloatSelectI.class },
+            new Class<?>[] { FloatSelectI.class },
             new FloatSelectIInvoke1());
 
         float floatResult = proxyObject.method(2.1f, 5.8f);
diff --git a/test/044-proxy/src/NativeProxy.java b/test/044-proxy/src/NativeProxy.java
index b425da8..c609dc2 100644
--- a/test/044-proxy/src/NativeProxy.java
+++ b/test/044-proxy/src/NativeProxy.java
@@ -40,7 +40,7 @@
         try {
             NativeInterface inf = (NativeInterface)Proxy.newProxyInstance(
                     NativeProxy.class.getClassLoader(),
-                    new Class[] { NativeInterface.class },
+                    new Class<?>[] { NativeInterface.class },
                     new NativeInvocationHandler());
 
             nativeCall(inf);
diff --git a/test/044-proxy/src/ReturnsAndArgPassing.java b/test/044-proxy/src/ReturnsAndArgPassing.java
index 225cc5b..3d8ebf0 100644
--- a/test/044-proxy/src/ReturnsAndArgPassing.java
+++ b/test/044-proxy/src/ReturnsAndArgPassing.java
@@ -98,7 +98,7 @@
     MyInvocationHandler myHandler = new MyInvocationHandler();
     MyInterface proxyMyInterface =
         (MyInterface)Proxy.newProxyInstance(ReturnsAndArgPassing.class.getClassLoader(),
-                                            new Class[] { MyInterface.class },
+                                            new Class<?>[] { MyInterface.class },
                                             myHandler);
     check(fooInvocations == 0);
     proxyMyInterface.voidFoo();
@@ -441,7 +441,7 @@
     MyInvocationHandler myHandler = new MyInvocationHandler();
     MyInterface proxyMyInterface =
         (MyInterface)Proxy.newProxyInstance(ReturnsAndArgPassing.class.getClassLoader(),
-                                            new Class[] { MyInterface.class },
+                                            new Class<?>[] { MyInterface.class },
                                             myHandler);
 
     check((Integer)proxyMyInterface.selectArg(0, Integer.MAX_VALUE, Long.MAX_VALUE,
diff --git a/test/044-proxy/src/WrappedThrow.java b/test/044-proxy/src/WrappedThrow.java
index 27ae84e..643ba05 100644
--- a/test/044-proxy/src/WrappedThrow.java
+++ b/test/044-proxy/src/WrappedThrow.java
@@ -32,7 +32,7 @@
 
         try {
             proxy = Proxy.newProxyInstance(WrappedThrow.class.getClassLoader(),
-                new Class[] { InterfaceW1.class, InterfaceW2.class },
+                new Class<?>[] { InterfaceW1.class, InterfaceW2.class },
                 handler);
         } catch (IllegalArgumentException iae) {
             System.out.println("WT init failed");
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index 67a0d11..10dad8d 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -32,7 +32,7 @@
     public Main(ArrayList<Integer> stuff) {}
 
     void printMethodInfo(Method meth) {
-        Class[] params, exceptions;
+        Class<?>[] params, exceptions;
         int i;
 
         System.out.println("Method name is " + meth.getName());
@@ -62,7 +62,7 @@
     private void showStrings(Target instance)
         throws NoSuchFieldException, IllegalAccessException {
 
-        Class target = Target.class;
+        Class<?> target = Target.class;
         String one, two, three, four;
         Field field = null;
 
@@ -80,15 +80,15 @@
 
     public static void checkAccess() {
         try {
-            Class target = otherpackage.Other.class;
+            Class<?> target = otherpackage.Other.class;
             Object instance = new otherpackage.Other();
             Method meth;
 
-            meth = target.getMethod("publicMethod", (Class[]) null);
+            meth = target.getMethod("publicMethod");
             meth.invoke(instance);
 
             try {
-                meth = target.getMethod("packageMethod", (Class[]) null);
+                meth = target.getMethod("packageMethod");
                 System.err.println("succeeded on package-scope method");
             } catch (NoSuchMethodException nsme) {
                 // good
@@ -97,7 +97,7 @@
 
             instance = otherpackage.Other.getInnerClassInstance();
             target = instance.getClass();
-            meth = target.getMethod("innerMethod", (Class[]) null);
+            meth = target.getMethod("innerMethod");
             try {
                 if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
                 meth.invoke(instance);
@@ -121,26 +121,25 @@
     }
 
     public void run() {
-        Class target = Target.class;
+        Class<Target> target = Target.class;
         Method meth = null;
         Field field = null;
         boolean excep;
 
         try {
-            meth = target.getMethod("myMethod", new Class[] { int.class });
+            meth = target.getMethod("myMethod", int.class);
 
             if (meth.getDeclaringClass() != target)
                 throw new RuntimeException();
             printMethodInfo(meth);
 
-            meth = target.getMethod("myMethod", new Class[] { float.class });
+            meth = target.getMethod("myMethod", float.class);
             printMethodInfo(meth);
 
-            meth = target.getMethod("myNoargMethod", (Class[]) null);
+            meth = target.getMethod("myNoargMethod");
             printMethodInfo(meth);
 
-            meth = target.getMethod("myMethod",
-                new Class[] { String[].class, float.class, char.class });
+            meth = target.getMethod("myMethod", String[].class, float.class, char.class);
             printMethodInfo(meth);
 
             Target instance = new Target();
@@ -157,11 +156,11 @@
             System.out.println("Result of invoke: " + boxval.intValue());
 
             System.out.println("Calling no-arg void-return method");
-            meth = target.getMethod("myNoargMethod", (Class[]) null);
+            meth = target.getMethod("myNoargMethod");
             meth.invoke(instance, (Object[]) null);
 
             /* try invoking a method that throws an exception */
-            meth = target.getMethod("throwingMethod", (Class[]) null);
+            meth = target.getMethod("throwingMethod");
             try {
                 meth.invoke(instance, (Object[]) null);
                 System.out.println("GLITCH: didn't throw");
@@ -372,7 +371,7 @@
             Target targ;
             Object[] args;
 
-            cons = target.getConstructor(new Class[] { int.class,float.class });
+            cons = target.getConstructor(int.class, float.class);
             args = new Object[] { new Integer(7), new Float(3.3333) };
             System.out.println("cons modifiers=" + cons.getModifiers());
             targ = cons.newInstance(args);
@@ -458,7 +457,7 @@
     public static void checkClinitForFields() throws Exception {
       // Loading a class constant shouldn't run <clinit>.
       System.out.println("calling const-class FieldNoisyInitUser.class");
-      Class niuClass = FieldNoisyInitUser.class;
+      Class<?> niuClass = FieldNoisyInitUser.class;
       System.out.println("called const-class FieldNoisyInitUser.class");
 
       // Getting the declared fields doesn't run <clinit>.
@@ -480,14 +479,14 @@
     public static void checkClinitForMethods() throws Exception {
       // Loading a class constant shouldn't run <clinit>.
       System.out.println("calling const-class MethodNoisyInitUser.class");
-      Class niuClass = MethodNoisyInitUser.class;
+      Class<?> niuClass = MethodNoisyInitUser.class;
       System.out.println("called const-class MethodNoisyInitUser.class");
 
       // Getting the declared methods doesn't run <clinit>.
       Method[] methods = niuClass.getDeclaredMethods();
       System.out.println("got methods");
 
-      Method method = niuClass.getMethod("staticMethod", (Class[]) null);
+      Method method = niuClass.getMethod("staticMethod");
       System.out.println("got method");
       method.invoke(null);
       System.out.println("invoked method");
@@ -517,8 +516,7 @@
 
         Method method;
         try {
-            method = Main.class.getMethod("fancyMethod",
-                new Class[] { ArrayList.class });
+            method = Main.class.getMethod("fancyMethod", ArrayList.class);
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -527,9 +525,9 @@
         System.out.println("generic method " + method.getName() + " params='"
             + stringifyTypeArray(parmTypes) + "' ret='" + ret + "'");
 
-        Constructor ctor;
+        Constructor<?> ctor;
         try {
-            ctor = Main.class.getConstructor(new Class[] { ArrayList.class });
+            ctor = Main.class.getConstructor(ArrayList.class);
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -580,8 +578,8 @@
         }
         Method method1, method2;
         try {
-            method1 = Main.class.getMethod("fancyMethod", new Class[] { ArrayList.class });
-            method2 = Main.class.getMethod("fancyMethod", new Class[] { ArrayList.class });
+            method1 = Main.class.getMethod("fancyMethod", ArrayList.class);
+            method2 = Main.class.getMethod("fancyMethod", ArrayList.class);
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
diff --git a/test/064-field-access/src/Main.java b/test/064-field-access/src/Main.java
index 5d90129..50ad5b9 100644
--- a/test/064-field-access/src/Main.java
+++ b/test/064-field-access/src/Main.java
@@ -38,7 +38,7 @@
     }
 
     try {
-      Class c = Class.forName("SubClassUsingInaccessibleField");
+      Class<?> c = Class.forName("SubClassUsingInaccessibleField");
       Object o = c.newInstance();
       c.getMethod("test").invoke(o, null);
     } catch (InvocationTargetException ite) {
@@ -64,7 +64,7 @@
    * On success, the boxed value retrieved is returned.
    */
   public Object getValue(Field field, Object obj, char type,
-      Class expectedException) {
+      Class<?> expectedException) {
     Object result = null;
     try {
       switch (type) {
@@ -638,7 +638,7 @@
    * reflection call is significant]
    */
   public Object getValue(Field field, Object obj, char type,
-      Class expectedException) {
+      Class<?> expectedException) {
     Object result = null;
     try {
       switch (type) {
@@ -698,7 +698,7 @@
     return result;
   }
 
-  public Object invoke(Method method, Object obj, Class expectedException) {
+  public Object invoke(Method method, Object obj, Class<?> expectedException) {
     Object result = null;
     try {
       result = method.invoke(obj);
diff --git a/test/068-classloader/src/FancyLoader.java b/test/068-classloader/src/FancyLoader.java
index 6a153cc..e616bfc 100644
--- a/test/068-classloader/src/FancyLoader.java
+++ b/test/068-classloader/src/FancyLoader.java
@@ -41,7 +41,7 @@
     static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/068-classloader-ex.jar";
 
     /* on Dalvik, this is a DexFile; otherwise, it's null */
-    private Class mDexClass;
+    private Class<?> mDexClass;
 
     private Object mDexFile;
 
@@ -82,12 +82,12 @@
 
         if (mDexFile == null) {
             synchronized (FancyLoader.class) {
-                Constructor ctor;
+                Constructor<?> ctor;
                 /*
                  * Construct a DexFile object through reflection.
                  */
                 try {
-                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                    ctor = mDexClass.getConstructor(String.class);
                 } catch (NoSuchMethodException nsme) {
                     throw new ClassNotFoundException("getConstructor failed",
                         nsme);
@@ -111,8 +111,7 @@
         Method meth;
 
         try {
-            meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
+            meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
         } catch (NoSuchMethodException nsme) {
             throw new ClassNotFoundException("getMethod failed", nsme);
         }
@@ -184,7 +183,7 @@
     protected Class<?> loadClass(String name, boolean resolve)
         throws ClassNotFoundException
     {
-        Class res;
+        Class<?> res;
 
         /*
          * 1. Invoke findLoadedClass(String) to check if the class has
diff --git a/test/068-classloader/src/Main.java b/test/068-classloader/src/Main.java
index b2d843b..01539b7 100644
--- a/test/068-classloader/src/Main.java
+++ b/test/068-classloader/src/Main.java
@@ -74,11 +74,10 @@
             /* this is the "alternate" DEX/Jar file */
             String DEX_FILE = System.getenv("DEX_LOCATION") + "/068-classloader-ex.jar";
             /* on Dalvik, this is a DexFile; otherwise, it's null */
-            Class mDexClass = Class.forName("dalvik.system.DexFile");
-            Constructor ctor = mDexClass.getConstructor(new Class[] {String.class});
+            Class<?> mDexClass = Class.forName("dalvik.system.DexFile");
+            Constructor<?> ctor = mDexClass.getConstructor(String.class);
             Object mDexFile = ctor.newInstance(DEX_FILE);
-            Method meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
+            Method meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
             Object klass = meth.invoke(mDexFile, "Mutator", null);
             if (klass == null) {
                 throw new AssertionError("loadClass with null class loader failed");
@@ -94,15 +93,15 @@
         FancyLoader loader2 = new FancyLoader(ClassLoader.getSystemClassLoader());
 
         try {
-            Class target1 = loader1.loadClass("MutationTarget");
-            Class target2 = loader2.loadClass("MutationTarget");
+            Class<?> target1 = loader1.loadClass("MutationTarget");
+            Class<?> target2 = loader2.loadClass("MutationTarget");
 
             if (target1 == target2) {
                 throw new RuntimeException("target1 should not be equal to target2");
             }
 
-            Class mutator1 = loader1.loadClass("Mutator");
-            Class mutator2 = loader2.loadClass("Mutator");
+            Class<?> mutator1 = loader1.loadClass("Mutator");
+            Class<?> mutator2 = loader2.loadClass("Mutator");
 
             if (mutator1 == mutator2) {
                 throw new RuntimeException("mutator1 should not be equal to mutator2");
@@ -134,12 +133,12 @@
         }
     }
 
-    private static void runMutator(Class c, int v) throws Exception {
+    private static void runMutator(Class<?> c, int v) throws Exception {
         java.lang.reflect.Method m = c.getDeclaredMethod("mutate", int.class);
         m.invoke(null, v);
     }
 
-    private static int getMutationTargetValue(Class c) throws Exception {
+    private static int getMutationTargetValue(Class<?> c) throws Exception {
         java.lang.reflect.Field f = c.getDeclaredField("value");
         return f.getInt(null);
     }
@@ -149,7 +148,7 @@
      * able to load it but not instantiate it.
      */
     static void testAccess1(ClassLoader loader) {
-        Class altClass;
+        Class<?> altClass;
 
         try {
             altClass = loader.loadClass("Inaccessible1");
@@ -179,7 +178,7 @@
      * (though the base *is* accessible to us).
      */
     static void testAccess2(ClassLoader loader) {
-        Class altClass;
+        Class<?> altClass;
 
         try {
             altClass = loader.loadClass("Inaccessible2");
@@ -199,7 +198,7 @@
      * See if we can load a class with an inaccessible interface.
      */
     static void testAccess3(ClassLoader loader) {
-        Class altClass;
+        Class<?> altClass;
 
         try {
             altClass = loader.loadClass("Inaccessible3");
@@ -219,7 +218,7 @@
      * Test a doubled class that extends the base class.
      */
     static void testExtend(ClassLoader loader) {
-        Class doubledExtendClass;
+        Class<?> doubledExtendClass;
         Object obj;
 
         /* get the "alternate" version of DoubledExtend */
@@ -268,7 +267,7 @@
      * it doesn't override the base class method.
      */
     static void testExtendOkay(ClassLoader loader) {
-        Class doubledExtendOkayClass;
+        Class<?> doubledExtendOkayClass;
         Object obj;
 
         /* get the "alternate" version of DoubledExtendOkay */
@@ -316,7 +315,7 @@
      * an interface declared in a different class.
      */
     static void testInterface(ClassLoader loader) {
-        Class getDoubledClass;
+        Class<?> getDoubledClass;
         Object obj;
 
         /* get GetDoubled from the "alternate" class loader */
@@ -362,7 +361,7 @@
      * Throw an abstract class into the middle and see what happens.
      */
     static void testAbstract(ClassLoader loader) {
-        Class abstractGetClass;
+        Class<?> abstractGetClass;
         Object obj;
 
         /* get AbstractGet from the "alternate" loader */
@@ -407,7 +406,7 @@
      * Test a doubled class that implements a common interface.
      */
     static void testImplement(ClassLoader loader) {
-        Class doubledImplementClass;
+        Class<?> doubledImplementClass;
         Object obj;
 
         useImplement(new DoubledImplement(), true);
@@ -465,7 +464,7 @@
      * that refers to a doubled class.
      */
     static void testIfaceImplement(ClassLoader loader) {
-        Class ifaceImplClass;
+        Class<?> ifaceImplClass;
         Object obj;
 
         /*
diff --git a/test/071-dexfile/src/Main.java b/test/071-dexfile/src/Main.java
index 2f85790..c3a9671 100644
--- a/test/071-dexfile/src/Main.java
+++ b/test/071-dexfile/src/Main.java
@@ -66,7 +66,7 @@
      */
     private static void testDexClassLoader() throws Exception {
         ClassLoader dexClassLoader = getDexClassLoader();
-        Class Another = dexClassLoader.loadClass("Another");
+        Class<?> Another = dexClassLoader.loadClass("Another");
         Object another = Another.newInstance();
         // not expected to work; just exercises the call
         dexClassLoader.getResource("nonexistent");
@@ -79,18 +79,21 @@
      */
     private static ClassLoader getDexClassLoader() throws Exception {
         ClassLoader classLoader = Main.class.getClassLoader();
-        Class DexClassLoader = classLoader.loadClass("dalvik.system.DexClassLoader");
-        Constructor DexClassLoader_init = DexClassLoader.getConstructor(String.class,
-                                                                        String.class,
-                                                                        String.class,
-                                                                        ClassLoader.class);
+        Class<?> DexClassLoader = classLoader.loadClass("dalvik.system.DexClassLoader");
+        Constructor<?> DexClassLoader_init = DexClassLoader.getConstructor(String.class,
+                                                                           String.class,
+                                                                           String.class,
+                                                                           ClassLoader.class);
         // create an instance, using the path we found
-        return (ClassLoader) DexClassLoader_init.newInstance(CLASS_PATH, getOdexDir(), LIB_DIR, classLoader);
+        return (ClassLoader) DexClassLoader_init.newInstance(CLASS_PATH,
+                                                             getOdexDir(),
+                                                             LIB_DIR,
+                                                             classLoader);
     }
 
     private static void testDexFile() throws Exception {
         ClassLoader classLoader = Main.class.getClassLoader();
-        Class DexFile = classLoader.loadClass("dalvik.system.DexFile");
+        Class<?> DexFile = classLoader.loadClass("dalvik.system.DexFile");
         Method DexFile_loadDex = DexFile.getMethod("loadDex",
                                                    String.class,
                                                    String.class,
diff --git a/test/074-gc-thrash/src/Main.java b/test/074-gc-thrash/src/Main.java
index f947d0b..df04793 100644
--- a/test/074-gc-thrash/src/Main.java
+++ b/test/074-gc-thrash/src/Main.java
@@ -69,7 +69,7 @@
      */
     private static Method getDumpHprofDataMethod() {
         ClassLoader myLoader = Main.class.getClassLoader();
-        Class vmdClass;
+        Class<?> vmdClass;
         try {
             vmdClass = myLoader.loadClass("dalvik.system.VMDebug");
         } catch (ClassNotFoundException cnfe) {
@@ -78,8 +78,7 @@
 
         Method meth;
         try {
-            meth = vmdClass.getMethod("dumpHprofData",
-                    new Class[] { String.class });
+            meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
             System.err.println("Found VMDebug but not dumpHprofData method");
             return null;
diff --git a/test/080-oom-throw/src/Main.java b/test/080-oom-throw/src/Main.java
index f007b25..0ae92a9 100644
--- a/test/080-oom-throw/src/Main.java
+++ b/test/080-oom-throw/src/Main.java
@@ -105,7 +105,7 @@
     static boolean triggerReflectionOOM() {
         try {
             Class<?> c = Main.class;
-            Method m = c.getMethod("blowup", (Class[]) null);
+            Method m = c.getMethod("blowup");
             holder = new Object[1000000];
             m.invoke(null);
             holder = null;
diff --git a/test/086-null-super/src/Main.java b/test/086-null-super/src/Main.java
index 060737f..8bd1786 100644
--- a/test/086-null-super/src/Main.java
+++ b/test/086-null-super/src/Main.java
@@ -75,14 +75,12 @@
                  * Find the DexFile class, and construct a DexFile object
                  * through reflection, then call loadCLass on it.
                  */
-                Class mDexClass = ClassLoader.getSystemClassLoader().
+                Class<?> mDexClass = ClassLoader.getSystemClassLoader().
                         loadClass("dalvik.system.DexFile");
-                Constructor ctor = mDexClass.
-                        getConstructor(new Class[] {String.class});
+                Constructor<?> ctor = mDexClass.getConstructor(String.class);
                 Object mDexFile = ctor.newInstance(DEX_FILE);
                 Method meth = mDexClass.
-                        getMethod("loadClass",
-                            new Class[] { String.class, ClassLoader.class });
+                        getMethod("loadClass", String.class, ClassLoader.class);
                 /*
                  * Invoking loadClass on CLASS_NAME is expected to
                  * throw an InvocationTargetException. Anything else
diff --git a/test/087-gc-after-link/src/Main.java b/test/087-gc-after-link/src/Main.java
index 7c47e99..698af0b 100644
--- a/test/087-gc-after-link/src/Main.java
+++ b/test/087-gc-after-link/src/Main.java
@@ -70,7 +70,7 @@
                 throws TestFailed, InvocationTargetException
         {
             Object dexFile = null;
-            Class dexClass = null;
+            Class<?> dexClass = null;
 
             try {
                 try {
@@ -80,11 +80,9 @@
                      */
                     dexClass = ClassLoader.getSystemClassLoader().
                             loadClass("dalvik.system.DexFile");
-                    Constructor ctor = dexClass.
-                            getConstructor(new Class[] {String.class});
+                    Constructor<?> ctor = dexClass.getConstructor(String.class);
                     dexFile = ctor.newInstance(DEX_FILE);
-                    Method meth = dexClass.getMethod("loadClass",
-                            new Class[] { String.class, ClassLoader.class });
+                    Method meth = dexClass.getMethod("loadClass", String.class, ClassLoader.class);
                     /*
                      * Invoking loadClass on CLASS_NAME is expected to
                      * throw an InvocationTargetException. Anything else
@@ -95,7 +93,7 @@
                 } finally {
                     if (dexFile != null) {
                         /* close the DexFile to make CloseGuard happy */
-                        Method meth = dexClass.getMethod("close", (Class[]) null);
+                        Method meth = dexClass.getMethod("close");
                         meth.invoke(dexFile);
                     }
                 }
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index 212c894..a6f0e64 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -100,7 +100,7 @@
      */
     void constantLock() {
         assertIsManaged();
-        Class thing = Thread.class;
+        Class<?> thing = Thread.class;
         synchronized (Thread.class) {}
     }
 
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index 50bbe51..72c5a28 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -136,7 +136,7 @@
         private static final Method getRecentAllocationsMethod;
         static {
             try {
-                Class c = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+                Class<?> c = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
                 enableRecentAllocationsMethod = c.getDeclaredMethod("enableRecentAllocations",
                                                                     Boolean.TYPE);
                 getRecentAllocationStatusMethod = c.getDeclaredMethod("getRecentAllocationStatus");
diff --git a/test/099-vmdebug/src/Main.java b/test/099-vmdebug/src/Main.java
index 8068721..90ad315 100644
--- a/test/099-vmdebug/src/Main.java
+++ b/test/099-vmdebug/src/Main.java
@@ -242,7 +242,7 @@
         System.out.println("Instances of null " + VMDebug.countInstancesofClass(null, false));
         System.out.println("Instances of ClassA assignable " +
                 VMDebug.countInstancesofClass(ClassA.class, true));
-        Class[] classes = new Class[]{ClassA.class, ClassB.class, null};
+        Class<?>[] classes = new Class<?>[] {ClassA.class, ClassB.class, null};
         long[] counts = VMDebug.countInstancesofClasses(classes, false);
         System.out.println("Array counts " + Arrays.toString(counts));
         counts = VMDebug.countInstancesofClasses(classes, true);
@@ -259,7 +259,7 @@
         private static final Method countInstancesOfClassesMethod;
         static {
             try {
-                Class c = Class.forName("dalvik.system.VMDebug");
+                Class<?> c = Class.forName("dalvik.system.VMDebug");
                 startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
                         Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
                 stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
@@ -292,10 +292,10 @@
         public static Map<String, String> getRuntimeStats() throws Exception {
             return (Map<String, String>) getRuntimeStatsMethod.invoke(null);
         }
-        public static long countInstancesofClass(Class c, boolean assignable) throws Exception {
+        public static long countInstancesofClass(Class<?> c, boolean assignable) throws Exception {
             return (long) countInstancesOfClassMethod.invoke(null, new Object[]{c, assignable});
         }
-        public static long[] countInstancesofClasses(Class[] classes, boolean assignable)
+        public static long[] countInstancesofClasses(Class<?>[] classes, boolean assignable)
                 throws Exception {
             return (long[]) countInstancesOfClassesMethod.invoke(
                     null, new Object[]{classes, assignable});
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index 1245852..91ba307 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -275,10 +275,8 @@
   }
 
   public static void testConstructorReflection() throws Exception {
-    Constructor<?> ctor;
-
-    ctor = String.class.getConstructor(new Class[0]);
-    show(ctor.newInstance((Object[]) null));
+    Constructor<String> ctor = String.class.getConstructor();
+    show(ctor.newInstance());
 
     ctor = String.class.getConstructor(char[].class, int.class, int.class);
     show(ctor.newInstance(new char[] { '\u2714', 'y', 'z', '!' }, 1, 2));
@@ -287,7 +285,7 @@
   private static void testPackagePrivateConstructor() {
     try {
       Class<?> c = Class.forName("sub.PPClass");
-      Constructor cons = c.getConstructor();
+      Constructor<?> cons = c.getConstructor();
       cons.newInstance();
       throw new RuntimeException("Expected IllegalAccessException.");
     } catch (IllegalAccessException e) {
@@ -301,7 +299,7 @@
   private static void testPackagePrivateAccessibleConstructor() {
     try {
       Class<?> c = Class.forName("sub.PPClass");
-      Constructor cons = c.getConstructor();
+      Constructor<?> cons = c.getConstructor();
       cons.setAccessible(true);  // ensure we prevent IllegalAccessException
       cons.newInstance();
     } catch (Exception e) {
diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java
index 0c91d44..ec5678d 100644
--- a/test/107-int-math2/src/Main.java
+++ b/test/107-int-math2/src/Main.java
@@ -104,7 +104,7 @@
     }
 
     static int constClassTest(int x) {
-        Class c = String.class;
+        Class<?> c = String.class;
         if (c != null) {
            return x * 2;
         } else {
diff --git a/test/118-noimage-dex2oat/src/Main.java b/test/118-noimage-dex2oat/src/Main.java
index dba9166..cc19107 100644
--- a/test/118-noimage-dex2oat/src/Main.java
+++ b/test/118-noimage-dex2oat/src/Main.java
@@ -51,7 +51,7 @@
     private static final Method isBootClassPathOnDiskMethod;
     static {
         try {
-            Class c = Class.forName("dalvik.system.VMRuntime");
+            Class<?> c = Class.forName("dalvik.system.VMRuntime");
             getCurrentInstructionSetMethod = c.getDeclaredMethod("getCurrentInstructionSet");
             isBootClassPathOnDiskMethod = c.getDeclaredMethod("isBootClassPathOnDisk",
                                                               String.class);
diff --git a/test/125-gc-and-classloading/src/Main.java b/test/125-gc-and-classloading/src/Main.java
index 61e123d..e81ef7b 100644
--- a/test/125-gc-and-classloading/src/Main.java
+++ b/test/125-gc-and-classloading/src/Main.java
@@ -57,7 +57,7 @@
         public void run() {
             try {
                 cdl.await();
-                Class c0 = Class.forName("Main$BigClass");
+                Class<?> c0 = Class.forName("Main$BigClass");
             } catch (Exception e) {
                 throw new RuntimeException(e);
             }
diff --git a/test/130-hprof/src/Main.java b/test/130-hprof/src/Main.java
index 9868c61..c145f27 100644
--- a/test/130-hprof/src/Main.java
+++ b/test/130-hprof/src/Main.java
@@ -37,15 +37,15 @@
 
     private static Object allocInDifferentLoader() throws Exception {
         final String DEX_FILE = System.getenv("DEX_LOCATION") + "/130-hprof-ex.jar";
-        Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
         if (pathClassLoader == null) {
             throw new AssertionError("Couldn't find path class loader class");
         }
-        Constructor constructor =
+        Constructor<?> constructor =
             pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
         ClassLoader loader = (ClassLoader)constructor.newInstance(
                 DEX_FILE, ClassLoader.getSystemClassLoader());
-        Class allocator = loader.loadClass("Allocator");
+        Class<?> allocator = loader.loadClass("Allocator");
         return allocator.getDeclaredMethod("allocObject", null).invoke(null);
     }
 
@@ -105,7 +105,7 @@
         System.out.println("Generated data.");
 
         createDumpAndConv();
-        Class klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+        Class<?> klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
         if (klass == null) {
             throw new AssertionError("Couldn't find path class loader class");
         }
@@ -153,7 +153,7 @@
      */
     private static Method getDumpHprofDataMethod() {
         ClassLoader myLoader = Main.class.getClassLoader();
-        Class vmdClass;
+        Class<?> vmdClass;
         try {
             vmdClass = myLoader.loadClass("dalvik.system.VMDebug");
         } catch (ClassNotFoundException cnfe) {
@@ -162,8 +162,7 @@
 
         Method meth;
         try {
-            meth = vmdClass.getMethod("dumpHprofData",
-                    new Class[] { String.class });
+            meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
             System.err.println("Found VMDebug but not dumpHprofData method");
             return null;
diff --git a/test/134-reg-promotion/src/Main.java b/test/134-reg-promotion/src/Main.java
index 008ac58..f633524 100644
--- a/test/134-reg-promotion/src/Main.java
+++ b/test/134-reg-promotion/src/Main.java
@@ -32,13 +32,13 @@
 
     public static void main(String args[]) throws Exception {
         Class<?> c = Class.forName("Test");
-        Method m = c.getMethod("run", (Class[]) null);
+        Method m = c.getMethod("run");
         for (int i = 0; i < 10; i++) {
             holder = new char[128 * 1024][];
             m.invoke(null, (Object[]) null);
             holder = null;
         }
-        m = c.getMethod("run2", (Class[]) null);
+        m = c.getMethod("run2");
         for (int i = 0; i < 10; i++) {
             holder = new char[128 * 1024][];
             m.invoke(null, (Object[]) null);
diff --git a/test/138-duplicate-classes-check/src/Main.java b/test/138-duplicate-classes-check/src/Main.java
index a2ef281..5ffceb9 100644
--- a/test/138-duplicate-classes-check/src/Main.java
+++ b/test/138-duplicate-classes-check/src/Main.java
@@ -38,7 +38,7 @@
                 getClass().getClassLoader());
 
         try {
-            Class testEx = loader.loadClass("TestEx");
+            Class<?> testEx = loader.loadClass("TestEx");
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
diff --git a/test/138-duplicate-classes-check2/src/FancyLoader.java b/test/138-duplicate-classes-check2/src/FancyLoader.java
index 7e2bb08..58b7ec4 100644
--- a/test/138-duplicate-classes-check2/src/FancyLoader.java
+++ b/test/138-duplicate-classes-check2/src/FancyLoader.java
@@ -42,7 +42,7 @@
             "/138-duplicate-classes-check2-ex.jar";
 
     /* on Dalvik, this is a DexFile; otherwise, it's null */
-    private Class mDexClass;
+    private Class<?> mDexClass;
 
     private Object mDexFile;
 
@@ -83,12 +83,12 @@
 
         if (mDexFile == null) {
             synchronized (FancyLoader.class) {
-                Constructor ctor;
+                Constructor<?> ctor;
                 /*
                  * Construct a DexFile object through reflection.
                  */
                 try {
-                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                    ctor = mDexClass.getConstructor(String.class);
                 } catch (NoSuchMethodException nsme) {
                     throw new ClassNotFoundException("getConstructor failed",
                         nsme);
@@ -112,8 +112,7 @@
         Method meth;
 
         try {
-            meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
+            meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
         } catch (NoSuchMethodException nsme) {
             throw new ClassNotFoundException("getMethod failed", nsme);
         }
@@ -185,7 +184,7 @@
     protected Class<?> loadClass(String name, boolean resolve)
         throws ClassNotFoundException
     {
-        Class res;
+        Class<?> res;
 
         /*
          * 1. Invoke findLoadedClass(String) to check if the class has
diff --git a/test/138-duplicate-classes-check2/src/Main.java b/test/138-duplicate-classes-check2/src/Main.java
index a9b5bb0..a0d6977 100644
--- a/test/138-duplicate-classes-check2/src/Main.java
+++ b/test/138-duplicate-classes-check2/src/Main.java
@@ -33,7 +33,7 @@
         FancyLoader loader = new FancyLoader(getClass().getClassLoader());
 
         try {
-            Class testEx = loader.loadClass("TestEx");
+            Class<?> testEx = loader.loadClass("TestEx");
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
diff --git a/test/139-register-natives/src/Main.java b/test/139-register-natives/src/Main.java
index 8dd2131..11bd53f 100644
--- a/test/139-register-natives/src/Main.java
+++ b/test/139-register-natives/src/Main.java
@@ -47,7 +47,7 @@
     }
   }
 
-  private native static int registerNatives(Class c);
+  private native static int registerNatives(Class<?> c);
 
   private static void expectThrows(Base b) {
     try {
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 9ed8d28..f9b6180 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -28,11 +28,11 @@
 
     public static void main(String[] args) throws Exception {
         nativeLibraryName = args[0];
-        Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
         if (pathClassLoader == null) {
             throw new AssertionError("Couldn't find path class loader class");
         }
-        Constructor constructor =
+        Constructor<?> constructor =
             pathClassLoader.getDeclaredConstructor(String.class, String.class, ClassLoader.class);
         try {
             testUnloadClass(constructor);
@@ -67,7 +67,7 @@
         System.out.println("Number of loaded unload-ex maps " + count);
     }
 
-    private static void stressTest(Constructor constructor) throws Exception {
+    private static void stressTest(Constructor<?> constructor) throws Exception {
         for (int i = 0; i <= 100; ++i) {
             setUpUnloadLoader(constructor, false);
             if (i % 10 == 0) {
@@ -76,7 +76,7 @@
         }
     }
 
-    private static void testUnloadClass(Constructor constructor) throws Exception {
+    private static void testUnloadClass(Constructor<?> constructor) throws Exception {
         WeakReference<Class> klass = setUpUnloadClassWeak(constructor);
         // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
@@ -87,7 +87,7 @@
         System.out.println(klass2.get());
     }
 
-    private static void testUnloadLoader(Constructor constructor)
+    private static void testUnloadLoader(Constructor<?> constructor)
         throws Exception {
       WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
       // No strong references to class loader, should get unloaded.
@@ -96,8 +96,8 @@
       System.out.println(loader.get());
     }
 
-    private static void testStackTrace(Constructor constructor) throws Exception {
-        Class klass = setUpUnloadClass(constructor);
+    private static void testStackTrace(Constructor<?> constructor) throws Exception {
+        Class<?> klass = setUpUnloadClass(constructor);
         WeakReference<Class> weak_klass = new WeakReference(klass);
         Method stackTraceMethod = klass.getDeclaredMethod("generateStackTrace");
         Throwable throwable = (Throwable) stackTraceMethod.invoke(klass);
@@ -108,7 +108,7 @@
         System.out.println("class null " + isNull + " " + throwable.getMessage());
     }
 
-    private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception {
+    private static void testLoadAndUnloadLibrary(Constructor<?> constructor) throws Exception {
         WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
         // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
@@ -117,7 +117,7 @@
     }
 
     private static Object testNoUnloadHelper(ClassLoader loader) throws Exception {
-        Class intHolder = loader.loadClass("IntHolder");
+        Class<?> intHolder = loader.loadClass("IntHolder");
         return intHolder.newInstance();
     }
 
@@ -131,14 +131,14 @@
       public WeakReference<ClassLoader> classLoader;
     }
 
-    private static Pair testNoUnloadInstanceHelper(Constructor constructor) throws Exception {
+    private static Pair testNoUnloadInstanceHelper(Constructor<?> constructor) throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
             DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
         Object o = testNoUnloadHelper(loader);
         return new Pair(o, loader);
     }
 
-    private static void testNoUnloadInstance(Constructor constructor) throws Exception {
+    private static void testNoUnloadInstance(Constructor<?> constructor) throws Exception {
         Pair p = testNoUnloadInstanceHelper(constructor);
         Runtime.getRuntime().gc();
        // If the class loader was unloaded too early due to races, just pass the test.
@@ -146,10 +146,10 @@
         System.out.println("loader null " + isNull);
     }
 
-    private static Class setUpUnloadClass(Constructor constructor) throws Exception {
+    private static Class<?> setUpUnloadClass(Constructor<?> constructor) throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
             DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
-        Class intHolder = loader.loadClass("IntHolder");
+        Class<?> intHolder = loader.loadClass("IntHolder");
         Method getValue = intHolder.getDeclaredMethod("getValue");
         Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
         // Make sure we don't accidentally preserve the value in the int holder, the class
@@ -161,17 +161,17 @@
         return intHolder;
     }
 
-    private static WeakReference<Class> setUpUnloadClassWeak(Constructor constructor)
+    private static WeakReference<Class> setUpUnloadClassWeak(Constructor<?> constructor)
             throws Exception {
         return new WeakReference<Class>(setUpUnloadClass(constructor));
     }
 
-    private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor constructor,
+    private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor<?> constructor,
                                                                 boolean waitForCompilation)
         throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
             DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
-        Class intHolder = loader.loadClass("IntHolder");
+        Class<?> intHolder = loader.loadClass("IntHolder");
         Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
         setValue.invoke(intHolder, 2);
         if (waitForCompilation) {
@@ -180,7 +180,7 @@
         return new WeakReference(loader);
     }
 
-    private static void waitForCompilation(Class intHolder) throws Exception {
+    private static void waitForCompilation(Class<?> intHolder) throws Exception {
       // Load the native library so that we can call waitForCompilation.
       Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
       loadLibrary.invoke(intHolder, nativeLibraryName);
@@ -189,11 +189,11 @@
       waitForCompilation.invoke(intHolder);
     }
 
-    private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor constructor)
+    private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor<?> constructor)
         throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
             DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
-        Class intHolder = loader.loadClass("IntHolder");
+        Class<?> intHolder = loader.loadClass("IntHolder");
         Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
         loadLibrary.invoke(intHolder, nativeLibraryName);
         waitForCompilation(intHolder);
diff --git a/test/142-classloader2/src/Main.java b/test/142-classloader2/src/Main.java
index 89dadce..80b00e7 100644
--- a/test/142-classloader2/src/Main.java
+++ b/test/142-classloader2/src/Main.java
@@ -25,8 +25,8 @@
     private static ClassLoader createClassLoader(String dexPath, ClassLoader parent) {
         try {
             Class<?> myClassLoaderClass = Class.forName("MyPathClassLoader");
-            Constructor constructor = myClassLoaderClass.getConstructor(String.class,
-                                                                        ClassLoader.class);
+            Constructor<?> constructor = myClassLoaderClass.getConstructor(String.class,
+                                                                           ClassLoader.class);
             return (ClassLoader)constructor.newInstance(dexPath, parent);
         } catch (Exception e) {
            // Oops, not available?!?!
diff --git a/test/145-alloc-tracking-stress/src/Main.java b/test/145-alloc-tracking-stress/src/Main.java
index 752fdd9..4a67a80 100644
--- a/test/145-alloc-tracking-stress/src/Main.java
+++ b/test/145-alloc-tracking-stress/src/Main.java
@@ -31,7 +31,7 @@
     }
 
     public static void main(String[] args) throws Exception {
-      Class klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+      Class<?> klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
       if (klass == null) {
           throw new AssertionError("Couldn't find DdmVmInternal class");
       }
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass1.java b/test/148-multithread-gc-annotations/src/AnnoClass1.java
index b82c61f..3eb45ae 100644
--- a/test/148-multithread-gc-annotations/src/AnnoClass1.java
+++ b/test/148-multithread-gc-annotations/src/AnnoClass1.java
@@ -19,5 +19,5 @@
 @Retention(RetentionPolicy.RUNTIME)
 @Target(ElementType.TYPE)
 public @interface AnnoClass1 {
-    Class value();
+    Class<?> value();
 }
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass2.java b/test/148-multithread-gc-annotations/src/AnnoClass2.java
index c75d950..b17490f 100644
--- a/test/148-multithread-gc-annotations/src/AnnoClass2.java
+++ b/test/148-multithread-gc-annotations/src/AnnoClass2.java
@@ -19,5 +19,5 @@
 @Retention(RetentionPolicy.RUNTIME)
 @Target(ElementType.TYPE)
 public @interface AnnoClass2 {
-    Class value();
+    Class<?> value();
 }
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass3.java b/test/148-multithread-gc-annotations/src/AnnoClass3.java
index 5b4a378..7d600a8 100644
--- a/test/148-multithread-gc-annotations/src/AnnoClass3.java
+++ b/test/148-multithread-gc-annotations/src/AnnoClass3.java
@@ -19,5 +19,5 @@
 @Retention(RetentionPolicy.RUNTIME)
 @Target(ElementType.TYPE)
 public @interface AnnoClass3 {
-    Class value();
+    Class<?> value();
 }
diff --git a/test/201-built-in-exception-detail-messages/src/Main.java b/test/201-built-in-exception-detail-messages/src/Main.java
index f0bb6dd..dc58819 100644
--- a/test/201-built-in-exception-detail-messages/src/Main.java
+++ b/test/201-built-in-exception-detail-messages/src/Main.java
@@ -247,7 +247,7 @@
    * Helper for testCastOperatorWithArrays. It's important that
    * the return type is Object.
    */
-  private static Object makeArray(Class c) {
+  private static Object makeArray(Class<?> c) {
     return Array.newInstance(c, 1);
   }
 
diff --git a/test/420-const-class/src/Main.java b/test/420-const-class/src/Main.java
index 44a7436..90ccf3a 100644
--- a/test/420-const-class/src/Main.java
+++ b/test/420-const-class/src/Main.java
@@ -53,15 +53,15 @@
     $opt$LoadAndClinitCheck();
   }
 
-  public static Class $opt$LoadThisClass() {
+  public static Class<?> $opt$LoadThisClass() {
     return Main.class;
   }
 
-  public static Class $opt$LoadOtherClass() {
+  public static Class<?> $opt$LoadOtherClass() {
     return Other.class;
   }
 
-  public static Class $opt$LoadSystemClass() {
+  public static Class<?> $opt$LoadSystemClass() {
     return System.class;
   }
 
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index b7712a7..33ef10b 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -213,17 +213,17 @@
    * Exercise constant folding on addition.
    */
 
-  /// CHECK-START: int Main.IntAddition1() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntAddition1() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: int Main.IntAddition1() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntAddition1() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: int Main.IntAddition1() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntAddition1() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static int IntAddition1() {
@@ -234,7 +234,7 @@
     return c;
   }
 
-  /// CHECK-START: int Main.IntAddition2() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntAddition2() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
@@ -244,11 +244,11 @@
   /// CHECK-DAG:     <<Add3:i\d+>>    Add [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                      Return [<<Add3>>]
 
-  /// CHECK-START: int Main.IntAddition2() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntAddition2() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const14:i\d+>> IntConstant 14
   /// CHECK-DAG:                      Return [<<Const14>>]
 
-  /// CHECK-START: int Main.IntAddition2() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntAddition2() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static int IntAddition2() {
@@ -263,17 +263,17 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongAddition() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongAddition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Const2:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<Add:j\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: long Main.LongAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongAddition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:j\d+>>  LongConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: long Main.LongAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongAddition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static long LongAddition() {
@@ -284,17 +284,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatAddition() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatAddition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:f\d+>>  FloatConstant 1
   /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
   /// CHECK-DAG:     <<Add:f\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: float Main.FloatAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatAddition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: float Main.FloatAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatAddition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static float FloatAddition() {
@@ -305,17 +305,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleAddition() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleAddition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:d\d+>>  DoubleConstant 1
   /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
   /// CHECK-DAG:     <<Add:d\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: double Main.DoubleAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleAddition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: double Main.DoubleAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleAddition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static double DoubleAddition() {
@@ -331,17 +331,17 @@
    * Exercise constant folding on subtraction.
    */
 
-  /// CHECK-START: int Main.IntSubtraction() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:i\d+>>  IntConstant 6
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: int Main.IntSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:i\d+>>  IntConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: int Main.IntSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static int IntSubtraction() {
@@ -352,17 +352,17 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongSubtraction() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:j\d+>>  LongConstant 6
   /// CHECK-DAG:     <<Const2:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<Sub:j\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: long Main.LongSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:j\d+>>  LongConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: long Main.LongSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static long LongSubtraction() {
@@ -373,17 +373,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatSubtraction() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:f\d+>>  FloatConstant 6
   /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
   /// CHECK-DAG:     <<Sub:f\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: float Main.FloatSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:f\d+>>  FloatConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: float Main.FloatSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static float FloatSubtraction() {
@@ -394,17 +394,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleSubtraction() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:d\d+>>  DoubleConstant 6
   /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
   /// CHECK-DAG:     <<Sub:d\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: double Main.DoubleSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:d\d+>>  DoubleConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: double Main.DoubleSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static double DoubleSubtraction() {
@@ -420,17 +420,17 @@
    * Exercise constant folding on multiplication.
    */
 
-  /// CHECK-START: int Main.IntMultiplication() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
   /// CHECK-DAG:     <<Mul:i\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: int Main.IntMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:i\d+>> IntConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: int Main.IntMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static int IntMultiplication() {
@@ -441,17 +441,17 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongMultiplication() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:j\d+>>  LongConstant 7
   /// CHECK-DAG:     <<Const3:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: long Main.LongMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:j\d+>> LongConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: long Main.LongMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static long LongMultiplication() {
@@ -462,17 +462,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatMultiplication() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:f\d+>>  FloatConstant 7
   /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
   /// CHECK-DAG:     <<Mul:f\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: float Main.FloatMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:f\d+>> FloatConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: float Main.FloatMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static float FloatMultiplication() {
@@ -483,17 +483,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleMultiplication() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:d\d+>>  DoubleConstant 7
   /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
   /// CHECK-DAG:     <<Mul:d\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: double Main.DoubleMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:d\d+>> DoubleConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: double Main.DoubleMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static double DoubleMultiplication() {
@@ -509,18 +509,18 @@
    * Exercise constant folding on division.
    */
 
-  /// CHECK-START: int Main.IntDivision() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:i\d+>>   IntConstant 8
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<Div0Chk:i\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Div:i\d+>>      Div [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: int Main.IntDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: int Main.IntDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Div
 
@@ -532,18 +532,18 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongDivision() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:j\d+>>   LongConstant 8
   /// CHECK-DAG:     <<Const3:j\d+>>   LongConstant 3
   /// CHECK-DAG:     <<Div0Chk:j\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Div:j\d+>>      Div [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: long Main.LongDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.LongDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Div
 
@@ -555,17 +555,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatDivision() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
   /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
   /// CHECK-DAG:     <<Div:f\d+>>      Div [<<Const8>>,<<Const2P5>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: float Main.FloatDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3P2:f\d+>> FloatConstant 3.2
   /// CHECK-DAG:                       Return [<<Const3P2>>]
 
-  /// CHECK-START: float Main.FloatDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Div
 
   public static float FloatDivision() {
@@ -576,17 +576,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleDivision() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
   /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
   /// CHECK-DAG:     <<Div:d\d+>>      Div [<<Const8>>,<<Const2P5>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: double Main.DoubleDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3P2:d\d+>> DoubleConstant 3.2
   /// CHECK-DAG:                       Return [<<Const3P2>>]
 
-  /// CHECK-START: double Main.DoubleDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Div
 
   public static double DoubleDivision() {
@@ -602,18 +602,18 @@
    * Exercise constant folding on remainder.
    */
 
-  /// CHECK-START: int Main.IntRemainder() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:i\d+>>   IntConstant 8
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<Div0Chk:i\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Rem:i\d+>>      Rem [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: int Main.IntRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: int Main.IntRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Rem
 
@@ -625,18 +625,18 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongRemainder() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:j\d+>>   LongConstant 8
   /// CHECK-DAG:     <<Const3:j\d+>>   LongConstant 3
   /// CHECK-DAG:     <<Div0Chk:j\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Rem:j\d+>>      Rem [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: long Main.LongRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.LongRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Rem
 
@@ -648,17 +648,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatRemainder() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
   /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
   /// CHECK-DAG:     <<Rem:f\d+>>      Rem [<<Const8>>,<<Const2P5>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: float Main.FloatRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0P5:f\d+>> FloatConstant 0.5
   /// CHECK-DAG:                       Return [<<Const0P5>>]
 
-  /// CHECK-START: float Main.FloatRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Rem
 
   public static float FloatRemainder() {
@@ -669,17 +669,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleRemainder() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
   /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
   /// CHECK-DAG:     <<Rem:d\d+>>      Rem [<<Const8>>,<<Const2P5>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: double Main.DoubleRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0P5:d\d+>> DoubleConstant 0.5
   /// CHECK-DAG:                       Return [<<Const0P5>>]
 
-  /// CHECK-START: double Main.DoubleRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Rem
 
   public static double DoubleRemainder() {
@@ -695,18 +695,18 @@
    * Exercise constant folding on left shift.
    */
 
-  /// CHECK-START: int Main.ShlIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ShlIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<TypeConv:i\d+>> TypeConversion [<<Const2L>>]
   /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Const1>>,<<TypeConv>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: int Main.ShlIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ShlIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:i\d+>>   IntConstant 4
   /// CHECK-DAG:                       Return [<<Const4>>]
 
-  /// CHECK-START: int Main.ShlIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ShlIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shl
 
   public static int ShlIntLong() {
@@ -715,17 +715,17 @@
     return lhs << rhs;
   }
 
-  /// CHECK-START: long Main.ShlLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ShlLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:     <<Shl:j\d+>>      Shl [<<Const3L>>,<<Const2>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: long Main.ShlLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ShlLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const12L:j\d+>> LongConstant 12
   /// CHECK-DAG:                       Return [<<Const12L>>]
 
-  /// CHECK-START: long Main.ShlLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ShlLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shl
 
   public static long ShlLongInt() {
@@ -739,18 +739,18 @@
    * Exercise constant folding on right shift.
    */
 
-  /// CHECK-START: int Main.ShrIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ShrIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<TypeConv:i\d+>> TypeConversion [<<Const2L>>]
   /// CHECK-DAG:     <<Shr:i\d+>>      Shr [<<Const7>>,<<TypeConv>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: int Main.ShrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:                       Return [<<Const1>>]
 
-  /// CHECK-START: int Main.ShrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shr
 
   public static int ShrIntLong() {
@@ -759,17 +759,17 @@
     return lhs >> rhs;
   }
 
-  /// CHECK-START: long Main.ShrLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ShrLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const9L:j\d+>>  LongConstant 9
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Const9L>>,<<Const2>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.ShrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2L>>]
 
-  /// CHECK-START: long Main.ShrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shr
 
   public static long ShrLongInt() {
@@ -783,18 +783,18 @@
    * Exercise constant folding on unsigned right shift.
    */
 
-  /// CHECK-START: int Main.UShrIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.UShrIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstM7:i\d+>>  IntConstant -7
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<TypeConv:i\d+>> TypeConversion [<<Const2L>>]
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<ConstM7>>,<<TypeConv>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.UShrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.UShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<ConstRes:i\d+>> IntConstant 1073741822
   /// CHECK-DAG:                       Return [<<ConstRes>>]
 
-  /// CHECK-START: int Main.UShrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.UShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       UShr
 
   public static int UShrIntLong() {
@@ -803,17 +803,17 @@
     return lhs >>> rhs;
   }
 
-  /// CHECK-START: long Main.UShrLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.UShrLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstM9L:j\d+>> LongConstant -9
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<ConstM9L>>,<<Const2>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.UShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<ConstRes:j\d+>> LongConstant 4611686018427387901
   /// CHECK-DAG:                       Return [<<ConstRes>>]
 
-  /// CHECK-START: long Main.UShrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.UShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       UShr
 
   public static long UShrLongInt() {
@@ -827,18 +827,18 @@
    * Exercise constant folding on logical and.
    */
 
-  /// CHECK-START: long Main.AndIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.AndIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10:i\d+>>  IntConstant 10
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const10>>]
   /// CHECK-DAG:     <<And:j\d+>>      And [<<TypeConv>>,<<Const3L>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.AndIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.AndIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.AndIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.AndIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       And
 
   public static long AndIntLong() {
@@ -847,18 +847,18 @@
     return lhs & rhs;
   }
 
-  /// CHECK-START: long Main.AndLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.AndLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
   /// CHECK-DAG:     <<And:j\d+>>      And [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.AndLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.AndLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.AndLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.AndLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       And
 
   public static long AndLongInt() {
@@ -872,18 +872,18 @@
    * Exercise constant folding on logical or.
    */
 
-  /// CHECK-START: long Main.OrIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.OrIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10:i\d+>>  IntConstant 10
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const10>>]
   /// CHECK-DAG:     <<Or:j\d+>>       Or [<<TypeConv>>,<<Const3L>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.OrIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const11:j\d+>>  LongConstant 11
   /// CHECK-DAG:                       Return [<<Const11>>]
 
-  /// CHECK-START: long Main.OrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.OrIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Or
 
   public static long OrIntLong() {
@@ -892,18 +892,18 @@
     return lhs | rhs;
   }
 
-  /// CHECK-START: long Main.OrLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.OrLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
   /// CHECK-DAG:     <<Or:j\d+>>       Or [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.OrLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const11:j\d+>>  LongConstant 11
   /// CHECK-DAG:                       Return [<<Const11>>]
 
-  /// CHECK-START: long Main.OrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.OrLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Or
 
   public static long OrLongInt() {
@@ -917,18 +917,18 @@
    * Exercise constant folding on logical exclusive or.
    */
 
-  /// CHECK-START: long Main.XorIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.XorIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10:i\d+>>  IntConstant 10
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const10>>]
   /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<TypeConv>>,<<Const3L>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: long Main.XorIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.XorIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const9:j\d+>>   LongConstant 9
   /// CHECK-DAG:                       Return [<<Const9>>]
 
-  /// CHECK-START: long Main.XorIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.XorIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Xor
 
   public static long XorIntLong() {
@@ -937,18 +937,18 @@
     return lhs ^ rhs;
   }
 
-  /// CHECK-START: long Main.XorLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.XorLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
   /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: long Main.XorLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.XorLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const9:j\d+>>   LongConstant 9
   /// CHECK-DAG:                       Return [<<Const9>>]
 
-  /// CHECK-START: long Main.XorLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.XorLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Xor
 
   public static long XorLongInt() {
@@ -962,17 +962,17 @@
    * Exercise constant folding on constant (static) condition.
    */
 
-  /// CHECK-START: int Main.StaticCondition() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.StaticCondition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Cond:z\d+>>    GreaterThanOrEqual [<<Const7>>,<<Const2>>]
   /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
-  /// CHECK-START: int Main.StaticCondition() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.StaticCondition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
   /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Const1>>]
 
-  /// CHECK-START: int Main.StaticCondition() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.StaticCondition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      GreaterThanOrEqual
 
   public static int StaticCondition() {
@@ -991,16 +991,16 @@
    * Exercise constant folding on constant (static) condition for null references.
    */
 
-  /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Null:l\d+>>    NullConstant
   /// CHECK-DAG:     <<Cond:z\d+>>    NotEqual [<<Null>>,<<Null>>]
   /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
-  /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0:i\d+>>  IntConstant 0
   /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Const0>>]
 
-  /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      NotEqual
 
   private static Object getNull() {
@@ -1023,7 +1023,7 @@
   * (forward) post-order traversal of the dominator tree.
    */
 
-  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
@@ -1032,14 +1032,14 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Select [<<Sub>>,<<Add>>,<<Cond>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:     <<Phi:i\d+>>     Select [<<Const3>>,<<Const7>>,<<Cond>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
   /// CHECK-NOT:                      Sub
 
@@ -1325,16 +1325,16 @@
    * Exercise constant folding on type conversions.
    */
 
-  /// CHECK-START: int Main.ReturnInt33() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ReturnInt33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:j\d+>>  LongConstant 33
   /// CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: int Main.ReturnInt33() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnInt33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
-  /// CHECK-START: int Main.ReturnInt33() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnInt33() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static int ReturnInt33() {
@@ -1342,16 +1342,16 @@
     return (int) imm;
   }
 
-  /// CHECK-START: int Main.ReturnIntMax() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ReturnIntMax() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstMax:f\d+>> FloatConstant 1e+34
   /// CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<ConstMax>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: int Main.ReturnIntMax() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnIntMax() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<ConstMax:i\d+>> IntConstant 2147483647
   /// CHECK-DAG:                       Return [<<ConstMax>>]
 
-  /// CHECK-START: int Main.ReturnIntMax() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnIntMax() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static int ReturnIntMax() {
@@ -1359,16 +1359,16 @@
     return (int) imm;
   }
 
-  /// CHECK-START: int Main.ReturnInt0() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ReturnInt0() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstNaN:d\d+>> DoubleConstant nan
   /// CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<ConstNaN>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: int Main.ReturnInt0() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnInt0() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:                       Return [<<Const0>>]
 
-  /// CHECK-START: int Main.ReturnInt0() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnInt0() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static int ReturnInt0() {
@@ -1376,16 +1376,16 @@
     return (int) imm;
   }
 
-  /// CHECK-START: long Main.ReturnLong33() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ReturnLong33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: long Main.ReturnLong33() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:j\d+>>  LongConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
-  /// CHECK-START: long Main.ReturnLong33() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong33() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static long ReturnLong33() {
@@ -1393,16 +1393,16 @@
     return (long) imm;
   }
 
-  /// CHECK-START: long Main.ReturnLong34() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ReturnLong34() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const34:f\d+>>  FloatConstant 34
   /// CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<Const34>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: long Main.ReturnLong34() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong34() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
   /// CHECK-DAG:                       Return [<<Const34>>]
 
-  /// CHECK-START: long Main.ReturnLong34() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong34() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static long ReturnLong34() {
@@ -1410,16 +1410,16 @@
     return (long) imm;
   }
 
-  /// CHECK-START: long Main.ReturnLong0() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ReturnLong0() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstNaN:d\d+>> DoubleConstant nan
   /// CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<ConstNaN>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: long Main.ReturnLong0() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong0() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:                       Return [<<Const0>>]
 
-  /// CHECK-START: long Main.ReturnLong0() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong0() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static long ReturnLong0() {
@@ -1427,16 +1427,16 @@
     return (long) imm;
   }
 
-  /// CHECK-START: float Main.ReturnFloat33() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.ReturnFloat33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: float Main.ReturnFloat33() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:f\d+>>  FloatConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
-  /// CHECK-START: float Main.ReturnFloat33() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat33() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static float ReturnFloat33() {
@@ -1444,16 +1444,16 @@
     return (float) imm;
   }
 
-  /// CHECK-START: float Main.ReturnFloat34() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.ReturnFloat34() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
   /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const34>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: float Main.ReturnFloat34() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat34() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const34:f\d+>>  FloatConstant 34
   /// CHECK-DAG:                       Return [<<Const34>>]
 
-  /// CHECK-START: float Main.ReturnFloat34() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat34() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static float ReturnFloat34() {
@@ -1461,16 +1461,16 @@
     return (float) imm;
   }
 
-  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const:d\d+>>    DoubleConstant 99.25
   /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const:f\d+>>    FloatConstant 99.25
   /// CHECK-DAG:                       Return [<<Const>>]
 
-  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static float ReturnFloat99P25() {
@@ -1478,12 +1478,12 @@
     return (float) imm;
   }
 
-  /// CHECK-START: double Main.ReturnDouble33() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.ReturnDouble33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: double Main.ReturnDouble33() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:d\d+>>  DoubleConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
@@ -1492,16 +1492,16 @@
     return (double) imm;
   }
 
-  /// CHECK-START: double Main.ReturnDouble34() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.ReturnDouble34() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
   /// CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const34>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: double Main.ReturnDouble34() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble34() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const34:d\d+>>  DoubleConstant 34
   /// CHECK-DAG:                       Return [<<Const34>>]
 
-  /// CHECK-START: double Main.ReturnDouble34() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble34() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static double ReturnDouble34() {
@@ -1509,16 +1509,16 @@
     return (double) imm;
   }
 
-  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const:f\d+>>    FloatConstant 99.25
   /// CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const:d\d+>>    DoubleConstant 99.25
   /// CHECK-DAG:                       Return [<<Const>>]
 
-  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static double ReturnDouble99P25() {
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index c125e33..3a56c3b 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -1380,7 +1380,7 @@
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
-  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier$after_bce (after)
   //  Simplification removes the redundant check
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index 36f14d8..6e453af 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -103,7 +103,7 @@
   /// CHECK-NOT:     CheckCast
   public String testClassRemove() {
     Object s = SubclassA.class;
-    return ((Class)s).getName();
+    return ((Class<?>)s).getName();
   }
 
   /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier (before)
@@ -214,11 +214,11 @@
   /// CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
   /// CHECK-DAG:                   If [<<IOf>>]
 
-  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (before)
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_bce (before)
   /// CHECK:         CheckCast
   /// CHECK-NOT:     CheckCast
 
-  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (after)
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOf_Inlined(Object o) {
     if (!$inline$InstanceofSubclassC(o)) {
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 040479e..5b14735 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -876,7 +876,7 @@
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
 
-  /// CHECK-START: int Main.$noinline$NegNeg2(int) constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) constant_folding$after_inlining (after)
   /// CHECK:         <<Const0:i\d+>>   IntConstant 0
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
@@ -1126,7 +1126,7 @@
     return res;
   }
 
-  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -1136,7 +1136,7 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
   /// CHECK-DAG:                       Return [<<True>>]
 
@@ -1151,7 +1151,7 @@
     return arg;
   }
 
-  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -1161,7 +1161,7 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<False:i\d+>>    IntConstant 0
   /// CHECK-DAG:                       Return [<<False>>]
 
@@ -1178,7 +1178,7 @@
    * remove the second.
    */
 
-  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>    IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
@@ -1186,7 +1186,7 @@
   /// CHECK-DAG:     <<NotNotArg:i\d+>> Select [<<Const1>>,<<Const0>>,<<NotArg>>]
   /// CHECK-DAG:                        Return [<<NotNotArg>>]
 
-  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
@@ -1317,7 +1317,7 @@
     return arg * 31;
   }
 
-  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_bce (before)
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1327,7 +1327,7 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_bce (after)
   /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
@@ -1340,7 +1340,7 @@
     return (booleanField == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_bce (before)
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1350,7 +1350,7 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_bce (after)
   /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
@@ -1363,7 +1363,7 @@
     return (booleanField != $inline$false()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1376,7 +1376,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
@@ -1392,7 +1392,7 @@
     return ((i > 42) == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1405,7 +1405,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
@@ -1426,7 +1426,7 @@
   /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) instruction_simplifier_before_codegen (after)
+  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) instruction_simplifier$before_codegen (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1443,7 +1443,7 @@
   /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) instruction_simplifier_before_codegen (after)
+  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) instruction_simplifier$before_codegen (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1859,7 +1859,7 @@
     if (doThrow) { throw new Error(); }
     try {
       Class<?> c = Class.forName("SmaliTests");
-      Method m = c.getMethod(name, new Class[] { boolean.class });
+      Method m = c.getMethod(name, boolean.class);
       return (Integer) m.invoke(null, input);
     } catch (Exception ex) {
       throw new Error(ex);
diff --git a/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java b/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
index 171ade8..2056e2f 100644
--- a/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
+++ b/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
@@ -38,25 +38,25 @@
     return "OtherDex";
   }
 
-  public static Class returnOtherDexClass() {
+  public static Class<?> returnOtherDexClass() {
     return OtherDex.class;
   }
 
-  public static Class returnMainClass() {
+  public static Class<?> returnMainClass() {
     return Main.class;
   }
 
-  private static Class returnOtherDexClass2() {
+  private static Class<?> returnOtherDexClass2() {
     return OtherDex.class;
   }
 
-  public static Class returnOtherDexClassStaticCall() {
+  public static Class<?> returnOtherDexClassStaticCall() {
     // Do not call returnOtherDexClass, as it may have been flagged
     // as non-inlineable.
     return returnOtherDexClass2();
   }
 
-  public static Class returnOtherDexCallingMain() {
+  public static Class<?> returnOtherDexCallingMain() {
     return Main.getOtherClass();
   }
 
diff --git a/test/462-checker-inlining-across-dex-files/src/Main.java b/test/462-checker-inlining-across-dex-files/src/Main.java
index 1fe49a8..c2bb479 100644
--- a/test/462-checker-inlining-across-dex-files/src/Main.java
+++ b/test/462-checker-inlining-across-dex-files/src/Main.java
@@ -106,7 +106,7 @@
   /// CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
-  public static Class dontInlineOtherDexClass() {
+  public static Class<?> dontInlineOtherDexClass() {
     return OtherDex.returnOtherDexClass();
   }
 
@@ -123,7 +123,7 @@
   // Note: There are two LoadClass instructions. We obtain the correct
   //       instruction id by matching the Return's input list first.
 
-  public static Class inlineMainClass() {
+  public static Class<?> inlineMainClass() {
     return OtherDex.returnMainClass();
   }
 
@@ -135,7 +135,7 @@
   /// CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
-  public static Class dontInlineOtherDexClassStaticCall() {
+  public static Class<?> dontInlineOtherDexClassStaticCall() {
     return OtherDex.returnOtherDexClassStaticCall();
   }
 
@@ -152,11 +152,11 @@
   // Note: There are two LoadClass instructions. We obtain the correct
   //       instruction id by matching the Return's input list first.
 
-  public static Class inlineOtherDexCallingMain() {
+  public static Class<?> inlineOtherDexCallingMain() {
     return OtherDex.returnOtherDexCallingMain();
   }
 
-  public static Class getOtherClass() {
+  public static Class<?> getOtherClass() {
     return Main.class;
   }
 
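The Class -> Class<?> changes above and in the following files are a compile-time cleanup only; generics are erased, so the generated code is unchanged. A minimal standalone sketch (the class name here is hypothetical, not part of the tests) of the difference as javac sees it:

  // Compiled with -Xlint:rawtypes, the raw return type warns while the
  // wildcard form is clean; at runtime both return the same Class object.
  public class RawTypeDemo {
    static Class rawClass()     { return RawTypeDemo.class; }  // rawtypes warning
    static Class<?> wildClass() { return RawTypeDemo.class; }  // no warning
    public static void main(String[] args) {
      System.out.println(rawClass() == wildClass());  // true
    }
  }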
diff --git a/test/471-uninitialized-locals/src/Main.java b/test/471-uninitialized-locals/src/Main.java
index a5b1c48..1ac749e 100644
--- a/test/471-uninitialized-locals/src/Main.java
+++ b/test/471-uninitialized-locals/src/Main.java
@@ -24,8 +24,8 @@
   public static void main(String args[]) throws Exception {
     try {
       Class<?> c = Class.forName("Test");
-      Method m = c.getMethod("ThrowException", (Class[]) null);
-      m.invoke(null, (Object[]) null);
+      Method m = c.getMethod("ThrowException");
+      m.invoke(null);
     } catch (VerifyError e) {
        // Compilation should go fine but we expect the runtime verification to fail.
       return;
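The reflection rewrites in this and the surrounding files rely on Class.getMethod and Method.invoke being varargs, with a null parameter array documented to behave like an empty one, so dropping the explicit arrays does not change behaviour. A small standalone sketch (hypothetical class and method names) showing both spellings side by side:

  import java.lang.reflect.Method;

  public class VarargsReflectionDemo {
    public static int answer() { return 42; }

    public static void main(String[] args) throws Exception {
      Class<?> c = Class.forName("VarargsReflectionDemo");
      Method m1 = c.getMethod("answer", (Class[]) null);     // old explicit-array style
      Method m2 = c.getMethod("answer");                      // varargs style used after this change
      System.out.println(m1.invoke(null, (Object[]) null));  // 42
      System.out.println(m2.invoke(null));                    // 42
    }
  }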
diff --git a/test/472-unreachable-if-regression/src/Main.java b/test/472-unreachable-if-regression/src/Main.java
index c9f9511..d426df1 100644
--- a/test/472-unreachable-if-regression/src/Main.java
+++ b/test/472-unreachable-if-regression/src/Main.java
@@ -25,12 +25,12 @@
     System.out.println("Test started.");
     Class<?> c = Class.forName("Test");
 
-    Method unreachableIf = c.getMethod("UnreachableIf", (Class[]) null);
-    unreachableIf.invoke(null, (Object[]) null);
+    Method unreachableIf = c.getMethod("UnreachableIf");
+    unreachableIf.invoke(null);
     System.out.println("Successfully called UnreachableIf().");
 
-    Method unreachablePackedSwitch = c.getMethod("UnreachablePackedSwitch", (Class[]) null);
-    unreachablePackedSwitch.invoke(null, (Object[]) null);
+    Method unreachablePackedSwitch = c.getMethod("UnreachablePackedSwitch");
+    unreachablePackedSwitch.invoke(null);
     System.out.println("Successfully called UnreachablePackedSwitch().");
   }
 
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
index e5171f0..141054d 100644
--- a/test/480-checker-dead-blocks/src/Main.java
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -30,7 +30,7 @@
     return false;
   }
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -39,13 +39,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Sub
   /// CHECK-NOT:                      Phi
@@ -62,7 +62,7 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -71,13 +71,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
   /// CHECK-NOT:                      Phi
@@ -94,10 +94,10 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (before)
   /// CHECK:                          Mul
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      Mul
 
   public static int testRemoveLoop(int x) {
@@ -109,11 +109,11 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      Return
   /// CHECK-DAG:                      Exit
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      Return
   /// CHECK-NOT:                      Exit
 
@@ -124,15 +124,15 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -143,16 +143,16 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -165,13 +165,13 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (before)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (after)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index 056f22c..e3617c7 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -23,7 +23,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
@@ -36,7 +36,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
@@ -73,7 +73,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -88,7 +88,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -129,7 +129,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -146,7 +146,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<SelX>>]                          loop:none
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -194,7 +194,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -217,7 +217,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java
index 019d876..7d5fd4f 100644
--- a/test/485-checker-dce-switch/src/Main.java
+++ b/test/485-checker-dce-switch/src/Main.java
@@ -20,14 +20,14 @@
     return 5;
   }
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
   /// CHECK-DAG:    <<Const100:i\d+>> IntConstant 100
   /// CHECK-DAG:                      Return [<<Const100>>]
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int wholeSwitchDead(int j) {
@@ -60,14 +60,14 @@
     return l;
   }
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (before)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:                      Return [<<Const7>>]
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_InRange() {
@@ -96,14 +96,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (before)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Const15:i\d+>> IntConstant 15
   /// CHECK-DAG:                      Return [<<Const15>>]
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_AboveRange() {
@@ -132,14 +132,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (before)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
   /// CHECK-DAG:     <<ConstM5:i\d+>> IntConstant -5
   /// CHECK-DAG:                      Return [<<ConstM5>>]
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_BelowRange() {
diff --git a/test/489-current-method-regression/src/Main.java b/test/489-current-method-regression/src/Main.java
index 7d102f5..285c41d 100644
--- a/test/489-current-method-regression/src/Main.java
+++ b/test/489-current-method-regression/src/Main.java
@@ -23,7 +23,7 @@
     if (a == 42) {
       // The class loading will be seen as dead code by
       // the optimizer.
-      Class c = Main.class;
+      Class<?> c = Main.class;
     }
     return new Main().bar();
   }
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java
index 78e8a40..15d4dc0 100644
--- a/test/496-checker-inlining-and-class-loader/src/Main.java
+++ b/test/496-checker-inlining-and-class-loader/src/Main.java
@@ -69,7 +69,7 @@
             "loadClassBinaryName", String.class, ClassLoader.class, List.class);
 
         if (dexFile != null) {
-          Class clazz = (Class)method.invoke(dexFile, className, this, null);
+          Class<?> clazz = (Class<?>)method.invoke(dexFile, className, this, null);
           if (clazz != null) {
             return clazz;
           }
@@ -124,7 +124,7 @@
 public class Main {
   public static void main(String[] args) throws Exception {
     MyClassLoader o = new MyClassLoader();
-    Class foo = o.loadClass("LoadedByMyClassLoader");
+    Class<?> foo = o.loadClass("LoadedByMyClassLoader");
     Method m = foo.getDeclaredMethod("bar");
     m.invoke(null);
   }
diff --git a/test/497-inlining-and-class-loader/src/Main.java b/test/497-inlining-and-class-loader/src/Main.java
index 832b1f0..1e27e77 100644
--- a/test/497-inlining-and-class-loader/src/Main.java
+++ b/test/497-inlining-and-class-loader/src/Main.java
@@ -66,7 +66,7 @@
             "loadClassBinaryName", String.class, ClassLoader.class, List.class);
 
         if (dex != null) {
-          Class clazz = (Class)method.invoke(dex, className, this, null);
+          Class<?> clazz = (Class<?>)method.invoke(dex, className, this, null);
           if (clazz != null) {
             return clazz;
           }
@@ -92,7 +92,7 @@
 
     MyClassLoader o = new MyClassLoader();
     MyClassLoader.level1ClassLoader = new MyClassLoader();
-    Class foo = o.loadClass("LoadedByMyClassLoader");
+    Class<?> foo = o.loadClass("LoadedByMyClassLoader");
     Method m = foo.getDeclaredMethod("bar");
     try {
       m.invoke(null);
diff --git a/test/501-regression-packed-switch/src/Main.java b/test/501-regression-packed-switch/src/Main.java
index 12bc1a8..74c081a 100644
--- a/test/501-regression-packed-switch/src/Main.java
+++ b/test/501-regression-packed-switch/src/Main.java
@@ -24,12 +24,12 @@
 
   public static void main(String args[]) throws Exception {
     Class<?> c = Class.forName("Test");
-    Method m = c.getMethod("EmptyPackedSwitch", new Class[] { int.class });
+    Method m = c.getMethod("EmptyPackedSwitch", int.class);
     Integer result = (Integer) m.invoke(null, new Integer(42));
     if (result != 5) {
       throw new Error("Expected 5, got " + result);
     }
-    m = c.getMethod("PackedSwitchAfterData", new Class[] { int.class });
+    m = c.getMethod("PackedSwitchAfterData", int.class);
     result = (Integer) m.invoke(null, new Integer(0));
     if (result != 1) {
       throw new Error("Expected 1, got " + result);
diff --git a/test/504-regression-baseline-entry/src/Main.java b/test/504-regression-baseline-entry/src/Main.java
index 2c9df28..284cbdc 100644
--- a/test/504-regression-baseline-entry/src/Main.java
+++ b/test/504-regression-baseline-entry/src/Main.java
@@ -24,7 +24,7 @@
 
   public static void main(String args[]) throws Exception {
     Class<?> c = Class.forName("Test");
-    Method m = c.getMethod("SingleGotoStart", (Class[]) null);
+    Method m = c.getMethod("SingleGotoStart");
     Integer result = (Integer) m.invoke(null);
     if (result != 5) {
       throw new Error("Expected 5, got " + result);
diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali
index 733a1dd..b0bffa5 100644
--- a/test/510-checker-try-catch/smali/Builder.smali
+++ b/test/510-checker-try-catch/smali/Builder.smali
@@ -1360,7 +1360,7 @@
 # Test that a throw-catch loop on monitor-exit is eliminated.
 # Note that we do not test this until after DCE which merges trivially split blocks.
 
-## CHECK-START: int Builder.testSynchronized(java.lang.Object) dead_code_elimination (after)
+## CHECK-START: int Builder.testSynchronized(java.lang.Object) dead_code_elimination$initial (after)
 ## CHECK:      flags "catch_block"
 ## CHECK-NOT:  end_block
 ## CHECK:      MonitorOperation kind:exit
diff --git a/test/510-checker-try-catch/src/Main.java b/test/510-checker-try-catch/src/Main.java
index 25cdc0e..d6dcd30 100644
--- a/test/510-checker-try-catch/src/Main.java
+++ b/test/510-checker-try-catch/src/Main.java
@@ -39,7 +39,7 @@
 
   public static void testMethod(String method) throws Exception {
     Class<?> c = Class.forName("Runtime");
-    Method m = c.getMethod(method, new Class[] { boolean.class, boolean.class });
+    Method m = c.getMethod(method, boolean.class, boolean.class);
 
     for (TestPath path : TestPath.values()) {
       Object[] arguments = new Object[] { path.arg1, path.arg2 };
diff --git a/test/517-checker-builder-fallthrough/src/Main.java b/test/517-checker-builder-fallthrough/src/Main.java
index 23d94e6..14170f5 100644
--- a/test/517-checker-builder-fallthrough/src/Main.java
+++ b/test/517-checker-builder-fallthrough/src/Main.java
@@ -20,7 +20,7 @@
 
   public static int runTest(int input) throws Exception {
     Class<?> c = Class.forName("TestCase");
-    Method m = c.getMethod("testCase", new Class[] { int.class });
+    Method m = c.getMethod("testCase", int.class);
     return (Integer) m.invoke(null, input);
   }
 
diff --git a/test/522-checker-regression-monitor-exit/smali/Test.smali b/test/522-checker-regression-monitor-exit/smali/Test.smali
index c8e9198..72583d2 100644
--- a/test/522-checker-regression-monitor-exit/smali/Test.smali
+++ b/test/522-checker-regression-monitor-exit/smali/Test.smali
@@ -17,11 +17,11 @@
 
 .super Ljava/lang/Object;
 
-## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination (before)
+## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination$initial (before)
 ## CHECK:         MonitorOperation [<<Param:l\d+>>] kind:enter
 ## CHECK:         MonitorOperation [<<Param>>]      kind:exit
 
-## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination (after)
+## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination$initial (after)
 ## CHECK:         MonitorOperation [<<Param:l\d+>>] kind:enter
 ## CHECK:         MonitorOperation [<<Param>>]      kind:exit
 
diff --git a/test/522-checker-regression-monitor-exit/src/Main.java b/test/522-checker-regression-monitor-exit/src/Main.java
index c85ac96..a5e9512 100644
--- a/test/522-checker-regression-monitor-exit/src/Main.java
+++ b/test/522-checker-regression-monitor-exit/src/Main.java
@@ -40,7 +40,7 @@
       Integer result;
       try {
         Class<?> c = Class.forName("Test");
-        Method m = c.getMethod("synchronizedHashCode", new Class[] { Object.class });
+        Method m = c.getMethod("synchronizedHashCode", Object.class);
         result = (Integer) m.invoke(null, m_obj);
       } catch (Exception e) {
         System.err.println("Hash code query exception");
diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java
index 3366f20..9435ef1 100644
--- a/test/527-checker-array-access-split/src/Main.java
+++ b/test/527-checker-array-access-split/src/Main.java
@@ -189,7 +189,7 @@
   /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN_after_arch (after)
+  /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
@@ -220,7 +220,7 @@
   /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM:   void Main.getSet(int[], int) GVN_after_arch (after)
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
@@ -260,7 +260,7 @@
   /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN_after_arch (after)
+  /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
@@ -294,7 +294,7 @@
   /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) GVN_after_arch (after)
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
@@ -349,7 +349,7 @@
   /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN_after_arch (after)
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
@@ -386,7 +386,7 @@
   /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() GVN_after_arch (after)
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
@@ -445,7 +445,7 @@
   /// CHECK:             <<Address3:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
 
-  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
@@ -461,7 +461,7 @@
 
   // There should be only one intermediate address computation in the loop.
 
-  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN$after_arch (after)
   /// CHECK:                                    IntermediateAddress
   /// CHECK-NOT:                                IntermediateAddress
 
@@ -494,7 +494,7 @@
   /// CHECK:             <<Address3:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
 
-  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
@@ -508,7 +508,7 @@
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
   /// CHECK:                                    ArraySet [<<Address>>,<<Index1>>,<<Add>>]
 
-  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN$after_arch (after)
   /// CHECK:                                    IntermediateAddress
   /// CHECK-NOT:                                IntermediateAddress
 
diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java
index 5ffcbe9..6b5c657 100644
--- a/test/530-checker-loops3/src/Main.java
+++ b/test/530-checker-loops3/src/Main.java
@@ -132,7 +132,7 @@
   /// CHECK-DAG: Deoptimize loop:none
   /// CHECK-NOT: Deoptimize
   //
-  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) instruction_simplifier_after_bce (after)
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-DAG: Deoptimize loop:none
   /// CHECK-DAG: Deoptimize loop:none
   /// CHECK-DAG: Deoptimize loop:none
@@ -164,7 +164,7 @@
   /// CHECK-DAG: Deoptimize loop:none
   /// CHECK-NOT: Deoptimize
   //
-  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) instruction_simplifier_after_bce (after)
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-DAG: Deoptimize loop:none
   /// CHECK-DAG: Deoptimize loop:none
   /// CHECK-DAG: Deoptimize loop:none
@@ -196,7 +196,7 @@
   /// CHECK-DAG: Deoptimize loop:none
   /// CHECK-NOT: Deoptimize
   //
-  /// CHECK-START: void Main.shifter(int[]) instruction_simplifier_after_bce (after)
+  /// CHECK-START: void Main.shifter(int[]) instruction_simplifier$after_bce (after)
   /// CHECK-DAG: Deoptimize loop:none
   /// CHECK-DAG: Deoptimize loop:none
   /// CHECK-NOT: Deoptimize
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index f791adf..f6713a2 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -473,7 +473,7 @@
   }
 
   /**
-   * Test that the `-1` constant is not synthesized in a register and that we
+   * ARM/ARM64: Test that the `-1` constant is not synthesized in a register and that we
    * instead simply switch between `add` and `sub` instructions with the
    * constant embedded.
    * We need two uses (or more) of the constant because the compiler always
@@ -491,10 +491,137 @@
   /// CHECK:                        sub x{{\d+}}, x{{\d+}}, #0x1
   /// CHECK:                        add x{{\d+}}, x{{\d+}}, #0x1
 
+  /// CHECK-START-ARM: long Main.addM1(long) register (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK-NOT:                    ParallelMove
+  /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
+  /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
+
+  /// CHECK-START-ARM: long Main.addM1(long) disassembly (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, #1
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #-1
+  /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, #1
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #0
+
   public static long addM1(long arg) {
     return (arg + (-1)) | (arg - (-1));
   }
 
+  /**
+   * ARM: Test that some long constants are not synthesized in a register for add-long.
+   * Also test some negative cases where we do synthesize constants in registers.
+   */
+
+  /// CHECK-START-ARM: long Main.addLongConstants(long) disassembly (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK-DAG: <<ConstA:j\d+>>    LongConstant 4486007727657233
+  /// CHECK-DAG: <<ConstB:j\d+>>    LongConstant 4486011735248896
+  /// CHECK-DAG: <<ConstC:j\d+>>    LongConstant -1071856711330889728
+  /// CHECK-DAG: <<ConstD:j\d+>>    LongConstant 17587891077120
+  /// CHECK-DAG: <<ConstE:j\d+>>    LongConstant -8808977924096
+  /// CHECK-DAG: <<ConstF:j\d+>>    LongConstant 17587891077121
+  /// CHECK-DAG: <<ConstG:j\d+>>    LongConstant 4095
+  /// CHECK:                        Add [<<Arg>>,<<ConstA>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, r{{\d+}}, #286331153
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Add [<<Arg>>,<<ConstB>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Add [<<Arg>>,<<ConstC>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, r{{\d+}}, #16711680
+  /// CHECK-NEXT:                   sbc r{{\d+}}, r{{\d+}}, #249561088
+  /// CHECK:                        Add [<<Arg>>,<<ConstD>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        addw r{{\d+}}, r{{\d+}}, #4095
+  /// CHECK:                        Add [<<Arg>>,<<ConstE>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        subw r{{\d+}}, r{{\d+}}, #2051
+  /// CHECK:                        Add [<<Arg>>,<<ConstF>>]
+  /// CHECK-NEXT:                   adds{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   adc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK:                        Add [<<Arg>>,<<ConstG>>]
+  /// CHECK-NEXT:                   adds{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   adc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+
+  public static long addLongConstants(long arg) {
+    return
+        // Modified immediates.
+        (arg + 0x000ff00011111111L) ^  // 4486007727657233
+        // Modified immediates high and -low.
+        (arg + 0x000ff000fff01000L) ^  // 4486011735248896
+        // Modified immediates ~high and -low.
+        (arg + 0xf11fffffff010000L) ^  // -1071856711330889728
+        // Low word 0 (no carry), high is imm12.
+        (arg + 0x00000fff00000000L) ^  // 17587891077120
+        // Low word 0 (no carry), -high is imm12.
+        (arg + 0xfffff7fd00000000L) ^  // -8808977924096
+        // Cannot embed imm12 in ADC/SBC for high word.
+        (arg + 0x00000fff00000001L) ^  // 17587891077121
+        // Cannot embed imm12 in ADDS/SUBS for low word (need to set flags).
+        (arg + 0x0000000000000fffL) ^  // 4095
+        arg;
+  }
+
+  /**
+   * ARM: Test that some long constants are not synthesized in a register for sub-long.
+   * Also test some negative cases where we do synthesize constants in registers.
+   */
+
+  /// CHECK-START-ARM: long Main.subLongConstants(long) disassembly (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK-DAG: <<ConstA:j\d+>>    LongConstant 4486007727657233
+  /// CHECK-DAG: <<ConstB:j\d+>>    LongConstant 4486011735248896
+  /// CHECK-DAG: <<ConstC:j\d+>>    LongConstant -1071856711330889728
+  /// CHECK-DAG: <<ConstD:j\d+>>    LongConstant 17587891077120
+  /// CHECK-DAG: <<ConstE:j\d+>>    LongConstant -8808977924096
+  /// CHECK-DAG: <<ConstF:j\d+>>    LongConstant 17587891077121
+  /// CHECK-DAG: <<ConstG:j\d+>>    LongConstant 4095
+  /// CHECK:                        Sub [<<Arg>>,<<ConstA>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, r{{\d+}}, #286331153
+  /// CHECK-NEXT:                   sbc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Sub [<<Arg>>,<<ConstB>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK-NEXT:                   sbc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Sub [<<Arg>>,<<ConstC>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, r{{\d+}}, #16711680
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #249561088
+  /// CHECK:                        Sub [<<Arg>>,<<ConstD>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        subw r{{\d+}}, r{{\d+}}, #4095
+  /// CHECK:                        Sub [<<Arg>>,<<ConstE>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        addw r{{\d+}}, r{{\d+}}, #2051
+  /// CHECK:                        Sub [<<Arg>>,<<ConstF>>]
+  /// CHECK-NEXT:                   subs{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   sbc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK:                        Sub [<<Arg>>,<<ConstG>>]
+  /// CHECK-NEXT:                   subs{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   sbc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+
+  public static long subLongConstants(long arg) {
+    return
+        // Modified immediates.
+        (arg - 0x000ff00011111111L) ^  // 4486007727657233
+        // Modified immediates high and -low.
+        (arg - 0x000ff000fff01000L) ^  // 4486011735248896
+        // Modified immediates ~high and -low.
+        (arg - 0xf11fffffff010000L) ^  // -1071856711330889728
+        // Low word 0 (no carry), high is imm12.
+        (arg - 0x00000fff00000000L) ^  // 17587891077120
+        // Low word 0 (no carry), -high is imm12.
+        (arg - 0xfffff7fd00000000L) ^  // -8808977924096
+        // Cannot embed imm12 in ADC/SBC for high word.
+        (arg - 0x00000fff00000001L) ^  // 17587891077121
+        // Cannot embed imm12 in ADDS/SUBS for low word (need to set flags).
+        (arg - 0x0000000000000fffL) ^  // 4095
+        arg;
+  }
+
   public static void main(String[] args) {
     int arg = 0x87654321;
     assertIntEquals(and255(arg), 0x21);
@@ -522,7 +649,7 @@
     assertLongEquals(xor0xfffffff00000000f(longArg), 0xedcba9888765432eL);
     assertLongEquals(xor0xf00000000000000f(longArg), 0xe23456788765432eL);
 
-    assertLongEquals(14, addM1(7));
+    assertLongEquals(14L, addM1(7));
 
     assertLongEquals(shl1(longArg), 0x2468acf10eca8642L);
     assertLongEquals(shl2(longArg), 0x48d159e21d950c84L);
@@ -562,5 +689,30 @@
     assertLongEquals(ushr32(~longArg), 0x00000000edcba987L);
     assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L);
     assertLongEquals(ushr63(~longArg), 0x0000000000000001L);
+
+    // Test -1, 0, +1 and arbitrary constants just before and after overflow
+    // on low word in subexpressions of addLongConstants()/subLongConstants(),
+    // so that we check that we carry the overflow correctly to the high word.
+    // For example
+    //    0x111eeeeeeee+0x000ff00011111111 = 0x000ff111ffffffff (carry=0),
+    //    0x111eeeeeeef+0x000ff00011111111 = 0x000ff11200000000 (carry=1).
+    assertLongEquals(0xf11ff7fdee1e1111L, addLongConstants(0xffffffffffffffffL));
+    assertLongEquals(0xee0080211e00eefL, addLongConstants(0x0L));
+    assertLongEquals(0xee0080211e01111L, addLongConstants(0x1L));
+    assertLongEquals(0xedff81c12201113L, addLongConstants(0x111eeeeeeeeL));
+    assertLongEquals(0xedff81feddfeef1L, addLongConstants(0x111eeeeeeefL));
+    assertLongEquals(0xedff83e11c1f111L, addLongConstants(0x222000fefffL));
+    assertLongEquals(0xedff83fee3e0eefL, addLongConstants(0x222000ff000L));
+    assertLongEquals(0xedff805edfe1111L, addLongConstants(0x33300feffffL));
+    assertLongEquals(0xedff80412000eefL, addLongConstants(0x33300ff0000L));
+    assertLongEquals(0xee0080211e00eefL, subLongConstants(0xffffffffffffffffL));
+    assertLongEquals(0xf11ff7fdee1e1111L, subLongConstants(0x0L));
+    assertLongEquals(0xf11ff7fc11e1eef3L, subLongConstants(0x1L));
+    assertLongEquals(0xee0080412201113L, subLongConstants(0x44411111111L));
+    assertLongEquals(0xee0080412201111L, subLongConstants(0x44411111112L));
+    assertLongEquals(0xee0080e11c1f111L, subLongConstants(0x555fff01000L));
+    assertLongEquals(0xee0080e11c1eef3L, subLongConstants(0x555fff01001L));
+    assertLongEquals(0xee0080dedfe1111L, subLongConstants(0x666ff010000L));
+    assertLongEquals(0xee0080dedffeef3L, subLongConstants(0x666ff010001L));
   }
 }
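The new addLongConstants/subLongConstants disassembly checks encode each 64-bit constant as two 32-bit halves: the low word goes into the flag-setting ADDS/SUBS immediate and the high word into the ADC/SBC immediate, with the carry flag propagating any low-word overflow. A small sketch (not part of the test) confirming the split for the first constant, whose immediates appear above as #286331153 and #1044480:

  public class LongSplitDemo {
    public static void main(String[] args) {
      long c = 0x000ff00011111111L;
      int lo = (int) c;           // 0x11111111 == 286331153, the ADDS immediate
      int hi = (int) (c >>> 32);  // 0x000ff000 == 1044480, the ADC immediate
      System.out.println(lo == 286331153 && hi == 1044480);  // true
    }
  }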
diff --git a/test/540-checker-rtp-bug/src/Main.java b/test/540-checker-rtp-bug/src/Main.java
index 17b11db..19b7fb7 100644
--- a/test/540-checker-rtp-bug/src/Main.java
+++ b/test/540-checker-rtp-bug/src/Main.java
@@ -48,7 +48,7 @@
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
 
-  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) dead_code_elimination (after)
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) dead_code_elimination$initial (after)
   /// CHECK:    <<Phi:l\d+>>     Phi
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
diff --git a/test/542-unresolved-access-check/src/Main.java b/test/542-unresolved-access-check/src/Main.java
index 2bdf47f..62bfea1 100644
--- a/test/542-unresolved-access-check/src/Main.java
+++ b/test/542-unresolved-access-check/src/Main.java
@@ -58,7 +58,7 @@
             "loadClassBinaryName", String.class, ClassLoader.class, List.class);
 
         if (dex != null) {
-          Class clazz = (Class)method.invoke(dex, className, this, null);
+          Class<?> clazz = (Class<?>)method.invoke(dex, className, this, null);
           if (clazz != null) {
             return clazz;
           }
@@ -72,7 +72,7 @@
 public class Main {
     public static void main(String[] args) throws Exception {
       MyClassLoader o = new MyClassLoader();
-      Class foo = o.loadClass("LoadedByMyClassLoader");
+      Class<?> foo = o.loadClass("LoadedByMyClassLoader");
       Method m = foo.getDeclaredMethod("main");
       m.invoke(null);
     }
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
index 9f9916d..5557c7b 100644
--- a/test/543-checker-dce-trycatch/smali/TestCase.smali
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -26,18 +26,18 @@
 # Test a case when one entering TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
@@ -71,18 +71,18 @@
 # Test a case when one exiting TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -117,21 +117,21 @@
 # Test that a catch block remains live and consistent if some of try blocks
 # throwing into it are removed.
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -203,7 +203,7 @@
 
 # Test that DCE removes catch phi uses of instructions defined in dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<Arg0:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Arg1:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Const0xa:i\d+>>  IntConstant 10
@@ -220,7 +220,7 @@
 ## CHECK-DAG:                        Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 ## CHECK-DAG:                        Phi [<<Select>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>>  IntConstant 12
 ## CHECK-DAG:     <<Const0xd:i\d+>>  IntConstant 13
@@ -277,7 +277,7 @@
 # Test that DCE does not remove catch phi uses of instructions defined outside
 # dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
@@ -287,7 +287,7 @@
 ## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
 ## CHECK-DAG:                       Phi [<<Const0xf>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
diff --git a/test/543-checker-dce-trycatch/src/Main.java b/test/543-checker-dce-trycatch/src/Main.java
index 6e73d0d..19587e7 100644
--- a/test/543-checker-dce-trycatch/src/Main.java
+++ b/test/543-checker-dce-trycatch/src/Main.java
@@ -35,10 +35,10 @@
   // where TryBoundary still has exception handler successors after having removed
   // some already.
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination_final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
   /// CHECK-NOT: TryBoundary
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination_final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
   /// CHECK: begin_block
   /// CHECK: begin_block
   /// CHECK: begin_block
diff --git a/test/545-tracing-and-jit/src/Main.java b/test/545-tracing-and-jit/src/Main.java
index a2d51d5..f365c6e 100644
--- a/test/545-tracing-and-jit/src/Main.java
+++ b/test/545-tracing-and-jit/src/Main.java
@@ -226,7 +226,7 @@
         private static final Method getMethodTracingModeMethod;
         static {
             try {
-                Class c = Class.forName("dalvik.system.VMDebug");
+                Class<?> c = Class.forName("dalvik.system.VMDebug");
                 startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
                         Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
                 stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
diff --git a/test/552-checker-primitive-typeprop/src/Main.java b/test/552-checker-primitive-typeprop/src/Main.java
index fe2343e..1296800 100644
--- a/test/552-checker-primitive-typeprop/src/Main.java
+++ b/test/552-checker-primitive-typeprop/src/Main.java
@@ -29,15 +29,15 @@
 
   public static void main(String[] args) throws Exception {
     Class<?> c = Class.forName("SsaBuilder");
-    Method m = c.getMethod("environmentPhi", new Class[] { boolean.class, int[].class });
+    Method m = c.getMethod("environmentPhi", boolean.class, int[].class);
 
     int[] array = new int[3];
     int result;
 
-    result = (Integer) m.invoke(null, new Object[] { true, array } );
+    result = (Integer) m.invoke(null, true, array);
     assertEquals(2, result);
 
-    result = (Integer) m.invoke(null, new Object[] { false, array } );
+    result = (Integer) m.invoke(null, false, array);
     assertEquals(0, result);
   }
 }
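For context on the reflection cleanup above: getMethod and Method.invoke take
varargs, so the explicit Class[] and Object[] wrapping is redundant. A small
standalone sketch showing the two styles are equivalent (class and method names
are hypothetical):

    import java.lang.reflect.Method;

    public class VarargsReflectionDemo {
      public static int twice(int x) { return 2 * x; }

      public static void main(String[] args) throws Exception {
        Class<?> c = VarargsReflectionDemo.class;
        // Equivalent lookups: varargs vs. an explicit Class[] array.
        Method viaVarargs = c.getMethod("twice", int.class);
        Method viaArray = c.getMethod("twice", new Class<?>[] { int.class });
        // Equivalent invocations: varargs vs. an explicit Object[] array.
        int a = (Integer) viaVarargs.invoke(null, 21);
        int b = (Integer) viaArray.invoke(null, new Object[] { 21 });
        System.out.println(a + " " + b);  // 42 42
      }
    }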
diff --git a/test/557-checker-instruction-simplifier-ror/src/Main.java b/test/557-checker-instruction-simplifier-ror/src/Main.java
index 6d8b74d..0e3d145 100644
--- a/test/557-checker-instruction-simplifier-ror/src/Main.java
+++ b/test/557-checker-instruction-simplifier-ror/src/Main.java
@@ -175,7 +175,7 @@
 
   //  (i >>> #distance) | (i << #-distance)
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (before)
   /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
   /// CHECK:          <<Const2:i\d+>>       IntConstant 2
   /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
@@ -184,13 +184,13 @@
   /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
   /// CHECK:                                Return [<<Or>>]
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (after)
   /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
   /// CHECK:          <<Const2:i\d+>>       IntConstant 2
   /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
   /// CHECK:                                Return [<<Ror>>]
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:      UShr
   /// CHECK-NOT:      Shl
   public static int ror_int_constant_c_negc(int value) {
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
index 7ce60a3..5d4aa56 100644
--- a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -28,7 +28,7 @@
 #  exit    \-    \
 #           other_loop_entry
 #
-## CHECK-START: int IrreducibleLoop.simpleLoop(int) dead_code_elimination (before)
+## CHECK-START: int IrreducibleLoop.simpleLoop(int) dead_code_elimination$initial (before)
 ## CHECK: irreducible:true
 .method public static simpleLoop(I)I
    .registers 2
@@ -65,7 +65,7 @@
 #                other_loop_entry
 #              set 30 in p1:myField
 #
-## CHECK-START: int IrreducibleLoop.lse(int, Main) dead_code_elimination (after)
+## CHECK-START: int IrreducibleLoop.lse(int, Main) dead_code_elimination$initial (after)
 ## CHECK: irreducible:true
 #
 ## CHECK-START: int IrreducibleLoop.lse(int, Main) load_store_elimination (after)
@@ -101,10 +101,10 @@
 #  exit    \-    \
 #           other_loop_entry
 #
-## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (before)
+## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination$initial (before)
 ## CHECK: irreducible:true
 
-## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (after)
+## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination$initial (after)
 ## CHECK: irreducible:true
 .method public static dce(I)I
    .registers 3
diff --git a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
index b82ed92..75344f7 100644
--- a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -16,7 +16,7 @@
 
 .super Ljava/lang/Object;
 
-## CHECK-START-X86: int IrreducibleLoop.simpleLoop(int) dead_code_elimination (before)
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop(int) dead_code_elimination$initial (before)
 ## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
 ## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
 ## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:{{B\d+}} irreducible:true
diff --git a/test/565-checker-doublenegbitwise/src/Main.java b/test/565-checker-doublenegbitwise/src/Main.java
index e426b75..811c280 100644
--- a/test/565-checker-doublenegbitwise/src/Main.java
+++ b/test/565-checker-doublenegbitwise/src/Main.java
@@ -70,7 +70,7 @@
    * same pass.
    */
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (before)
   /// CHECK:       <<P1:z\d+>>          ParameterValue
   /// CHECK:       <<P2:z\d+>>          ParameterValue
   /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
@@ -80,18 +80,18 @@
   /// CHECK:       <<And:i\d+>>         And [<<Select2>>,<<Select1>>]
   /// CHECK:                            Return [<<And>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:       <<Cond1:z\d+>>       ParameterValue
   /// CHECK:       <<Cond2:z\d+>>       ParameterValue
   /// CHECK:       <<Or:i\d+>>          Or [<<Cond2>>,<<Cond1>>]
   /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<Or>>]
   /// CHECK:                            Return [<<BooleanNot>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:                            BooleanNot
   /// CHECK-NOT:                        BooleanNot
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                        And
 
   public static boolean $opt$noinline$booleanAndToOr(boolean a, boolean b) {
@@ -138,7 +138,7 @@
    * same pass.
    */
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (before)
   /// CHECK:       <<P1:z\d+>>          ParameterValue
   /// CHECK:       <<P2:z\d+>>          ParameterValue
   /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
@@ -148,18 +148,18 @@
   /// CHECK:       <<Or:i\d+>>          Or [<<Select2>>,<<Select1>>]
   /// CHECK:                            Return [<<Or>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:       <<Cond1:z\d+>>       ParameterValue
   /// CHECK:       <<Cond2:z\d+>>       ParameterValue
   /// CHECK:       <<And:i\d+>>         And [<<Cond2>>,<<Cond1>>]
   /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<And>>]
   /// CHECK:                            Return [<<BooleanNot>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:                            BooleanNot
   /// CHECK-NOT:                        BooleanNot
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                        Or
 
   public static boolean $opt$noinline$booleanOrToAnd(boolean a, boolean b) {
@@ -246,7 +246,7 @@
    * same pass.
    */
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier$after_bce (before)
   /// CHECK:       <<P1:z\d+>>          ParameterValue
   /// CHECK:       <<P2:z\d+>>          ParameterValue
   /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
@@ -256,13 +256,13 @@
   /// CHECK:       <<Xor:i\d+>>         Xor [<<Select2>>,<<Select1>>]
   /// CHECK:                            Return [<<Xor>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:       <<Cond1:z\d+>>       ParameterValue
   /// CHECK:       <<Cond2:z\d+>>       ParameterValue
   /// CHECK:       <<Xor:i\d+>>         Xor [<<Cond2>>,<<Cond1>>]
   /// CHECK:                            Return [<<Xor>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                        BooleanNot
 
   public static boolean $opt$noinline$booleanNotXorToXor(boolean a, boolean b) {
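The booleanAndToOr/booleanOrToAnd patterns checked above are De Morgan's laws
applied by the instruction simplifier: !a & !b == !(a | b) and !a | !b == !(a & b).
A quick standalone check of the equivalences (demo class name is hypothetical):

    public class DeMorganDemo {
      public static void main(String[] args) {
        boolean[] vals = { false, true };
        for (boolean a : vals) {
          for (boolean b : vals) {
            // !a & !b  ==  !(a | b)
            if ((!a & !b) != !(a | b)) throw new AssertionError();
            // !a | !b  ==  !(a & b)
            if ((!a | !b) != !(a & b)) throw new AssertionError();
          }
        }
        System.out.println("De Morgan equivalences hold");
      }
    }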
diff --git a/test/565-checker-rotate/src/Main.java b/test/565-checker-rotate/src/Main.java
index aadb597..eb0e868 100644
--- a/test/565-checker-rotate/src/Main.java
+++ b/test/565-checker-rotate/src/Main.java
@@ -52,14 +52,14 @@
   /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) select_generator (after)
   /// CHECK-NOT:                      Phi
 
-  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier$after_bce (after)
   /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
   /// CHECK:         <<ArgDist:i\d+>> ParameterValue
   /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
   /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<NegDist>>]
   /// CHECK-DAG:                      Return [<<Result>>]
 
-  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                      Select
 
   private static int rotateLeftBoolean(boolean value, int distance) {
@@ -206,13 +206,13 @@
   /// CHECK-START: int Main.rotateRightBoolean(boolean, int) select_generator (after)
   /// CHECK-NOT:                     Phi
 
-  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier$after_bce (after)
   /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
   /// CHECK:         <<ArgDist:i\d+>> ParameterValue
   /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<ArgDist>>]
   /// CHECK-DAG:                      Return [<<Result>>]
 
-  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                     Select
 
   private static int rotateRightBoolean(boolean value, int distance) {
diff --git a/test/566-checker-signum/src/Main.java b/test/566-checker-signum/src/Main.java
index 5f2cf3d..7fc9e84 100644
--- a/test/566-checker-signum/src/Main.java
+++ b/test/566-checker-signum/src/Main.java
@@ -45,13 +45,13 @@
   /// CHECK-START: int Main.signBoolean(boolean) select_generator (after)
   /// CHECK-NOT:                     Phi
 
-  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<Arg:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Result:i\d+>> Compare [<<Arg>>,<<Zero>>]
   /// CHECK-DAG:                     Return [<<Result>>]
 
-  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                     Select
 
   private static int signBoolean(boolean x) {
diff --git a/test/566-polymorphic-inlining/src/Main.java b/test/566-polymorphic-inlining/src/Main.java
index 53852a4..793b85f 100644
--- a/test/566-polymorphic-inlining/src/Main.java
+++ b/test/566-polymorphic-inlining/src/Main.java
@@ -15,9 +15,9 @@
  */
 
 interface Itf {
-  public Class sameInvokeInterface();
-  public Class sameInvokeInterface2();
-  public Class sameInvokeInterface3();
+  public Class<?> sameInvokeInterface();
+  public Class<?> sameInvokeInterface2();
+  public Class<?> sameInvokeInterface3();
 }
 
 public class Main implements Itf {
@@ -81,31 +81,31 @@
     assertEquals(20001, counter);
   }
 
-  public Class sameInvokeVirtual() {
+  public Class<?> sameInvokeVirtual() {
     field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
     return Main.class;
   }
 
-  public Class sameInvokeInterface() {
+  public Class<?> sameInvokeInterface() {
     field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
     return Itf.class;
   }
 
-  public Class sameInvokeInterface2() {
+  public Class<?> sameInvokeInterface2() {
     field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
     return Itf.class;
   }
 
-  public Class sameInvokeInterface3() {
+  public Class<?> sameInvokeInterface3() {
     field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
     return Itf.class;
   }
 
-  public static Class testInvokeInterface(Itf i) {
+  public static Class<?> testInvokeInterface(Itf i) {
     return i.sameInvokeInterface();
   }
 
-  public static Class testInvokeInterface2(Itf i) {
+  public static Class<?> testInvokeInterface2(Itf i) {
     // Make three interface calls that will do a ClassTableGet to ensure bogus code
     // generation of ClassTableGet will crash.
     i.sameInvokeInterface();
@@ -113,7 +113,7 @@
     return i.sameInvokeInterface3();
   }
 
-  public static Class testInvokeVirtual(Main m) {
+  public static Class<?> testInvokeVirtual(Main m) {
     return m.sameInvokeVirtual();
   }
 
@@ -139,18 +139,18 @@
 }
 
 class OtherSubclass extends Main {
-  public Class sameInvokeVirtual() {
+  public Class<?> sameInvokeVirtual() {
     return OtherSubclass.class;
   }
 
-  public Class sameInvokeInterface() {
+  public Class<?> sameInvokeInterface() {
     return OtherSubclass.class;
   }
 
-  public Class sameInvokeInterface2() {
+  public Class<?> sameInvokeInterface2() {
     return null;
   }
-  public Class sameInvokeInterface3() {
+  public Class<?> sameInvokeInterface3() {
     return null;
   }
 }
diff --git a/test/567-checker-compare/src/Main.java b/test/567-checker-compare/src/Main.java
index 8587950..a05bb60 100644
--- a/test/567-checker-compare/src/Main.java
+++ b/test/567-checker-compare/src/Main.java
@@ -75,13 +75,13 @@
   /// CHECK-START: int Main.compareBooleans(boolean, boolean) select_generator (after)
   /// CHECK-NOT:                     Phi
 
-  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<ArgX:z\d+>>   ParameterValue
   /// CHECK:         <<ArgY:z\d+>>   ParameterValue
   /// CHECK-DAG:     <<Result:i\d+>> Compare [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                     Return [<<Result>>]
 
-  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                     Select
 
   private static int compareBooleans(boolean x, boolean y) {
diff --git a/test/570-checker-osr/smali/Osr.smali b/test/570-checker-osr/smali/Osr.smali
index 869c7c3..6592b7b 100644
--- a/test/570-checker-osr/smali/Osr.smali
+++ b/test/570-checker-osr/smali/Osr.smali
@@ -19,7 +19,7 @@
 # Check that blocks only having nops are not merged when they are loop headers.
 # This ensures we can do on-stack replacement for branches to those nop blocks.
 
-## CHECK-START: int Osr.simpleLoop(int, int) dead_code_elimination_final (after)
+## CHECK-START: int Osr.simpleLoop(int, int) dead_code_elimination$final (after)
 ## CHECK-DAG:                     SuspendCheck loop:<<OuterLoop:B\d+>> outer_loop:none
 ## CHECK-DAG:                     SuspendCheck loop:{{B\d+}} outer_loop:<<OuterLoop>>
 .method public static simpleLoop(II)I
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 15c232d..8af3894 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -129,7 +129,7 @@
     DeoptimizationController.startDeoptimization();
   }
 
-  public static Class $noinline$inlineCache(Main m, boolean isSecondInvocation) {
+  public static Class<?> $noinline$inlineCache(Main m, boolean isSecondInvocation) {
     // If we are running in non-JIT mode, or were unlucky enough to get this method
     // already JITted, just return the expected value.
     if (!isInInterpreter("$noinline$inlineCache")) {
@@ -159,7 +159,7 @@
     return other.returnClass();
   }
 
-  public static Class $noinline$inlineCache2(Main m, boolean isSecondInvocation) {
+  public static Class<?> $noinline$inlineCache2(Main m, boolean isSecondInvocation) {
     // If we are running in non-JIT mode, or were unlucky enough to get this method
     // already JITted, just return the expected value.
     if (!isInInterpreter("$noinline$inlineCache2")) {
@@ -188,7 +188,7 @@
     return (other == null) ? null : other.returnClass();
   }
 
-  public static Class $noinline$inlineCache3(Main m, boolean isSecondInvocation) {
+  public static Class<?> $noinline$inlineCache3(Main m, boolean isSecondInvocation) {
     // If we are running in non-JIT mode, or were unlucky enough to get this method
     // already JITted, just return the expected value.
     if (!isInInterpreter("$noinline$inlineCache3")) {
@@ -229,7 +229,7 @@
     return null;
   }
 
-  public Class returnClass() {
+  public Class<?> returnClass() {
     return Main.class;
   }
 
@@ -305,7 +305,7 @@
 }
 
 class SubMain extends Main {
-  public Class returnClass() {
+  public Class<?> returnClass() {
     return SubMain.class;
   }
 
diff --git a/test/576-polymorphic-inlining/src/Main.java b/test/576-polymorphic-inlining/src/Main.java
index d8d09af..5763d89 100644
--- a/test/576-polymorphic-inlining/src/Main.java
+++ b/test/576-polymorphic-inlining/src/Main.java
@@ -65,11 +65,11 @@
   public void willOnlyInlineForMainVoid() {
   }
 
-  public Class willInlineWithReturnValue() {
+  public Class<?> willInlineWithReturnValue() {
     return Main.class;
   }
 
-  public Class willOnlyInlineForMainWithReturnValue() {
+  public Class<?> willOnlyInlineForMainWithReturnValue() {
     return Main.class;
   }
   public static boolean doThrow;
@@ -83,21 +83,21 @@
   public void willInlineVoid() {
   }
 
-  public Class willInlineWithReturnValue() {
+  public Class<?> willInlineWithReturnValue() {
     return SubMain.class;
   }
 
-  public Class willOnlyInlineForMainWithReturnValue() {
+  public Class<?> willOnlyInlineForMainWithReturnValue() {
     return SubMain.class;
   }
 }
 
 class SubSubMain extends SubMain {
-  public Class willInlineWithReturnValue() {
+  public Class<?> willInlineWithReturnValue() {
     return SubSubMain.class;
   }
 
-  public Class willOnlyInlineForMainWithReturnValue() {
+  public Class<?> willOnlyInlineForMainWithReturnValue() {
     return SubSubMain.class;
   }
 }
diff --git a/test/577-profile-foreign-dex/src/Main.java b/test/577-profile-foreign-dex/src/Main.java
index 0cd85b5..ed7a625 100644
--- a/test/577-profile-foreign-dex/src/Main.java
+++ b/test/577-profile-foreign-dex/src/Main.java
@@ -111,11 +111,11 @@
   }
 
   private static void loadDexFile(String dexFile) throws Exception {
-    Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+    Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
     if (pathClassLoader == null) {
         throw new RuntimeException("Couldn't find path class loader class");
     }
-    Constructor constructor =
+    Constructor<?> constructor =
         pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
     constructor.newInstance(
             dexFile, ClassLoader.getSystemClassLoader());
@@ -125,7 +125,7 @@
     private static final Method registerAppInfoMethod;
     static {
       try {
-        Class c = Class.forName("dalvik.system.VMRuntime");
+        Class<?> c = Class.forName("dalvik.system.VMRuntime");
         registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
             String.class, String.class, String[].class, String.class);
       } catch (Exception e) {
diff --git a/test/580-checker-round/src/Main.java b/test/580-checker-round/src/Main.java
index 9e248ef..83bc55c 100644
--- a/test/580-checker-round/src/Main.java
+++ b/test/580-checker-round/src/Main.java
@@ -36,7 +36,8 @@
     expectEquals32(-2, round32(-1.51f));
     expectEquals32(-1, round32(-1.2f));
     expectEquals32(-1, round32(-1.0f));
-    expectEquals32(-1, round32(-0.51f));
+    expectEquals32(-1, round32(-0.5000001f));
+    expectEquals32(0, round32(-0.5f));
     expectEquals32(0, round32(-0.2f));
     expectEquals32(0, round32(-0.0f));
     expectEquals32(0, round32(+0.0f));
@@ -47,11 +48,23 @@
     expectEquals32(2, round32(+1.5f));
     expectEquals32(2147483647, round32(Float.POSITIVE_INFINITY));
 
+    // Near minint.
+    expectEquals32(-2147483648, round32(Math.nextAfter(-2147483648.0f, Float.NEGATIVE_INFINITY)));
+    expectEquals32(-2147483648, round32(-2147483648.0f));
+    expectEquals32(-2147483520, round32(Math.nextAfter(-2147483648.0f, Float.POSITIVE_INFINITY)));
+
+    // Near maxint.
+    expectEquals32(2147483520, round32(Math.nextAfter(2147483648.0f, Float.NEGATIVE_INFINITY)));
+    expectEquals32(2147483647, round32(2147483648.0f));
+    expectEquals32(2147483647, round32(Math.nextAfter(2147483648.0f, Float.POSITIVE_INFINITY)));
+
     // Some others.
     for (int i = -100; i <= 100; ++i) {
       expectEquals32(i - 1, round32((float) i - 0.51f));
+      expectEquals32(i, round32((float) i - 0.5f));
       expectEquals32(i, round32((float) i));
       expectEquals32(i + 1, round32((float) i + 0.5f));
+      expectEquals32(i + 1, round32((float) i + 0.51f));
     }
     for (float f = -1.5f; f <= -1.499f; f = Math.nextAfter(f, Float.POSITIVE_INFINITY)) {
       expectEquals32(-1, round32(f));
@@ -61,8 +74,10 @@
     float[] fvals = {
       -16777215.5f,
       -16777215.0f,
-      -0.4999f,
-      0.4999f,
+      -0.49999998f,
+      -0.4999999701976776123046875f,
+      0.4999999701976776123046875f,
+      0.49999998f,
       16777215.0f,
       16777215.5f
     };
@@ -71,6 +86,8 @@
       -16777215,
       0,
       0,
+      0,
+      0,
       16777215,
       16777216
     };
@@ -98,7 +115,8 @@
     expectEquals64(-2L, round64(-1.51d));
     expectEquals64(-1L, round64(-1.2d));
     expectEquals64(-1L, round64(-1.0d));
-    expectEquals64(-1L, round64(-0.51d));
+    expectEquals64(-1L, round64(-0.5000001d));
+    expectEquals64(0L, round64(-0.5d));
     expectEquals64(0L, round64(-0.2d));
     expectEquals64(0L, round64(-0.0d));
     expectEquals64(0L, round64(+0.0d));
@@ -109,11 +127,27 @@
     expectEquals64(2L, round64(+1.5d));
     expectEquals64(9223372036854775807L, round64(Double.POSITIVE_INFINITY));
 
+    // Near minlong.
+    expectEquals64(-9223372036854775808L,
+        round64(Math.nextAfter(-9223372036854775808.0, Double.NEGATIVE_INFINITY)));
+    expectEquals64(-9223372036854775808L, round64(-9223372036854775808.0));
+    expectEquals64(-9223372036854774784L,
+        round64(Math.nextAfter(-9223372036854775809.0, Double.POSITIVE_INFINITY)));
+
+    // Near maxlong.
+    expectEquals64(9223372036854774784L,
+        round64(Math.nextAfter(9223372036854775808.0, Double.NEGATIVE_INFINITY)));
+    expectEquals64(9223372036854775807L, round64(9223372036854775808.0));
+    expectEquals64(9223372036854775807L,
+        round64(Math.nextAfter(9223372036854775808.0, Double.POSITIVE_INFINITY)));
+
     // Some others.
     for (long l = -100; l <= 100; ++l) {
       expectEquals64(l - 1, round64((double) l - 0.51d));
+      expectEquals64(l, round64((double) l - 0.5d));
+      expectEquals64(l, round64((double) l));
       expectEquals64(l + 1, round64((double) l + 0.5d));
-      expectEquals64(l + 1, round64((double) l + 0.5d));
+      expectEquals64(l + 1, round64((double) l + 0.51d));
     }
     for (double d = -1.5d; d <= -1.49999999999d; d = Math.nextAfter(d, Double.POSITIVE_INFINITY)) {
       expectEquals64(-1L, round64(d));
@@ -123,8 +157,10 @@
     double[] dvals = {
       -9007199254740991.5d,
       -9007199254740991.0d,
+      -0.49999999999999997d,
       -0.49999999999999994d,
       0.49999999999999994d,
+      0.49999999999999997d,
       9007199254740991.0d,
       9007199254740991.5d
     };
@@ -133,6 +169,8 @@
       -9007199254740991L,
       0L,
       0L,
+      0L,
+      0L,
       9007199254740991L,
       9007199254740992L
     };
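The updated expectations above follow from Math.round's documented behaviour:
ties round toward positive infinity (roughly floor(x + 0.5)) and results are
clamped to the int or long range. A small standalone illustration (demo class
name is hypothetical):

    public class RoundDemo {
      public static void main(String[] args) {
        // Ties round toward positive infinity: round(-0.5) is 0, not -1.
        System.out.println(Math.round(-0.5f));                  // 0
        System.out.println(Math.round(-0.5000001f));            // -1
        // Results are clamped to the int range.
        System.out.println(Math.round(2147483648.0f));          // 2147483647
        // Same rules for the double/long overload.
        System.out.println(Math.round(-0.5d));                  // 0
        System.out.println(Math.round(9223372036854775808.0));  // 9223372036854775807
      }
    }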
diff --git a/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali b/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali
index 7dbd9da..186f0ab 100644
--- a/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali
+++ b/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali
@@ -16,7 +16,7 @@
 
 .super Ljava/lang/Object;
 
-## CHECK-START-X86: int IrreducibleLoop.simpleLoop1(int) dead_code_elimination (before)
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop1(int) dead_code_elimination$initial (before)
 ## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
 ## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
 ## CHECK-DAG:                     Goto irreducible:true
@@ -57,7 +57,7 @@
    return v0
 .end method
 
-## CHECK-START-X86: int IrreducibleLoop.simpleLoop2(int) dead_code_elimination (before)
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop2(int) dead_code_elimination$initial (before)
 ## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
 ## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
 ## CHECK-DAG:                     Goto irreducible:true
diff --git a/test/591-checker-regression-dead-loop/src/Main.java b/test/591-checker-regression-dead-loop/src/Main.java
index 6d9fcf8..19856cf 100644
--- a/test/591-checker-regression-dead-loop/src/Main.java
+++ b/test/591-checker-regression-dead-loop/src/Main.java
@@ -17,7 +17,7 @@
 class Main {
   private static boolean $inline$false() { return false; }
 
-  /// CHECK-START: void Main.main(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-START: void Main.main(java.lang.String[]) dead_code_elimination$initial (before)
   /// CHECK-DAG:     <<Const0:i\d+>> IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
   /// CHECK-DAG:     <<Phi:i\d+>>    Phi [<<Const0>>,<<Add:i\d+>>] loop:{{B\d+}}
diff --git a/test/593-checker-boolean-to-integral-conv/src/Main.java b/test/593-checker-boolean-to-integral-conv/src/Main.java
index ba65839..b4c91c8 100644
--- a/test/593-checker-boolean-to-integral-conv/src/Main.java
+++ b/test/593-checker-boolean-to-integral-conv/src/Main.java
@@ -46,7 +46,7 @@
   /// CHECK-DAG:     <<IToS:b\d+>>          TypeConversion [<<Sel>>]
   /// CHECK-DAG:                            Return [<<IToS>>]
 
-  /// CHECK-START: byte Main.booleanToByte(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: byte Main.booleanToByte(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:                            Return [<<Arg>>]
 
@@ -72,7 +72,7 @@
   /// CHECK-DAG:     <<IToS:s\d+>>          TypeConversion [<<Sel>>]
   /// CHECK-DAG:                            Return [<<IToS>>]
 
-  /// CHECK-START: short Main.booleanToShort(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: short Main.booleanToShort(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:                            Return [<<Arg>>]
 
@@ -98,7 +98,7 @@
   /// CHECK-DAG:     <<IToC:c\d+>>          TypeConversion [<<Sel>>]
   /// CHECK-DAG:                            Return [<<IToC>>]
 
-  /// CHECK-START: char Main.booleanToChar(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: char Main.booleanToChar(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:                            Return [<<Arg>>]
 
@@ -122,7 +122,7 @@
   /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
   /// CHECK-DAG:                            Return [<<Sel>>]
 
-  /// CHECK-START: int Main.booleanToInt(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.booleanToInt(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:                            Return [<<Arg>>]
 
@@ -148,7 +148,7 @@
   /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Sel>>]
   /// CHECK-DAG:                            Return [<<IToJ>>]
 
-  /// CHECK-START: long Main.booleanToLong(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: long Main.booleanToLong(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:     <<ZToJ:j\d+>>          TypeConversion [<<Arg>>]
   /// CHECK-DAG:                            Return [<<ZToJ>>]
@@ -185,7 +185,7 @@
   /// CHECK-DAG:     <<JToI:i\d+>>          TypeConversion [<<IToJ>>]
   /// CHECK-DAG:                            Return [<<JToI>>]
 
-  /// CHECK-START: int Main.longToIntOfBoolean() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.longToIntOfBoolean() instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
   /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
   /// CHECK-DAG:                            Return [<<Sget>>]
diff --git a/test/601-method-access/src/Main.java b/test/601-method-access/src/Main.java
index 838080a..9d9e568 100644
--- a/test/601-method-access/src/Main.java
+++ b/test/601-method-access/src/Main.java
@@ -22,7 +22,7 @@
 public class Main {
   public static void main(String[] args) {
     try {
-      Class c = Class.forName("SubClassUsingInaccessibleMethod");
+      Class<?> c = Class.forName("SubClassUsingInaccessibleMethod");
       Object o = c.newInstance();
       c.getMethod("test").invoke(o, null);
     } catch (InvocationTargetException ite) {
diff --git a/test/604-hot-static-interface/src/Main.java b/test/604-hot-static-interface/src/Main.java
index 04d7cd6..a26623c 100644
--- a/test/604-hot-static-interface/src/Main.java
+++ b/test/604-hot-static-interface/src/Main.java
@@ -29,7 +29,7 @@
     }
   }
 
-  private static native void ensureJitCompiled(Class itf, String method_name);
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
 }
 
 interface Itf {
diff --git a/test/605-new-string-from-bytes/src/Main.java b/test/605-new-string-from-bytes/src/Main.java
index 7dc0c15..5bd6c5d 100644
--- a/test/605-new-string-from-bytes/src/Main.java
+++ b/test/605-new-string-from-bytes/src/Main.java
@@ -20,7 +20,7 @@
 public class Main {
 
   public static void main(String[] args) throws Exception {
-    Class c = Class.forName("java.lang.StringFactory");
+    Class<?> c = Class.forName("java.lang.StringFactory");
     Method m = c.getDeclaredMethod("newStringFromBytes", byte[].class, int.class);
 
     // Loop over allocations to get more chances of doing GC while in the
diff --git a/test/611-checker-simplify-if/src/Main.java b/test/611-checker-simplify-if/src/Main.java
index 21f4115..7dac007 100644
--- a/test/611-checker-simplify-if/src/Main.java
+++ b/test/611-checker-simplify-if/src/Main.java
@@ -35,14 +35,14 @@
 
   // Test when a condition is the input of the if.
 
-  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination$initial (before)
   /// CHECK: <<Const0:i\d+>>   IntConstant 0
   /// CHECK:                   If
   /// CHECK: <<Phi:i\d+>>      Phi
   /// CHECK: <<Equal:z\d+>>    Equal [<<Phi>>,<<Const0>>]
   /// CHECK:                   If [<<Equal>>]
 
-  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination$initial (after)
   /// CHECK:      If
   /// CHECK-NOT:  Phi
   /// CHECK-NOT:  Equal
@@ -64,13 +64,13 @@
 
   // Test when the phi is the input of the if.
 
-  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination_final (before)
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (before)
   /// CHECK-DAG: <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:                   If
   /// CHECK-DAG: <<Phi:i\d+>>      Phi
   /// CHECK-DAG:                   If [<<Phi>>]
 
-  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination_final (after)
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (after)
   /// CHECK:      If
   /// CHECK-NOT:  Phi
   /// CHECK-NOT:  If
@@ -96,7 +96,7 @@
 
   // Test when one input is not a constant. We can only optimize the constant input.
 
-  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination$initial (before)
   /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
   /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
   /// CHECK-DAG:                          If
@@ -105,7 +105,7 @@
   /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Phi>>,<<Const42>>]
   /// CHECK-DAG:                          If [<<NotEqual>>]
 
-  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination$initial (after)
   /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
   /// CHECK-DAG:                          If
   /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
@@ -129,7 +129,7 @@
 
   // Test with a condition.
 
-  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination$initial (before)
   /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
   /// CHECK-DAG: <<Const22:i\d+>>         IntConstant 22
   /// CHECK-DAG: <<Const25:i\d+>>         IntConstant 25
@@ -138,7 +138,7 @@
   /// CHECK-DAG: <<GE:z\d+>>              GreaterThanOrEqual [<<Phi>>,<<Const25>>]
   /// CHECK-DAG:                          If [<<GE>>]
 
-  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination$initial (after)
   /// CHECK-DAG:                          If
   /// CHECK-NOT:                          Phi
   /// CHECK-NOT:                          GreaterThanOrEqual
@@ -160,7 +160,7 @@
 
   // Test when comparing non constants.
 
-  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination$initial (before)
   /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
   /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
   /// CHECK-DAG:                          If
@@ -169,7 +169,7 @@
   /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Phi>>,<<StaticFieldGet>>]
   /// CHECK-DAG:                          If [<<NotEqual>>]
 
-  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination$initial (after)
   /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
   /// CHECK-DAG:                          If
   /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
@@ -217,12 +217,12 @@
     return true;
   }
 
-  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination$initial (before)
   /// CHECK:      If
   /// CHECK:      If
   /// CHECK:      If
 
-  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination$initial (after)
   /// CHECK:      If
   /// CHECK:      If
   /// CHECK-NOT:  If
@@ -248,11 +248,11 @@
     // Redirect default here.
   }
 
-  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination$initial (before)
   /// CHECK:      If
   /// CHECK:      If
 
-  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination$initial (after)
   /// CHECK:      If
   /// CHECK:      If
   public static void testFP(String[] args) {
diff --git a/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
index 1d6158a..fcb314d 100644
--- a/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
+++ b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
@@ -29,7 +29,7 @@
 }
 
 class OtherClass {
-  public static Class getB() {
+  public static Class<?> getB() {
     // This used to return the B class of another class loader.
     return B.class;
   }
diff --git a/test/612-jit-dex-cache/src/Main.java b/test/612-jit-dex-cache/src/Main.java
index 0e4bd22..89ebe09 100644
--- a/test/612-jit-dex-cache/src/Main.java
+++ b/test/612-jit-dex-cache/src/Main.java
@@ -41,7 +41,7 @@
 
 public class Main {
 
-   private static Class classFromDifferentLoader() throws Exception {
+   private static Class<?> classFromDifferentLoader() throws Exception {
      final String DEX_FILE = System.getenv("DEX_LOCATION") + "/612-jit-dex-cache-ex.jar";
      ClassLoader loader = new DelegateLastPathClassLoader(DEX_FILE, Main.class.getClassLoader());
      return loader.loadClass("LoadedByAppClassLoader");
@@ -49,7 +49,7 @@
 
   public static void main(String[] args) throws Exception {
     System.loadLibrary(args[0]);
-    Class cls = classFromDifferentLoader();
+    Class<?> cls = classFromDifferentLoader();
     Method m = cls.getDeclaredMethod("letMeInlineYou", A.class);
     B b = new B();
     // Invoke the method enough times to get an inline cache and get JITted.
@@ -63,5 +63,5 @@
     }
   }
 
-  public static native void ensureJitCompiled(Class cls, String method_name);
+  public static native void ensureJitCompiled(Class<?> cls, String method_name);
 }
diff --git a/test/614-checker-dump-constant-location/expected.txt b/test/614-checker-dump-constant-location/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/614-checker-dump-constant-location/expected.txt
diff --git a/test/614-checker-dump-constant-location/info.txt b/test/614-checker-dump-constant-location/info.txt
new file mode 100644
index 0000000..4a94ffa
--- /dev/null
+++ b/test/614-checker-dump-constant-location/info.txt
@@ -0,0 +1,2 @@
+Test that the graph visualizer outputs useful information for constant
+locations in parallel moves.
diff --git a/test/614-checker-dump-constant-location/src/Main.java b/test/614-checker-dump-constant-location/src/Main.java
new file mode 100644
index 0000000..f6bc063
--- /dev/null
+++ b/test/614-checker-dump-constant-location/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static int array_int[] = { 0 };
+  public static long array_long[] = { 0 };
+  public static float array_float[] = { 0.0f };
+  public static double array_double[] = { 0.0 };
+
+  // The code used to print constant locations in parallel moves is architecture
+  // independent. We only test on ARM and ARM64, where checking is easy because
+  // 'store' instructions only take registers as sources.
+
+  /// CHECK-START-ARM: void Main.store_to_arrays() register (after)
+  /// CHECK:    ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}}
+
+  /// CHECK-START-ARM64: void Main.store_to_arrays() register (after)
+  /// CHECK:    ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}}
+
+  public void store_to_arrays() {
+    array_int[0] = 1;
+    array_long[0] = 2;
+    array_float[0] = 3.3f;
+    array_double[0] = 4.4;
+  }
+
+  public static void main(String args[]) {}
+}
diff --git a/test/615-checker-arm64-zr-parallel-move/expected.txt b/test/615-checker-arm64-zr-parallel-move/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/expected.txt
diff --git a/test/615-checker-arm64-zr-parallel-move/info.txt b/test/615-checker-arm64-zr-parallel-move/info.txt
new file mode 100644
index 0000000..199755d
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/info.txt
@@ -0,0 +1 @@
+Checker test to verify we correctly use wzr and xzr to synthesize zero constants.
diff --git a/test/615-checker-arm64-zr-parallel-move/src/Main.java b/test/615-checker-arm64-zr-parallel-move/src/Main.java
new file mode 100644
index 0000000..5024f28
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/src/Main.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static boolean doThrow = false;
+
+  public void $noinline$foo(int in_w1,
+                            int in_w2,
+                            int in_w3,
+                            int in_w4,
+                            int in_w5,
+                            int in_w6,
+                            int in_w7,
+                            int on_stack_int,
+                            long on_stack_long,
+                            float in_s0,
+                            float in_s1,
+                            float in_s2,
+                            float in_s3,
+                            float in_s4,
+                            float in_s5,
+                            float in_s6,
+                            float in_s7,
+                            float on_stack_float,
+                            double on_stack_double) {
+    if (doThrow) throw new Error();
+  }
+
+  // We expect a parallel move that moves the zero constant to four different stack locations.
+  /// CHECK-START-ARM64: void Main.bar() register (after)
+  /// CHECK:             ParallelMove {{.*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*}}
+
+  // Those four moves should generate four 'store' instructions using directly the zero register.
+  /// CHECK-START-ARM64: void Main.bar() disassembly (after)
+  /// CHECK-DAG:         {{(str|stur)}} wzr, [sp, #{{[0-9]+}}]
+  /// CHECK-DAG:         {{(str|stur)}} xzr, [sp, #{{[0-9]+}}]
+  /// CHECK-DAG:         {{(str|stur)}} wzr, [sp, #{{[0-9]+}}]
+  /// CHECK-DAG:         {{(str|stur)}} xzr, [sp, #{{[0-9]+}}]
+
+  public void bar() {
+    $noinline$foo(1, 2, 3, 4, 5, 6, 7,     // Integral values in registers.
+                  0, 0L,                   // Integral values on the stack.
+                  1, 2, 3, 4, 5, 6, 7, 8,  // Floating-point values in registers.
+                  0.0f, 0.0);              // Floating-point values on the stack.
+  }
+
+  public static void main(String args[]) {}
+}
diff --git a/test/955-lambda-smali/build b/test/955-lambda-smali/build
deleted file mode 100755
index 14230c2..0000000
--- a/test/955-lambda-smali/build
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# make us exit on a failure
-set -e
-
-./default-build "$@" --experimental default-methods
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
deleted file mode 100644
index 16381e4..0000000
--- a/test/955-lambda-smali/expected.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-SanityCheck
-Hello world! (0-args, no closure)
-ABCD Hello world! (4-args, no closure)
-Caught NPE
-(BoxUnbox) Hello boxing world! (0-args, no closure)
-(BoxUnbox) Boxing repeatedly yields referentially-equal objects
-(BoxUnbox) Caught NPE for unbox-lambda
-(BoxUnbox) Caught NPE for box-lambda
-(BoxUnbox) Caught ClassCastException for unbox-lambda
-(MoveResult) testZ success
-(MoveResult) testB success
-(MoveResult) testS success
-(MoveResult) testI success
-(MoveResult) testC success
-(MoveResult) testJ success
-(MoveResult) testF success
-(MoveResult) testD success
-(MoveResult) testL success
-(CaptureVariables) (0-args, 1 captured variable 'Z'): value is true
-(CaptureVariables) (0-args, 1 captured variable 'B'): value is R
-(CaptureVariables) (0-args, 1 captured variable 'C'): value is ∂
-(CaptureVariables) (0-args, 1 captured variable 'S'): value is 1000
-(CaptureVariables) (0-args, 1 captured variable 'I'): value is 12345678
-(CaptureVariables) (0-args, 1 captured variable 'J'): value is 3287471278325742
-(CaptureVariables) (0-args, 1 captured variable 'F'): value is Infinity
-(CaptureVariables) (0-args, 1 captured variable 'D'): value is -Infinity
-(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is true,R,∂,1000,12345678,3287471278325742,Infinity,-Infinity
-(CaptureVariables) Caught NPE
diff --git a/test/955-lambda-smali/info.txt b/test/955-lambda-smali/info.txt
deleted file mode 100644
index aed5e84..0000000
--- a/test/955-lambda-smali/info.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Smali-based tests for experimental lambda instructions.
-
-Obviously needs to run under ART.
diff --git a/test/955-lambda-smali/run b/test/955-lambda-smali/run
deleted file mode 100755
index 2fb2f89..0000000
--- a/test/955-lambda-smali/run
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
-${RUN} "$@" --experimental lambdas
diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali
deleted file mode 100644
index 915de2d..0000000
--- a/test/955-lambda-smali/smali/BoxUnbox.smali
+++ /dev/null
@@ -1,168 +0,0 @@
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LBoxUnbox;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-    .registers 0
-
-    invoke-static {}, LBoxUnbox;->testBox()V
-    invoke-static {}, LBoxUnbox;->testBoxEquality()V
-    invoke-static {}, LBoxUnbox;->testFailures()V
-    invoke-static {}, LBoxUnbox;->testFailures2()V
-    invoke-static {}, LBoxUnbox;->testFailures3()V
-    invoke-static {}, LBoxUnbox;->forceGC()V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of ArtMethod.
-.method public static doHelloWorld(J)V
-    .registers 4 # 1 wide parameter, 2 locals
-
-    const-string v0, "(BoxUnbox) Hello boxing world! (0-args, no closure)"
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-.end method
-
-# Test boxing and unboxing; the same lambda should be invoked as if there was no box.
-.method private static testBox()V
-    .registers 3
-
-    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
-    box-lambda v2, v0 # v2 = box(v0)
-    unbox-lambda v0, v2, J # v0 = unbox(v2)
-    invoke-lambda v0, {}
-
-    return-void
-.end method
-
-# Test that boxing the same lambda twice yields the same object.
-.method private static testBoxEquality()V
-   .registers 6 # 0 parameters, 6 locals
-
-    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
-    box-lambda v2, v0 # v2 = box(v0)
-    box-lambda v3, v0 # v3 = box(v0)
-
-    # The objects should be non-null, and they should have the same reference
-    if-eqz v2, :is_zero
-    if-ne v2, v3, :is_not_equal
-
-    const-string v4, "(BoxUnbox) Boxing repeatedly yields referentially-equal objects"
-    goto :end
-
-:is_zero
-    const-string v4, "(BoxUnbox) Boxing repeatedly FAILED: boxing returned null"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(BoxUnbox) Boxing repeatedly FAILED: objects were not same reference"
-    goto :end
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    unbox-lambda v2, v0, J
-    # attempting to unbox a null lambda will throw NPE
-:end
-    return-void
-
-:handler
-    const-string v2, "(BoxUnbox) Caught NPE for unbox-lambda"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures2()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    box-lambda v2, v0  # attempting to box a null lambda will throw NPE
-:end
-    return-void
-
-    # TODO: refactor testFailures using a goto
-
-:handler
-    const-string v2, "(BoxUnbox) Caught NPE for box-lambda"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures3()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const-string v0, "This is not a boxed lambda"
-:start
-    # TODO: use \FunctionalType; here instead
-    unbox-lambda v2, v0, J
-    # can't use a string, expects a lambda object here. throws ClassCastException.
-:end
-    return-void
-
-    # TODO: refactor testFailures using a goto
-
-:handler
-    const-string v2, "(BoxUnbox) Caught ClassCastException for unbox-lambda"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/ClassCastException; {:start .. :end} :handler
-.end method
-
-
-# Force a GC. Used to ensure our weak reference table of boxed lambdas is getting swept.
-.method private static forceGC()V
-    .registers 1
-    invoke-static {}, Ljava/lang/Runtime;->getRuntime()Ljava/lang/Runtime;
-    move-result-object v0
-    invoke-virtual {v0}, Ljava/lang/Runtime;->gc()V
-
-    return-void
-.end method
diff --git a/test/955-lambda-smali/smali/CaptureVariables.smali b/test/955-lambda-smali/smali/CaptureVariables.smali
deleted file mode 100644
index f18b7ff..0000000
--- a/test/955-lambda-smali/smali/CaptureVariables.smali
+++ /dev/null
@@ -1,311 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LCaptureVariables;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-.registers 8
-    # Test boolean capture
-    const v2, 1           # v2 = true
-    capture-variable v2, "Z"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_Z(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test byte capture
-    const v2, 82       # v2 = 82, 'R'
-    capture-variable v2, "B"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_B(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test char capture
-    const v2, 0x2202       # v2 = 0x2202, '∂'
-    capture-variable v2, "C"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_C(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test short capture
-    const v2, 1000 # v2 = 1000
-    capture-variable v2, "S"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_S(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test int capture
-    const v2, 12345678
-    capture-variable v2, "I"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_I(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test long capture
-    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
-    capture-variable v2, "J"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_J(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test float capture
-    const v2, infinityf
-    capture-variable v2, "F"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_F(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test double capture
-    const-wide v2, -infinity
-    capture-variable v2, "D"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_D(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    #TODO: capture objects and lambdas once we have support for it
-
-    # Test capturing multiple variables
-    invoke-static {}, LCaptureVariables;->testMultipleCaptures()V
-
-    # Test failures
-    invoke-static {}, LCaptureVariables;->testFailures()V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_Z(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'Z'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "Z"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(Z)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_B(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'B'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "B"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V  # no println(B), use char instead.
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_C(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'C'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "C"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_S(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'S'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "S"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V  # no println(S), use int instead
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_I(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'I'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "I"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_J(J)V
-    .registers 6 # 1 wide parameter, 4 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'J'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "J"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(J)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_F(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'F'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "F"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(F)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_D(J)V
-    .registers 6 # 1 wide parameter, 4 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'D'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "D"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
-
-    return-void
-.end method
-
-# Test capturing more than one variable.
-.method private static testMultipleCaptures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v2, 1           # v2 = true
-    capture-variable v2, "Z"
-
-    const v2, 82       # v2 = 82, 'R'
-    capture-variable v2, "B"
-
-    const v2, 0x2202       # v2 = 0x2202, '∂'
-    capture-variable v2, "C"
-
-    const v2, 1000 # v2 = 1000
-    capture-variable v2, "S"
-
-    const v2, 12345678
-    capture-variable v2, "I"
-
-    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
-    capture-variable v2, "J"
-
-    const v2, infinityf
-    capture-variable v2, "F"
-
-    const-wide v2, -infinity
-    capture-variable v2, "D"
-
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_ZBCSIJFD(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_ZBCSIJFD(J)V
-    .registers 7 # 1 wide parameter, 5 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is "
-    const-string v4, ","
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "Z"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(Z)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "B"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "C"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "S"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "I"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "J"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->print(J)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "F"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(F)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "D"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
-
-    return-void
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    liberate-variable v0, v2, "Z" # liberating a variable from a null closure shall raise an NPE
-:end
-    return-void
-
-:handler
-    const-string v2, "(CaptureVariables) Caught NPE"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
deleted file mode 100644
index 9892d61..0000000
--- a/test/955-lambda-smali/smali/Main.smali
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LMain;
-
-.super Ljava/lang/Object;
-
-.method public static main([Ljava/lang/String;)V
-    .registers 2
-
-    invoke-static {}, LSanityCheck;->run()I
-    invoke-static {}, LTrivialHelloWorld;->run()V
-    invoke-static {}, LBoxUnbox;->run()V
-    invoke-static {}, LMoveResult;->run()V
-    invoke-static {}, LCaptureVariables;->run()V
-
-# TODO: add tests when verification fails
-
-    return-void
-.end method
diff --git a/test/955-lambda-smali/smali/MoveResult.smali b/test/955-lambda-smali/smali/MoveResult.smali
deleted file mode 100644
index 52f7ba3..0000000
--- a/test/955-lambda-smali/smali/MoveResult.smali
+++ /dev/null
@@ -1,330 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LMoveResult;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-.registers 8
-    invoke-static {}, LMoveResult;->testZ()V
-    invoke-static {}, LMoveResult;->testB()V
-    invoke-static {}, LMoveResult;->testS()V
-    invoke-static {}, LMoveResult;->testI()V
-    invoke-static {}, LMoveResult;->testC()V
-    invoke-static {}, LMoveResult;->testJ()V
-    invoke-static {}, LMoveResult;->testF()V
-    invoke-static {}, LMoveResult;->testD()V
-    invoke-static {}, LMoveResult;->testL()V
-
-    return-void
-.end method
-
-# Test that booleans are returned correctly via move-result.
-.method public static testZ()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaZ(J)Z
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 1
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testZ success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testZ failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testZ. Always returns "true".
-.method public static lambdaZ(J)Z
-    .registers 3
-
-    const v0, 1
-    return v0
-
-.end method
-
-# Test that bytes are returned correctly via move-result.
-.method public static testB()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaB(J)B
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 15
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testB success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testB failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testB. Always returns "15".
-.method public static lambdaB(J)B
-    .registers 3 # 1 wide parameter, 1 local
-
-    const v0, 15
-    return v0
-
-.end method
-
-# Test that shorts are returned correctly via move-result.
-.method public static testS()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaS(J)S
-    invoke-lambda v0, {}
-    move-result v2
-    const/16 v3, 31000
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testS success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testS failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testS. Always returns "31000".
-.method public static lambdaS(J)S
-    .registers 3
-
-    const/16 v0, 31000
-    return v0
-
-.end method
-
-# Test that ints are returned correctly via move-result.
-.method public static testI()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaI(J)I
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 128000
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testI success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testI failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testI. Always returns "128000".
-.method public static lambdaI(J)I
-    .registers 3
-
-    const v0, 128000
-    return v0
-
-.end method
-
-# Test that chars are returned correctly via move-result.
-.method public static testC()V
-    .registers 7
-
-    create-lambda v0, LMoveResult;->lambdaC(J)C
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 65535
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testC success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testC failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testC. Always returns "65535".
-.method public static lambdaC(J)C
-    .registers 3
-
-    const v0, 65535
-    return v0
-
-.end method
-
-# Test that longs are returned correctly via move-result.
-.method public static testJ()V
-    .registers 9
-
-    create-lambda v0, LMoveResult;->lambdaJ(J)J
-    invoke-lambda v0, {}
-    move-result v2
-    const-wide v4, 0xdeadf00dc0ffeeL
-
-    if-ne v4, v2, :is_not_equal
-    const-string v6, "(MoveResult) testJ success"
-    goto :end
-
-:is_not_equal
-    const-string v6, "(MoveResult) testJ failed"
-
-:end
-    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testC. Always returns "0xdeadf00dc0ffeeL".
-.method public static lambdaJ(J)J
-    .registers 5
-
-    const-wide v0, 0xdeadf00dc0ffeeL
-    return-wide v0
-
-.end method
-
-# Test that floats are returned correctly via move-result.
-.method public static testF()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaF(J)F
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, infinityf
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testF success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testF failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testF. Always returns "infinityf".
-.method public static lambdaF(J)F
-    .registers 4
-
-    const v0, infinityf
-    return v0
-
-.end method
-
-# Test that doubles are returned correctly via move-result.
-.method public static testD()V
-    .registers 8
-
-    create-lambda v0, LMoveResult;->lambdaD(J)D
-    invoke-lambda v0, {}
-    move-result-wide v2
-    const-wide v4, -infinity
-
-    if-ne v4, v2, :is_not_equal
-    const-string v6, "(MoveResult) testD success"
-    goto :end
-
-:is_not_equal
-    const-string v6, "(MoveResult) testD failed"
-
-:end
-    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testD. Always returns "-infinity".
-.method public static lambdaD(J)D
-    .registers 5
-
-    const-wide v0, -infinity
-    return-wide v0
-
-.end method
-
-
-# Test that objects are returned correctly via move-result.
-.method public static testL()V
-    .registers 8
-
-    create-lambda v0, LMoveResult;->lambdaL(J)Ljava/lang/String;
-    invoke-lambda v0, {}
-    move-result-object v2
-    const-string v4, "Interned string"
-
-    # relies on string interning returning identical object references
-    if-ne v4, v2, :is_not_equal
-    const-string v6, "(MoveResult) testL success"
-    goto :end
-
-:is_not_equal
-    const-string v6, "(MoveResult) testL failed"
-
-:end
-    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testL. Always returns "Interned string" (string).
-.method public static lambdaL(J)Ljava/lang/String;
-    .registers 5
-
-    const-string v0, "Interned string"
-    return-object v0
-
-.end method
-
-
diff --git a/test/955-lambda-smali/smali/SanityCheck.smali b/test/955-lambda-smali/smali/SanityCheck.smali
deleted file mode 100644
index 4c807d7..0000000
--- a/test/955-lambda-smali/smali/SanityCheck.smali
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LSanityCheck;
-.super Ljava/lang/Object;
-
-
-.method public constructor <init>()V
-.registers 1
-   invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-   return-void
-.end method
-
-# This test is just here to make sure that we can at least execute basic non-lambda
-# functionality such as printing (when lambdas are enabled in the runtime).
-.method public static run()I
-# Don't use too many registers here to avoid hitting the Stack::SanityCheck frame<2KB assert
-.registers 3
-    const-string v0, "SanityCheck"
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    const v2, 123456
-    return v2
-.end method
diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
deleted file mode 100644
index 3444b13..0000000
--- a/test/955-lambda-smali/smali/TrivialHelloWorld.smali
+++ /dev/null
@@ -1,94 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LTrivialHelloWorld;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-.registers 8
-    # Trivial 0-arg hello world
-    create-lambda v0, LTrivialHelloWorld;->doHelloWorld(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Slightly more interesting 4-arg hello world
-    create-lambda v2, doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
-    # TODO: create-lambda should not write to both v2 and v3
-    const-string v4, "A"
-    const-string v5, "B"
-    const-string v6, "C"
-    const-string v7, "D"
-    invoke-lambda v2, {v4, v5, v6, v7}
-
-    invoke-static {}, LTrivialHelloWorld;->testFailures()V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of jlong. 
-.method public static doHelloWorld(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "Hello world! (0-args, no closure)"
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of jlong. 
-.method public static doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
-    .registers 9 # 1 wide parameter, 4 narrow parameters, 3 locals
-
-    const-string v0, " Hello world! (4-args, no closure)"
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-
-    invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-    invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-    invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-    invoke-virtual {v1, p5}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    invoke-lambda v0, {}  # invoking a null lambda shall raise an NPE
-:end
-    return-void
-
-:handler
-    const-string v2, "Caught NPE"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 8f8b667..bba6f8e 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -26,7 +26,8 @@
 
 # The path where build only targets will be output, e.g.
 # out/target/product/generic_x86_64/obj/PACKAGING/art-run-tests_intermediates/DATA
-art_run_tests_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
+art_run_tests_build_dir := $(call intermediates-dir-for,JAVA_LIBRARIES,art-run-tests)/DATA
+art_run_tests_install_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
 
 # A generated list of prerequisites that call 'run-test --build-only', the actual prerequisite is
 # an empty file touched in the intermediate directory.
@@ -49,7 +50,8 @@
 # Helper to create individual build targets for tests. Must be called with $(eval).
 # $(1): the test number
 define define-build-art-run-test
-  dmart_target := $(art_run_tests_dir)/art-run-tests/$(1)/touch
+  dmart_target := $(art_run_tests_build_dir)/art-run-tests/$(1)/touch
+  dmart_install_target := $(art_run_tests_install_dir)/art-run-tests/$(1)/touch
   run_test_options = --build-only
   ifeq ($(ART_TEST_QUIET),true)
     run_test_options += --quiet
@@ -67,8 +69,13 @@
 	  $(LOCAL_PATH)/run-test $$(PRIVATE_RUN_TEST_OPTIONS) --output-path $$(abspath $$(dir $$@)) $(1)
 	$(hide) touch $$@
 
-  TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_target)
+$$(dmart_install_target): $$(dmart_target)
+	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
+	$(hide) cp $$(dir $$<)/* $$(dir $$@)/
+
+  TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_install_target)
   dmart_target :=
+  dmart_install_target :=
   run_test_options :=
 endef
 $(foreach test, $(TEST_ART_RUN_TESTS), $(eval $(call define-build-art-run-test,$(test))))
@@ -78,12 +85,13 @@
 LOCAL_MODULE := art-run-tests
 LOCAL_ADDITIONAL_DEPENDENCIES := $(TEST_ART_RUN_TEST_BUILD_RULES)
 # The build system uses this flag to pick up files generated by declare-make-art-run-test.
-LOCAL_PICKUP_FILES := $(art_run_tests_dir)
+LOCAL_PICKUP_FILES := $(art_run_tests_install_dir)
 
 include $(BUILD_PHONY_PACKAGE)
 
 # Clear temp vars.
-art_run_tests_dir :=
+art_run_tests_build_dir :=
+art_run_tests_install_dir :=
 define-build-art-run-test :=
 TEST_ART_RUN_TEST_BUILD_RULES :=
 
@@ -111,8 +119,14 @@
 ifeq ($(ART_TEST_JIT),true)
   COMPILER_TYPES += jit
 endif
+OPTIMIZING_COMPILER_TYPES :=
 ifeq ($(ART_TEST_OPTIMIZING),true)
   COMPILER_TYPES += optimizing
+  OPTIMIZING_COMPILER_TYPES += optimizing
+endif
+ifeq ($(ART_TEST_OPTIMIZING_GRAPH_COLOR),true)
+  COMPILER_TYPES += regalloc_gc
+  OPTIMIZING_COMPILER_TYPES += regalloc_gc
 endif
 RELOCATE_TYPES := relocate
 ifeq ($(ART_TEST_RUN_TEST_NO_RELOCATE),true)
@@ -468,14 +482,28 @@
 
 TEST_ART_BROKEN_JIT_RUN_TESTS :=
 
+# Known broken tests for the graph coloring register allocator.
+# These tests were based on the linear scan allocator, which makes different decisions than
+# the graph coloring allocator. (These attempt to test for code quality, not correctness.)
+TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR := \
+  570-checker-select \
+  484-checker-register-hints
+
+ifneq (,$(filter regalloc_gc,$(COMPILER_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      regalloc_gc,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+      $(TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR),$(ALL_ADDRESS_SIZES))
+endif
+
 # Known broken tests for the mips32 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
     510-checker-try-catch \
 
 ifeq (mips,$(TARGET_ARCH))
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
@@ -487,9 +515,9 @@
 TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \
 
 ifeq (mips64,$(TARGET_ARCH))
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
@@ -502,9 +530,9 @@
   454-get-vreg \
   457-regs \
 
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(PICTEST_TYPES),ndebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
@@ -513,9 +541,9 @@
 # Tests that should fail when the optimizing compiler compiles them debuggable.
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS := \
 
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(PICTEST_TYPES),debuggable,$(TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
@@ -527,13 +555,10 @@
 # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
 # 484: Baker's fast path based read barrier compiler instrumentation generates code containing
 #      more parallel moves on x86, thus some Checker assertions may fail.
-# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
-#      instruction yet (b/26601270).
 # 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
 #      not yet handled in the read barrier configuration.
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
   484-checker-register-hints \
-  527-checker-array-access-split \
   537-checker-arraycopy
 
 # Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
@@ -547,9 +572,9 @@
         $(TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
 
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
         $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
@@ -578,9 +603,9 @@
   055-enum-performance
 
 ifeq ($(ART_HEAP_POISONING),true)
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
@@ -727,6 +752,9 @@
   ifeq ($(4),optimizing)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_RULES
     run_test_options += --optimizing
+  else ifeq ($(4),regalloc_gc)
+    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_GRAPH_COLOR_RULES
+    run_test_options += --optimizing -Xcompiler-option --register-allocation-strategy=graph-color
   else
     ifeq ($(4),interpreter)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_RULES
@@ -810,6 +838,10 @@
     endif
   endif
   image_suffix := $(4)
+  ifeq ($(4),regalloc_gc)
+    # Graph coloring tests share the image_suffix with optimizing tests.
+    image_suffix := optimizing
+  endif
   ifeq ($(9),no-image)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_IMAGE_RULES
     run_test_options += --no-image
diff --git a/test/MyClassNatives/MyClassNatives.java b/test/MyClassNatives/MyClassNatives.java
index 19c13f7..4a8b0e0 100644
--- a/test/MyClassNatives/MyClassNatives.java
+++ b/test/MyClassNatives/MyClassNatives.java
@@ -35,11 +35,11 @@
     static native int getText(long val1, Object obj1, long val2, Object obj2);
     synchronized native Object []getSinkPropertiesNative(String path);
 
-    native Class instanceMethodThatShouldReturnClass();
-    static native Class staticMethodThatShouldReturnClass();
+    native Class<?> instanceMethodThatShouldReturnClass();
+    static native Class<?> staticMethodThatShouldReturnClass();
 
-    native void instanceMethodThatShouldTakeClass(int i, Class c);
-    static native void staticMethodThatShouldTakeClass(int i, Class c);
+    native void instanceMethodThatShouldTakeClass(int i, Class<?> c);
+    static native void staticMethodThatShouldTakeClass(int i, Class<?> c);
 
     native float checkFloats(float f1, float f2);
     native void forceStackParameters(int i1, int i2, int i3, int i4, int i5, int i6, int i8, int i9,
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 806e130..ee2ee1a 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -130,18 +130,18 @@
     return;
   }
 
-  ScopedObjectAccess soa(Thread::Current());
+  ArtMethod* method = nullptr;
+  {
+    ScopedObjectAccess soa(Thread::Current());
 
-  ScopedUtfChars chars(env, method_name);
-  CHECK(chars.c_str() != nullptr);
-
-  mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
-  ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), kRuntimePointerSize);
+    ScopedUtfChars chars(env, method_name);
+    CHECK(chars.c_str() != nullptr);
+    method = soa.Decode<mirror::Class*>(cls)->FindDeclaredDirectMethodByName(
+        chars.c_str(), kRuntimePointerSize);
+  }
 
   jit::JitCodeCache* code_cache = jit->GetCodeCache();
   OatQuickMethodHeader* header = nullptr;
-  // Make sure there is a profiling info, required by the compiler.
-  ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
   while (true) {
     header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
     if (code_cache->ContainsPc(header->GetCode())) {
@@ -149,6 +149,9 @@
     } else {
       // Sleep to yield to the compiler thread.
       usleep(1000);
+      ScopedObjectAccess soa(Thread::Current());
+      // Make sure there is a profiling info, required by the compiler.
+      ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
       // Will either ensure it's compiled or do the compilation itself.
       jit->CompileMethod(method, soa.Self(), /* osr */ false);
     }
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 64bf4f3..c6c9380 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -553,12 +553,10 @@
     if [ "$TIME_OUT" = "timeout" ]; then
       # Add timeout command if time out is desired.
       #
-      # Note: We use nested timeouts. The inner timeout sends SIGRTMIN+2 (usually 36) to ART, which
-      #       will induce a full thread dump before abort. However, dumping threads might deadlock,
-      #       so the outer timeout sends the regular SIGTERM after an additional minute to ensure
-      #       termination (without dumping all threads).
-      TIME_PLUS_ONE=$(($TIME_OUT_VALUE + 60))
-      cmdline="timeout ${TIME_PLUS_ONE}s timeout -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
+      # Note: We first send SIGRTMIN+2 (usually 36) to ART, which will induce a full thread dump
+      #       before abort. However, dumping threads might deadlock, so we also use the "-k"
+      #       option to definitely kill the child.
+      cmdline="timeout -k 120s -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk
index 834426d..60e0cd8 100644
--- a/tools/ahat/Android.mk
+++ b/tools/ahat/Android.mk
@@ -30,6 +30,10 @@
 LOCAL_IS_HOST_MODULE := true
 LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE := ahat
+
+# Let users with Java 7 run ahat (b/28303627)
+LOCAL_JAVA_LANGUAGE_VERSION := 1.7
+
 include $(BUILD_HOST_JAVA_LIBRARY)
 
 # --- ahat script ----------------
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index e6f8411..a8205c7 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -86,7 +86,7 @@
     mSnapshot = snapshot;
     mHeaps = new ArrayList<Heap>(mSnapshot.getHeaps());
 
-    ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
+    final ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
     for (Heap heap : mHeaps) {
       // Use a single element array for the total to act as a reference to a
       // long.
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index 3cdb40c..8769d11 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -95,9 +95,7 @@
       return null;
     }
 
-    // TODO: When perflib provides a better way to get the length of the
-    // array, we should use that here.
-    int numChars = chars.getValues().length;
+    int numChars = chars.getLength();
     int count = getIntField(inst, "count", numChars);
     if (count == 0) {
       return "";
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index d88a4a0..12e0338 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -21,7 +21,7 @@
 
 out_dir=${OUT_DIR-out}
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
-common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests ${out_dir}/host/linux-x86/bin/jack"
+common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests mockito-target ${out_dir}/host/linux-x86/bin/jack"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
diff --git a/tools/cpp-define-generator/constant_heap.def b/tools/cpp-define-generator/constant_heap.def
new file mode 100644
index 0000000..dc76736
--- /dev/null
+++ b/tools/cpp-define-generator/constant_heap.def
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export heap values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "gc/heap.h"
+#endif
+
+// Minimum size for an object to be allocated in the large object space.
+DEFINE_EXPR(MIN_LARGE_OBJECT_THRESHOLD, size_t, art::gc::Heap::kMinLargeObjectThreshold)
+
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
index c1e6099..67ed5b5 100644
--- a/tools/cpp-define-generator/constant_lockword.def
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -30,5 +30,12 @@
 DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
 DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE,       int32_t,  kThinLockCountOne)
 
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED,   uint32_t,  kGCStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED, uint32_t, kGCStateMaskShiftedToggled)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT,   int32_t,  kGCStateShift)
+
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_SHIFT, int32_t, kMarkBitStateShift)
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_MASK_SHIFTED, uint32_t, kMarkBitStateMaskShifted)
+
 #undef DEFINE_LOCK_WORD_EXPR
 
diff --git a/tools/cpp-define-generator/offset_runtime.def b/tools/cpp-define-generator/offset_runtime.def
index b327ca3..17167a0 100644
--- a/tools/cpp-define-generator/offset_runtime.def
+++ b/tools/cpp-define-generator/offset_runtime.def
@@ -25,15 +25,17 @@
 // Note: these callee save methods loads require read barriers.
 
 #define DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(field_name, constant_name) \
-  DEFINE_OFFSET_EXPR(Runtime, field_name ## _CALLEE_SAVE_FRAME, size_t, art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: constant_name))
+  DEFINE_OFFSET_EXPR(Runtime, field_name ## _METHOD, size_t, art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: constant_name))
 
                     //     Macro substring       Constant name
-// Offset of field Runtime::callee_save_methods_[kSaveAll]
-DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_ALL,      kSaveAll)
-// Offset of field Runtime::callee_save_methods_[kRefsOnly]
-DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(REFS_ONLY,     kRefsOnly)
-// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
-DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(REFS_AND_ARGS, kRefsAndArgs)
+// Offset of field Runtime::callee_save_methods_[kSaveAllCalleeSaves]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_ALL_CALLEE_SAVES, kSaveAllCalleeSaves)
+// Offset of field Runtime::callee_save_methods_[kSaveRefsOnly]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_ONLY, kSaveRefsOnly)
+// Offset of field Runtime::callee_save_methods_[kSaveRefsAndArgs]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_AND_ARGS, kSaveRefsAndArgs)
+// Offset of field Runtime::callee_save_methods_[kSaveEverything]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING, kSaveEverything)
 
 #undef DEFINE_RUNTIME_CALLEE_SAVE_OFFSET
 #include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def
index 01e4d5b..d2d8777 100644
--- a/tools/cpp-define-generator/offsets_all.def
+++ b/tools/cpp-define-generator/offsets_all.def
@@ -48,6 +48,7 @@
 // TODO: MIRROR_*_ARRAY offsets (depends on header size)
 // TODO: MIRROR_STRING offsets (depends on header size)
 #include "offset_dexcache.def"
+#include "constant_heap.def"
 #include "constant_lockword.def"
 #include "constant_globals.def"
 #include "constant_rosalloc.def"
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index bf8d12b..e5d7597 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -36,6 +36,15 @@
   names: ["libcore.io.OsTest#testUnixDomainSockets_in_file_system"]
 },
 {
+  description: "TCP_USER_TIMEOUT is not defined on host's tcp.h (glibc-2.15-4.8).",
+  result: EXEC_FAILED,
+  modes: [host],
+  names: ["libcore.android.system.OsConstantsTest#testTcpUserTimeoutIsDefined",
+          "libcore.io.OsTest#test_socket_tcpUserTimeout_setAndGet",
+          "libcore.io.OsTest#test_socket_tcpUserTimeout_doesNotWorkOnDatagramSocket"],
+  bug: 30402085
+},
+{
   description: "Issue with incorrect device time (1970)",
   result: EXEC_FAILED,
   modes: [device],
@@ -171,43 +180,6 @@
   bug: 25437292
 },
 {
-  description: "Failing tests after OpenJDK move.",
-  result: EXEC_FAILED,
-  bug: 26326992,
-  names: ["libcore.icu.RelativeDateTimeFormatterTest#test_getRelativeDateTimeStringDST",
-          "libcore.java.lang.OldSystemTest#test_load",
-          "libcore.java.text.NumberFormatTest#test_currencyWithPatternDigits",
-          "libcore.java.text.NumberFormatTest#test_setCurrency",
-          "libcore.java.text.OldNumberFormatTest#test_getIntegerInstanceLjava_util_Locale",
-          "libcore.java.util.CalendarTest#testAddOneDayAndOneDayOver30MinuteDstForwardAdds48Hours",
-          "libcore.java.util.CalendarTest#testNewCalendarKoreaIsSelfConsistent",
-          "libcore.java.util.CalendarTest#testSetTimeInZoneWhereDstIsNoLongerUsed",
-          "libcore.java.util.CalendarTest#test_nullLocale",
-          "libcore.java.util.FormatterTest#test_numberLocalization",
-          "libcore.java.util.FormatterTest#test_uppercaseConversions",
-          "libcore.javax.crypto.CipherTest#testCipher_getInstance_WrongType_Failure",
-          "libcore.javax.crypto.CipherTest#testDecryptBufferZeroSize_mustDecodeToEmptyString",
-          "libcore.javax.security.auth.x500.X500PrincipalTest#testExceptionsForWrongDNs",
-          "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getDate",
-          "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getExpiration",
-          "org.apache.harmony.regex.tests.java.util.regex.PatternSyntaxExceptionTest#testPatternSyntaxException",
-          "org.apache.harmony.tests.java.lang.FloatTest#test_parseFloat_LString_Harmony6261",
-          "org.apache.harmony.tests.java.lang.ThreadTest#test_isDaemon",
-          "org.apache.harmony.tests.java.text.DecimalFormatSymbolsTest#test_setInternationalCurrencySymbolLjava_lang_String",
-          "org.apache.harmony.tests.java.text.DecimalFormatTest#testSerializationHarmonyRICompatible",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parseLjava_lang_StringLjava_text_ParsePosition",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_W_w_dd_MMMM_yyyy_EEEE",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_dayOfYearPatterns",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_m_z",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_2DigitOffsetFromGMT",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetFromGMT",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetNoGMT",
-          "org.apache.harmony.tests.java.util.jar.JarFileTest#test_getInputStreamLjava_util_jar_JarEntry_subtest0",
-          "libcore.java.util.CalendarTest#test_clear_45877",
-          "org.apache.harmony.crypto.tests.javax.crypto.spec.SecretKeySpecTest#testGetFormat",
-          "org.apache.harmony.tests.java.util.TimerTaskTest#test_scheduledExecutionTime"]
-},
-{
   description: "Missing resource in classpath",
   result: EXEC_FAILED,
   modes: [device],
@@ -262,10 +234,12 @@
   names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"]
 },
 {
-  description: "Flaky failure, possibly caused by a kernel bug accessing /proc/",
+  description: "Flaky failure, native crash in the runtime.
+                Unclear if this relates to the tests running sh as a child process.",
   result: EXEC_FAILED,
-  bug: 27464570,
+  bug: 30657148,
   modes: [device],
-  names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit"]
+  names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit",
+          "libcore.java.lang.ProcessBuilderTest#testRedirect_nullStreams"]
 }
 ]
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 3605aa0..2a6e172 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -25,17 +25,22 @@
   JAVA_LIBRARIES=${ANDROID_PRODUCT_OUT}/../../common/obj/JAVA_LIBRARIES
 fi
 
-# Jar containing jsr166 tests.
-jsr166_test_jack=${JAVA_LIBRARIES}/jsr166-tests_intermediates/classes.jack
+function cparg {
+  for var
+  do
+    printf -- "--classpath ${JAVA_LIBRARIES}/${var}_intermediates/classes.jack ";
+  done
+}
 
-# Jar containing all the other tests.
-test_jack=${JAVA_LIBRARIES}/core-tests_intermediates/classes.jack
+DEPS="core-tests jsr166-tests mockito-target"
 
-if [ ! -f $test_jack ]; then
-  echo "Before running, you must build core-tests, jsr166-tests and vogar: \
-        make core-tests jsr166-tests vogar"
-  exit 1
-fi
+for lib in $DEPS
+do
+  if [ ! -f "${JAVA_LIBRARIES}/${lib}_intermediates/classes.jack" ]; then
+    echo "${lib} is missing. Before running, you must run art/tools/buildbot-build.sh"
+    exit 1
+  fi
+done
 
 expectations="--expectations art/tools/libcore_failures.txt"
 if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
@@ -133,4 +138,4 @@
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args $expectations --classpath $jsr166_test_jack --classpath $test_jack ${working_packages[@]}
+vogar $vogar_args $expectations $(cparg $DEPS) ${working_packages[@]}